-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/kthread.h>
#include <libcfs/libcfs.h>
-typedef struct cfs_wi_sched {
-#ifdef __KERNEL__
- /** serialised workitems */
- cfs_spinlock_t ws_lock;
- /** where schedulers sleep */
- cfs_waitq_t ws_waitq;
-#endif
- /** concurrent workitems */
- cfs_list_t ws_runq;
- /** rescheduled running-workitems */
- cfs_list_t ws_rerunq;
- /** shutting down */
- int ws_shuttingdown;
-} cfs_wi_sched_t;
-
-#ifdef __KERNEL__
-/**
- * we have 2 cfs_wi_sched_t so far:
- * one for CFS_WI_SCHED_ANY, another for CFS_WI_SCHED_SERIAL
- * per-cpu implementation will be added for SMP scalability
- */
-
-#define CFS_WI_NSCHED 2
-#else
-/** always 2 for userspace */
-#define CFS_WI_NSCHED 2
-#endif /* __KERNEL__ */
-
-struct cfs_workitem_data {
- /** serialize */
- cfs_spinlock_t wi_glock;
- /** number of cfs_wi_sched_t */
- int wi_nsched;
- /** number of threads (all schedulers) */
- int wi_nthreads;
- /** default scheduler */
- cfs_wi_sched_t *wi_scheds;
+#define CFS_WS_NAME_LEN 16
+
+struct cfs_wi_sched {
+ struct list_head ws_list; /* chain on global list */
+ /** serialised workitems */
+ spinlock_t ws_lock;
+ /** where schedulers sleep */
+ wait_queue_head_t ws_waitq;
+ /** concurrent workitems */
+ struct list_head ws_runq;
+ /** rescheduled running-workitems, a workitem can be rescheduled
+	 * while running in wi_action(), but we don't want to execute it again
+ * unless it returns from wi_action(), so we put it on ws_rerunq
+ * while rescheduling, and move it to runq after it returns
+ * from wi_action() */
+ struct list_head ws_rerunq;
+ /** CPT-table for this scheduler */
+ struct cfs_cpt_table *ws_cptab;
+ /** CPT id for affinity */
+ int ws_cpt;
+ /** number of scheduled workitems */
+ int ws_nscheduled;
+ /** started scheduler thread, protected by cfs_wi_data::wi_glock */
+ unsigned int ws_nthreads:30;
+ /** shutting down, protected by cfs_wi_data::wi_glock */
+ unsigned int ws_stopping:1;
+ /** serialize starting thread, protected by cfs_wi_data::wi_glock */
+ unsigned int ws_starting:1;
+ /** scheduler name */
+ char ws_name[CFS_WS_NAME_LEN];
+};
+
+static struct cfs_workitem_data {
+ /** serialize */
+ spinlock_t wi_glock;
+ /** list of all schedulers */
+ struct list_head wi_scheds;
+ /** WI module is initialized */
+ int wi_init;
+ /** shutting down the whole WI module */
+ int wi_stopping;
} cfs_wi_data;
-static inline cfs_wi_sched_t *
-cfs_wi_to_sched(cfs_workitem_t *wi)
-{
- LASSERT(wi->wi_sched_id == CFS_WI_SCHED_ANY ||
- wi->wi_sched_id == CFS_WI_SCHED_SERIAL ||
- (wi->wi_sched_id >= 0 &&
- wi->wi_sched_id < cfs_wi_data.wi_nsched));
-
- if (wi->wi_sched_id == CFS_WI_SCHED_ANY)
- return &cfs_wi_data.wi_scheds[0];
- if (wi->wi_sched_id == CFS_WI_SCHED_SERIAL)
- return &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];
-
- return &cfs_wi_data.wi_scheds[wi->wi_sched_id];
-}
-
-#ifdef __KERNEL__
-static inline void
-cfs_wi_sched_lock(cfs_wi_sched_t *sched)
-{
- cfs_spin_lock(&sched->ws_lock);
-}
-
-static inline void
-cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
-{
- cfs_spin_unlock(&sched->ws_lock);
-}
-
static inline int
-cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
-{
- cfs_wi_sched_lock(sched);
- if (sched->ws_shuttingdown) {
- cfs_wi_sched_unlock(sched);
- return 0;
- }
-
- if (!cfs_list_empty(&sched->ws_runq)) {
- cfs_wi_sched_unlock(sched);
- return 0;
- }
- cfs_wi_sched_unlock(sched);
- return 1;
-}
-
-#else
-
-static inline void
-cfs_wi_sched_lock(cfs_wi_sched_t *sched)
+cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
{
- cfs_spin_lock(&cfs_wi_data.wi_glock);
+ spin_lock(&sched->ws_lock);
+ if (sched->ws_stopping) {
+ spin_unlock(&sched->ws_lock);
+ return 0;
+ }
+
+ if (!list_empty(&sched->ws_runq)) {
+ spin_unlock(&sched->ws_lock);
+ return 0;
+ }
+ spin_unlock(&sched->ws_lock);
+ return 1;
}
-static inline void
-cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
-{
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
-}
-
-#endif
-
/* XXX:
* 0. it only works when called from wi->wi_action.
* 1. when it returns no one shall try to schedule the workitem.
*/
void
-cfs_wi_exit(cfs_workitem_t *wi)
+cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
- cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
+ LASSERT(!in_interrupt()); /* because we use plain spinlock */
+ LASSERT(!sched->ws_stopping);
- LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
- LASSERT (!sched->ws_shuttingdown);
+ spin_lock(&sched->ws_lock);
- cfs_wi_sched_lock(sched);
+ LASSERT(wi->wi_running);
-#ifdef __KERNEL__
- LASSERT (wi->wi_running);
-#endif
- if (wi->wi_scheduled) { /* cancel pending schedules */
- LASSERT (!cfs_list_empty(&wi->wi_list));
- cfs_list_del_init(&wi->wi_list);
- }
+ if (wi->wi_scheduled) { /* cancel pending schedules */
+ LASSERT(!list_empty(&wi->wi_list));
+ list_del_init(&wi->wi_list);
+
+ LASSERT(sched->ws_nscheduled > 0);
+ sched->ws_nscheduled--;
+ }
+
+ LASSERT(list_empty(&wi->wi_list));
- LASSERT (cfs_list_empty(&wi->wi_list));
- wi->wi_scheduled = 1; /* LBUG future schedule attempts */
+ wi->wi_scheduled = 1; /* LBUG future schedule attempts */
+ spin_unlock(&sched->ws_lock);
- cfs_wi_sched_unlock(sched);
- return;
+ return;
}
-CFS_EXPORT_SYMBOL(cfs_wi_exit);
+EXPORT_SYMBOL(cfs_wi_exit);
/**
- * cancel a workitem:
+ * cancel schedule request of workitem \a wi
*/
int
-cfs_wi_cancel (cfs_workitem_t *wi)
+cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
- cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
- int rc;
+ int rc;
- LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
- LASSERT (!sched->ws_shuttingdown);
+ LASSERT(!in_interrupt()); /* because we use plain spinlock */
+ LASSERT(!sched->ws_stopping);
- cfs_wi_sched_lock(sched);
/*
* return 0 if it's running already, otherwise return 1, which
* means the workitem will not be scheduled and will not have
* any race with wi_action.
*/
- rc = !(wi->wi_running);
+ spin_lock(&sched->ws_lock);
- if (wi->wi_scheduled) { /* cancel pending schedules */
- LASSERT (!cfs_list_empty(&wi->wi_list));
- cfs_list_del_init(&wi->wi_list);
- wi->wi_scheduled = 0;
- }
+ rc = !(wi->wi_running);
- LASSERT (cfs_list_empty(&wi->wi_list));
+ if (wi->wi_scheduled) { /* cancel pending schedules */
+ LASSERT(!list_empty(&wi->wi_list));
+ list_del_init(&wi->wi_list);
- cfs_wi_sched_unlock(sched);
- return rc;
-}
+ LASSERT(sched->ws_nscheduled > 0);
+ sched->ws_nscheduled--;
-CFS_EXPORT_SYMBOL(cfs_wi_cancel);
+ wi->wi_scheduled = 0;
+ }
+
+ LASSERT (list_empty(&wi->wi_list));
+
+ spin_unlock(&sched->ws_lock);
+ return rc;
+}
+EXPORT_SYMBOL(cfs_wi_deschedule);
/*
* Workitem scheduled with (serial == 1) is strictly serialised not only with
* be added, and even dynamic creation of serialised queues might be supported.
*/
void
-cfs_wi_schedule(cfs_workitem_t *wi)
+cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
- cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
-
- LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
- LASSERT (!sched->ws_shuttingdown);
-
- cfs_wi_sched_lock(sched);
+ LASSERT(!in_interrupt()); /* because we use plain spinlock */
+ LASSERT(!sched->ws_stopping);
+
+ spin_lock(&sched->ws_lock);
+
+ if (!wi->wi_scheduled) {
+ LASSERT (list_empty(&wi->wi_list));
+
+ wi->wi_scheduled = 1;
+ sched->ws_nscheduled++;
+ if (!wi->wi_running) {
+ list_add_tail(&wi->wi_list, &sched->ws_runq);
+ wake_up(&sched->ws_waitq);
+ } else {
+ list_add(&wi->wi_list, &sched->ws_rerunq);
+ }
+ }
+
+ LASSERT (!list_empty(&wi->wi_list));
+ spin_unlock(&sched->ws_lock);
+ return;
+}
+EXPORT_SYMBOL(cfs_wi_schedule);
- if (!wi->wi_scheduled) {
- LASSERT (cfs_list_empty(&wi->wi_list));
+static int
+cfs_wi_scheduler(void *arg)
+{
+ struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;
- wi->wi_scheduled = 1;
- if (!wi->wi_running) {
- cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
-#ifdef __KERNEL__
- cfs_waitq_signal(&sched->ws_waitq);
-#endif
- } else {
- cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
- }
- }
+ cfs_block_allsigs();
- LASSERT (!cfs_list_empty(&wi->wi_list));
- cfs_wi_sched_unlock(sched);
- return;
-}
+ /* CPT affinity scheduler? */
+ if (sched->ws_cptab != NULL)
+ if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
+ CWARN("Unable to bind %s on CPU partition %d\n",
+ sched->ws_name, sched->ws_cpt);
-CFS_EXPORT_SYMBOL(cfs_wi_schedule);
+ spin_lock(&cfs_wi_data.wi_glock);
-#ifdef __KERNEL__
+ LASSERT(sched->ws_starting == 1);
+ sched->ws_starting--;
+ sched->ws_nthreads++;
-static int
-cfs_wi_scheduler (void *arg)
-{
- int id = (int)(long_ptr_t) arg;
- int serial = (id == -1);
- char name[24];
- cfs_wi_sched_t *sched;
-
- if (serial) {
- sched = &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];
- cfs_daemonize("wi_serial_sd");
- } else {
- /* will be sched = &cfs_wi_data.wi_scheds[id] in the future */
- sched = &cfs_wi_data.wi_scheds[0];
- snprintf(name, sizeof(name), "cfs_wi_sd%03d", id);
- cfs_daemonize(name);
- }
+ spin_unlock(&cfs_wi_data.wi_glock);
- cfs_block_allsigs();
+ spin_lock(&sched->ws_lock);
- cfs_wi_sched_lock(sched);
+ while (!sched->ws_stopping) {
+ int nloops = 0;
+ int rc;
+ struct cfs_workitem *wi;
- while (!sched->ws_shuttingdown) {
- int nloops = 0;
- int rc;
- cfs_workitem_t *wi;
+ while (!list_empty(&sched->ws_runq) &&
+ nloops < CFS_WI_RESCHED) {
+ wi = list_entry(sched->ws_runq.next,
+ struct cfs_workitem, wi_list);
+ LASSERT(wi->wi_scheduled && !wi->wi_running);
- while (!cfs_list_empty(&sched->ws_runq) &&
- nloops < CFS_WI_RESCHED) {
- wi = cfs_list_entry(sched->ws_runq.next,
- cfs_workitem_t, wi_list);
- LASSERT (wi->wi_scheduled && !wi->wi_running);
+ list_del_init(&wi->wi_list);
- cfs_list_del_init(&wi->wi_list);
+ LASSERT(sched->ws_nscheduled > 0);
+ sched->ws_nscheduled--;
wi->wi_running = 1;
wi->wi_scheduled = 0;
- cfs_wi_sched_unlock(sched);
+
+ spin_unlock(&sched->ws_lock);
nloops++;
rc = (*wi->wi_action) (wi);
- cfs_wi_sched_lock(sched);
+ spin_lock(&sched->ws_lock);
if (rc != 0) /* WI should be dead, even be freed! */
continue;
- wi->wi_running = 0;
- if (cfs_list_empty(&wi->wi_list))
+ wi->wi_running = 0;
+ if (list_empty(&wi->wi_list))
continue;
- LASSERT (wi->wi_scheduled);
- /* wi is rescheduled, should be on rerunq now, we
- * move it to runq so it can run action now */
- cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
- }
-
- if (!cfs_list_empty(&sched->ws_runq)) {
- cfs_wi_sched_unlock(sched);
- /* don't sleep because some workitems still
- * expect me to come back soon */
- cfs_cond_resched();
- cfs_wi_sched_lock(sched);
- continue;
+ LASSERT(wi->wi_scheduled);
+ /* wi is rescheduled, should be on rerunq now, we
+ * move it to runq so it can run action now */
+ list_move_tail(&wi->wi_list, &sched->ws_runq);
}
- cfs_wi_sched_unlock(sched);
- cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
- !cfs_wi_sched_cansleep(sched), rc);
- cfs_wi_sched_lock(sched);
+ if (!list_empty(&sched->ws_runq)) {
+ spin_unlock(&sched->ws_lock);
+ /* don't sleep because some workitems still
+ * expect me to come back soon */
+ cond_resched();
+ spin_lock(&sched->ws_lock);
+ continue;
+ }
+
+ spin_unlock(&sched->ws_lock);
+ rc = wait_event_interruptible_exclusive(sched->ws_waitq,
+ !cfs_wi_sched_cansleep(sched));
+ spin_lock(&sched->ws_lock);
}
- cfs_wi_sched_unlock(sched);
+ spin_unlock(&sched->ws_lock);
- cfs_spin_lock(&cfs_wi_data.wi_glock);
- cfs_wi_data.wi_nthreads--;
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
- return 0;
+ spin_lock(&cfs_wi_data.wi_glock);
+ sched->ws_nthreads--;
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ return 0;
}
-static int
-cfs_wi_start_thread (int (*func) (void*), void *arg)
+void
+cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
- long pid;
-
- pid = cfs_kernel_thread(func, arg, 0);
- if (pid < 0)
- return (int)pid;
-
- cfs_spin_lock(&cfs_wi_data.wi_glock);
- cfs_wi_data.wi_nthreads++;
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
- return 0;
-}
+ LASSERT(cfs_wi_data.wi_init);
+ LASSERT(!cfs_wi_data.wi_stopping);
-#else /* __KERNEL__ */
+ spin_lock(&cfs_wi_data.wi_glock);
+ if (sched->ws_stopping) {
+ CDEBUG(D_INFO, "%s is in progress of stopping\n",
+ sched->ws_name);
+ spin_unlock(&cfs_wi_data.wi_glock);
+ return;
+ }
-int
-cfs_wi_check_events (void)
-{
- int n = 0;
- cfs_workitem_t *wi;
- cfs_list_t *q;
+ LASSERT(!list_empty(&sched->ws_list));
+ sched->ws_stopping = 1;
- cfs_spin_lock(&cfs_wi_data.wi_glock);
+ spin_unlock(&cfs_wi_data.wi_glock);
- for (;;) {
- /** rerunq is always empty for userspace */
- if (!cfs_list_empty(&cfs_wi_data.wi_scheds[1].ws_runq))
- q = &cfs_wi_data.wi_scheds[1].ws_runq;
- else if (!cfs_list_empty(&cfs_wi_data.wi_scheds[0].ws_runq))
- q = &cfs_wi_data.wi_scheds[0].ws_runq;
- else
- break;
+ wake_up_all(&sched->ws_waitq);
- wi = cfs_list_entry(q->next, cfs_workitem_t, wi_list);
- cfs_list_del_init(&wi->wi_list);
+ spin_lock(&cfs_wi_data.wi_glock);
+ {
+ int i = 2;
- LASSERT (wi->wi_scheduled);
- wi->wi_scheduled = 0;
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
+ while (sched->ws_nthreads > 0) {
+ CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
+ "waiting for %d threads of WI sched[%s] to "
+ "terminate\n", sched->ws_nthreads,
+ sched->ws_name);
- n++;
- (*wi->wi_action) (wi);
+ spin_unlock(&cfs_wi_data.wi_glock);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(1) / 20);
+ spin_lock(&cfs_wi_data.wi_glock);
+ }
+ }
- cfs_spin_lock(&cfs_wi_data.wi_glock);
- }
+ list_del(&sched->ws_list);
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
- return n;
-}
+ spin_unlock(&cfs_wi_data.wi_glock);
-#endif
+ LASSERT(sched->ws_nscheduled == 0);
-static void
-cfs_wi_sched_init(cfs_wi_sched_t *sched)
-{
- sched->ws_shuttingdown = 0;
-#ifdef __KERNEL__
- cfs_spin_lock_init(&sched->ws_lock);
- cfs_waitq_init(&sched->ws_waitq);
-#endif
- CFS_INIT_LIST_HEAD(&sched->ws_runq);
- CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
+ LIBCFS_FREE(sched, sizeof(*sched));
}
+EXPORT_SYMBOL(cfs_wi_sched_destroy);
-static void
-cfs_wi_sched_shutdown(cfs_wi_sched_t *sched)
+int
+cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
+ int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
{
- cfs_wi_sched_lock(sched);
-
- LASSERT(cfs_list_empty(&sched->ws_runq));
- LASSERT(cfs_list_empty(&sched->ws_rerunq));
-
- sched->ws_shuttingdown = 1;
-
-#ifdef __KERNEL__
- cfs_waitq_broadcast(&sched->ws_waitq);
-#endif
- cfs_wi_sched_unlock(sched);
+ struct cfs_wi_sched *sched;
+
+ LASSERT(cfs_wi_data.wi_init);
+ LASSERT(!cfs_wi_data.wi_stopping);
+ LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
+ (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
+
+ LIBCFS_ALLOC(sched, sizeof(*sched));
+ if (sched == NULL)
+ return -ENOMEM;
+
+ if (strlen(name) > sizeof(sched->ws_name)-1) {
+ LIBCFS_FREE(sched, sizeof(*sched));
+ return -E2BIG;
+ }
+ strlcpy(sched->ws_name, name, sizeof(sched->ws_name));
+
+ sched->ws_cptab = cptab;
+ sched->ws_cpt = cpt;
+
+ spin_lock_init(&sched->ws_lock);
+ init_waitqueue_head(&sched->ws_waitq);
+
+ INIT_LIST_HEAD(&sched->ws_runq);
+ INIT_LIST_HEAD(&sched->ws_rerunq);
+ INIT_LIST_HEAD(&sched->ws_list);
+
+ for (; nthrs > 0; nthrs--) {
+ char name[16];
+ struct task_struct *task;
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ while (sched->ws_starting > 0) {
+ spin_unlock(&cfs_wi_data.wi_glock);
+ schedule();
+ spin_lock(&cfs_wi_data.wi_glock);
+ }
+
+ sched->ws_starting++;
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
+ snprintf(name, sizeof(name), "%s_%02d_%02d",
+ sched->ws_name, sched->ws_cpt,
+ sched->ws_nthreads);
+ } else {
+ snprintf(name, sizeof(name), "%s_%02d",
+ sched->ws_name, sched->ws_nthreads);
+ }
+
+ task = kthread_run(cfs_wi_scheduler, sched, name);
+ if (IS_ERR(task)) {
+ int rc = PTR_ERR(task);
+
+ CERROR("Failed to create thread for "
+ "WI scheduler %s: %d\n", name, rc);
+
+ spin_lock(&cfs_wi_data.wi_glock);
+
+ /* make up for cfs_wi_sched_destroy */
+ list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+ sched->ws_starting--;
+
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ cfs_wi_sched_destroy(sched);
+ return rc;
+ }
+ }
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ *sched_pp = sched;
+ return 0;
}
-
+EXPORT_SYMBOL(cfs_wi_sched_create);
int
-cfs_wi_startup (void)
+cfs_wi_startup(void)
{
- int i;
- int n;
- int rc;
-
- cfs_wi_data.wi_nthreads = 0;
- cfs_wi_data.wi_nsched = CFS_WI_NSCHED;
- LIBCFS_ALLOC(cfs_wi_data.wi_scheds,
- cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
- if (cfs_wi_data.wi_scheds == NULL)
- return -ENOMEM;
-
- cfs_spin_lock_init(&cfs_wi_data.wi_glock);
- for (i = 0; i < cfs_wi_data.wi_nsched; i++)
- cfs_wi_sched_init(&cfs_wi_data.wi_scheds[i]);
-
-#ifdef __KERNEL__
- n = cfs_num_online_cpus();
- for (i = 0; i <= n; i++) {
- rc = cfs_wi_start_thread(cfs_wi_scheduler,
- (void *)(long_ptr_t)(i == n ? -1 : i));
- if (rc != 0) {
- CERROR ("Can't spawn workitem scheduler: %d\n", rc);
- cfs_wi_shutdown();
- return rc;
- }
- }
-#else
- n = rc = 0;
-#endif
+ memset(&cfs_wi_data, 0, sizeof(struct cfs_workitem_data));
+
+ spin_lock_init(&cfs_wi_data.wi_glock);
+ INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
+ cfs_wi_data.wi_init = 1;
- return 0;
+ return 0;
}
void
cfs_wi_shutdown (void)
{
- int i;
-
- if (cfs_wi_data.wi_scheds == NULL)
- return;
-
- for (i = 0; i < cfs_wi_data.wi_nsched; i++)
- cfs_wi_sched_shutdown(&cfs_wi_data.wi_scheds[i]);
-
-#ifdef __KERNEL__
- cfs_spin_lock(&cfs_wi_data.wi_glock);
- i = 2;
- while (cfs_wi_data.wi_nthreads != 0) {
- CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
- "waiting for %d threads to terminate\n",
- cfs_wi_data.wi_nthreads);
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
-
- cfs_pause(cfs_time_seconds(1));
-
- cfs_spin_lock(&cfs_wi_data.wi_glock);
- }
- cfs_spin_unlock(&cfs_wi_data.wi_glock);
-#endif
- LIBCFS_FREE(cfs_wi_data.wi_scheds,
- cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
- return;
+ struct cfs_wi_sched *sched;
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ cfs_wi_data.wi_stopping = 1;
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ /* nobody should contend on this list */
+ list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
+ sched->ws_stopping = 1;
+ wake_up_all(&sched->ws_waitq);
+ }
+
+ list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
+ spin_lock(&cfs_wi_data.wi_glock);
+
+ while (sched->ws_nthreads != 0) {
+ spin_unlock(&cfs_wi_data.wi_glock);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(1) / 20);
+ spin_lock(&cfs_wi_data.wi_glock);
+ }
+ spin_unlock(&cfs_wi_data.wi_glock);
+ }
+
+ while (!list_empty(&cfs_wi_data.wi_scheds)) {
+ sched = list_entry(cfs_wi_data.wi_scheds.next,
+ struct cfs_wi_sched, ws_list);
+ list_del(&sched->ws_list);
+ LIBCFS_FREE(sched, sizeof(*sched));
+ }
+
+ cfs_wi_data.wi_stopping = 0;
+ cfs_wi_data.wi_init = 0;
}