X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=libcfs%2Flibcfs%2Fworkitem.c;h=7768e5c9eb38a13dbf5652ee82c6d076a762eef7;hb=128137adfc539dd2dd92040c14a63ff27f969820;hp=5bcab3f01b464062d490dbd9797ae03cfb8763af;hpb=c48a869557fe7663f4f3370b130d4c248958180e;p=fs%2Flustre-release.git

diff --git a/libcfs/libcfs/workitem.c b/libcfs/libcfs/workitem.c
index 5bcab3f..7768e5c 100644
--- a/libcfs/libcfs/workitem.c
+++ b/libcfs/libcfs/workitem.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -41,26 +37,25 @@
 
 #define DEBUG_SUBSYSTEM S_LNET
 
+#include <linux/kthread.h>
 #include <libcfs/libcfs.h>
 
 #define CFS_WS_NAME_LEN         16
 
-typedef struct cfs_wi_sched {
-        cfs_list_t              ws_list;        /* chain on global list */
-#ifdef __KERNEL__
+struct cfs_wi_sched {
+        struct list_head        ws_list;        /* chain on global list */
         /** serialised workitems */
-        cfs_spinlock_t          ws_lock;
+        spinlock_t              ws_lock;
         /** where schedulers sleep */
-        cfs_waitq_t             ws_waitq;
-#endif
+        wait_queue_head_t       ws_waitq;
         /** concurrent workitems */
-        cfs_list_t              ws_runq;
+        struct list_head        ws_runq;
         /** rescheduled running-workitems, a workitem can be rescheduled
         * while running in wi_action(), but we don't want to execute it again
         * unless it returns from wi_action(), so we put it on ws_rerunq
         * while rescheduling, and move it to runq after it returns
         * from wi_action() */
-        cfs_list_t              ws_rerunq;
+        struct list_head        ws_rerunq;
         /** CPT-table for this scheduler */
         struct cfs_cpt_table    *ws_cptab;
         /** CPT id for affinity */
@@ -75,94 +70,62 @@ typedef struct cfs_wi_sched {
         unsigned int            ws_starting:1;
         /** scheduler name */
         char                    ws_name[CFS_WS_NAME_LEN];
-} cfs_wi_sched_t;
+};
 
-struct cfs_workitem_data {
+static struct cfs_workitem_data {
         /** serialize */
-        cfs_spinlock_t          wi_glock;
+        spinlock_t              wi_glock;
         /** list of all schedulers */
-        cfs_list_t              wi_scheds;
+        struct list_head        wi_scheds;
         /** WI module is initialized */
         int                     wi_init;
         /** shutting down the whole WI module */
         int                     wi_stopping;
 } cfs_wi_data;
 
-#ifdef __KERNEL__
-static inline void
-cfs_wi_sched_lock(cfs_wi_sched_t *sched)
-{
-        cfs_spin_lock(&sched->ws_lock);
-}
-
-static inline void
-cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
-{
-        cfs_spin_unlock(&sched->ws_lock);
-}
-
 static inline int
-cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
+cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
 {
-        cfs_wi_sched_lock(sched);
+        spin_lock(&sched->ws_lock);
         if (sched->ws_stopping) {
-                cfs_wi_sched_unlock(sched);
-                return 0;
-        }
-
-        if (!cfs_list_empty(&sched->ws_runq)) {
-                cfs_wi_sched_unlock(sched);
-                return 0;
-        }
-        cfs_wi_sched_unlock(sched);
-        return 1;
-}
-
-#else /* !__KERNEL__ */
-
-static inline void
-cfs_wi_sched_lock(cfs_wi_sched_t *sched)
-{
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
-}
+                spin_unlock(&sched->ws_lock);
+                return 0;
+        }
 
-static inline void
-cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
-{
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
+        if (!list_empty(&sched->ws_runq)) {
+                spin_unlock(&sched->ws_lock);
+                return 0;
+        }
+        spin_unlock(&sched->ws_lock);
+        return 1;
 }
 
-#endif /* __KERNEL__ */
-
 /* XXX:
  * 0. it only works when called from wi->wi_action.
  * 1. when it returns no one shall try to schedule the workitem.
  */
 void
-cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 {
-        LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
+        LASSERT(!in_interrupt()); /* because we use plain spinlock */
         LASSERT(!sched->ws_stopping);
 
-        cfs_wi_sched_lock(sched);
+        spin_lock(&sched->ws_lock);
 
-#ifdef __KERNEL__
         LASSERT(wi->wi_running);
-#endif
+
         if (wi->wi_scheduled) { /* cancel pending schedules */
-                LASSERT(!cfs_list_empty(&wi->wi_list));
-                cfs_list_del_init(&wi->wi_list);
+                LASSERT(!list_empty(&wi->wi_list));
+                list_del_init(&wi->wi_list);
 
                 LASSERT(sched->ws_nscheduled > 0);
                 sched->ws_nscheduled--;
         }
 
-        LASSERT(cfs_list_empty(&wi->wi_list));
+        LASSERT(list_empty(&wi->wi_list));
 
         wi->wi_scheduled = 1; /* LBUG future schedule attempts */
-        cfs_wi_sched_unlock(sched);
-
-        return;
+        spin_unlock(&sched->ws_lock);
 }
 EXPORT_SYMBOL(cfs_wi_exit);
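To make the wi_action()/cfs_wi_exit() contract above concrete, here is a minimal sketch — not part of this patch, all my_* names invented — of an action callback that reschedules itself while work remains and detaches itself when done:

/*
 * Hypothetical usage sketch (not from this patch): a wi_action()
 * obeying the XXX contract above.
 */
struct my_worker {
        struct cfs_workitem      mw_wi;     /* embedded workitem */
        struct cfs_wi_sched     *mw_sched;  /* scheduler it runs on */
        int                      mw_todo;   /* remaining work units */
};

static int
my_wi_action(struct cfs_workitem *wi)
{
        struct my_worker *w = container_of(wi, struct my_worker, mw_wi);

        if (--w->mw_todo > 0) {
                /* rescheduling from inside wi_action() parks the item on
                 * ws_rerunq; it moves back to ws_runq only after this
                 * callback returns, so it never runs re-entrantly */
                cfs_wi_schedule(w->mw_sched, wi);
                return 0;               /* workitem stays alive */
        }

        /* per the XXX contract: only legal from inside wi_action(), and
         * nobody may schedule this workitem once it returns */
        cfs_wi_exit(w->mw_sched, wi);
        LIBCFS_FREE(w, sizeof(*w));
        return 1;                       /* non-zero: WI is dead, may be freed */
}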
@@ -170,11 +133,11 @@ EXPORT_SYMBOL(cfs_wi_exit);
  * cancel schedule request of workitem \a wi
  */
 int
-cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 {
         int     rc;
 
-        LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
+        LASSERT(!in_interrupt()); /* because we use plain spinlock */
         LASSERT(!sched->ws_stopping);
 
         /*
@@ -182,24 +145,24 @@ cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
          * means the workitem will not be scheduled and will not have
          * any race with wi_action.
          */
-        cfs_wi_sched_lock(sched);
+        spin_lock(&sched->ws_lock);
 
         rc = !(wi->wi_running);
 
         if (wi->wi_scheduled) { /* cancel pending schedules */
-                LASSERT(!cfs_list_empty(&wi->wi_list));
-                cfs_list_del_init(&wi->wi_list);
+                LASSERT(!list_empty(&wi->wi_list));
+                list_del_init(&wi->wi_list);
 
                 LASSERT(sched->ws_nscheduled > 0);
                 sched->ws_nscheduled--;
 
-                wi->wi_scheduled = 0;
-        }
+                wi->wi_scheduled = 0;
+        }
 
-        LASSERT (cfs_list_empty(&wi->wi_list));
+        LASSERT(list_empty(&wi->wi_list));
 
-        cfs_wi_sched_unlock(sched);
-        return rc;
+        spin_unlock(&sched->ws_lock);
+        return rc;
 }
 EXPORT_SYMBOL(cfs_wi_deschedule);
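A hedged sketch of a caller using this return value (invented names, continuing the earlier sketch):

/*
 * Hypothetical usage sketch (not from this patch): cancelling from
 * outside wi_action().  rc != 0 means the item is neither running nor
 * scheduled any more, so the owner may reclaim it immediately.
 */
static void
my_worker_cancel(struct my_worker *w)
{
        if (cfs_wi_deschedule(w->mw_sched, &w->mw_wi)) {
                LIBCFS_FREE(w, sizeof(*w));     /* no race with wi_action */
                return;
        }
        /* wi_action() is mid-flight; real code must synchronise with it
         * (e.g. via its own completion or flag) before freeing the worker */
}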
@@ -211,79 +174,66 @@ EXPORT_SYMBOL(cfs_wi_deschedule);
  * be added, and even dynamic creation of serialised queues might be supported.
  */
 void
-cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 {
-        LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
+        LASSERT(!in_interrupt()); /* because we use plain spinlock */
         LASSERT(!sched->ws_stopping);
 
-        cfs_wi_sched_lock(sched);
+        spin_lock(&sched->ws_lock);
 
-        if (!wi->wi_scheduled) {
-                LASSERT (cfs_list_empty(&wi->wi_list));
+        if (!wi->wi_scheduled) {
+                LASSERT(list_empty(&wi->wi_list));
 
-                wi->wi_scheduled = 1;
+                wi->wi_scheduled = 1;
                 sched->ws_nscheduled++;
-                if (!wi->wi_running) {
-                        cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
-#ifdef __KERNEL__
-                        cfs_waitq_signal(&sched->ws_waitq);
-#endif
-                } else {
-                        cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
-                }
-        }
+                if (!wi->wi_running) {
+                        list_add_tail(&wi->wi_list, &sched->ws_runq);
+                        wake_up(&sched->ws_waitq);
+                } else {
+                        list_add(&wi->wi_list, &sched->ws_rerunq);
+                }
+        }
 
-        LASSERT (!cfs_list_empty(&wi->wi_list));
-        cfs_wi_sched_unlock(sched);
-        return;
+        LASSERT(!list_empty(&wi->wi_list));
+        spin_unlock(&sched->ws_lock);
 }
 EXPORT_SYMBOL(cfs_wi_schedule);
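For completeness, a sketch of handing work to a scheduler; cfs_wi_init() is the initialiser from libcfs_workitem.h, the rest is invented:

/*
 * Hypothetical usage sketch (not from this patch): allocate a worker,
 * attach the action sketched earlier, and queue it.
 */
static int
my_worker_start(struct cfs_wi_sched *sched, int units)
{
        struct my_worker *w;

        LIBCFS_ALLOC(w, sizeof(*w));
        if (w == NULL)
                return -ENOMEM;

        w->mw_sched = sched;
        w->mw_todo  = units;
        cfs_wi_init(&w->mw_wi, w, my_wi_action);

        /* puts the item on ws_runq and wakes one scheduler thread; calling
         * this again before the action runs is a no-op because
         * wi_scheduled is already set */
        cfs_wi_schedule(sched, &w->mw_wi);
        return 0;
}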
 
-#ifdef __KERNEL__
-
 static int
-cfs_wi_scheduler (void *arg)
+cfs_wi_scheduler(void *arg)
 {
-        struct cfs_wi_sched     *sched = (cfs_wi_sched_t *)arg;
-        char                    name[16];
-
-        if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
-                snprintf(name, sizeof(name), "%s_%02d_%02d",
-                         sched->ws_name, sched->ws_cpt, sched->ws_nthreads);
-        } else {
-                snprintf(name, sizeof(name), "%s_%02d",
-                         sched->ws_name, sched->ws_nthreads);
-        }
+        struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;
 
-        cfs_daemonize(name);
         cfs_block_allsigs();
 
         /* CPT affinity scheduler? */
         if (sched->ws_cptab != NULL)
-                cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt);
+                if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
+                        CWARN("Unable to bind %s on CPU partition %d\n",
+                              sched->ws_name, sched->ws_cpt);
 
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
+        spin_lock(&cfs_wi_data.wi_glock);
 
         LASSERT(sched->ws_starting == 1);
         sched->ws_starting--;
         sched->ws_nthreads++;
 
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
+        spin_unlock(&cfs_wi_data.wi_glock);
 
-        cfs_wi_sched_lock(sched);
+        spin_lock(&sched->ws_lock);
 
         while (!sched->ws_stopping) {
-                int             nloops = 0;
-                int             rc;
-                cfs_workitem_t  *wi;
-
-                while (!cfs_list_empty(&sched->ws_runq) &&
-                       nloops < CFS_WI_RESCHED) {
-                        wi = cfs_list_entry(sched->ws_runq.next,
-                                            cfs_workitem_t, wi_list);
+                int                     nloops = 0;
+                int                     rc;
+                struct cfs_workitem     *wi;
+
+                while (!list_empty(&sched->ws_runq) &&
+                       nloops < CFS_WI_RESCHED) {
+                        wi = list_entry(sched->ws_runq.next,
+                                        struct cfs_workitem, wi_list);
                         LASSERT(wi->wi_scheduled && !wi->wi_running);
 
-                        cfs_list_del_init(&wi->wi_list);
+                        list_del_init(&wi->wi_list);
 
                         LASSERT(sched->ws_nscheduled > 0);
                         sched->ws_nscheduled--;
 
@@ -291,139 +241,90 @@ cfs_wi_scheduler (void *arg)
                         wi->wi_running   = 1;
                         wi->wi_scheduled = 0;
 
-
-                        cfs_wi_sched_unlock(sched);
+                        spin_unlock(&sched->ws_lock);
                         nloops++;
 
                         rc = (*wi->wi_action) (wi);
 
-                        cfs_wi_sched_lock(sched);
+                        spin_lock(&sched->ws_lock);
                         if (rc != 0) /* WI should be dead, even be freed! */
                                 continue;
 
-                        wi->wi_running = 0;
-                        if (cfs_list_empty(&wi->wi_list))
+                        wi->wi_running = 0;
+                        if (list_empty(&wi->wi_list))
                                 continue;
 
                         LASSERT(wi->wi_scheduled);
-                        /* wi is rescheduled, should be on rerunq now, we
-                         * move it to runq so it can run action now */
-                        cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
+                        /* wi is rescheduled, should be on rerunq now, we
+                         * move it to runq so it can run action now */
+                        list_move_tail(&wi->wi_list, &sched->ws_runq);
                 }
 
-                if (!cfs_list_empty(&sched->ws_runq)) {
-                        cfs_wi_sched_unlock(sched);
-                        /* don't sleep because some workitems still
-                         * expect me to come back soon */
-                        cfs_cond_resched();
-                        cfs_wi_sched_lock(sched);
-                        continue;
-                }
+                if (!list_empty(&sched->ws_runq)) {
+                        spin_unlock(&sched->ws_lock);
+                        /* don't sleep because some workitems still
+                         * expect me to come back soon */
+                        cond_resched();
+                        spin_lock(&sched->ws_lock);
+                        continue;
+                }
 
-                cfs_wi_sched_unlock(sched);
-                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
-                                !cfs_wi_sched_cansleep(sched), rc);
-                cfs_wi_sched_lock(sched);
+                spin_unlock(&sched->ws_lock);
+                rc = wait_event_interruptible_exclusive(sched->ws_waitq,
+                                !cfs_wi_sched_cansleep(sched));
+                spin_lock(&sched->ws_lock);
         }
 
-        cfs_wi_sched_unlock(sched);
+        spin_unlock(&sched->ws_lock);
 
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
+        spin_lock(&cfs_wi_data.wi_glock);
         sched->ws_nthreads--;
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
+        spin_unlock(&cfs_wi_data.wi_glock);
 
-        return 0;
-}
-
-#else /* __KERNEL__ */
-
-int
-cfs_wi_check_events (void)
-{
-        int             n = 0;
-        cfs_workitem_t  *wi;
-
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
-
-        for (;;) {
-                struct cfs_wi_sched     *sched = NULL;
-                struct cfs_wi_sched     *tmp;
-
-                /** rerunq is always empty for userspace */
-                cfs_list_for_each_entry(tmp,
-                                        &cfs_wi_data.wi_scheds, ws_list) {
-                        if (!cfs_list_empty(&tmp->ws_runq)) {
-                                sched = tmp;
-                                break;
-                        }
-                }
-
-                if (sched == NULL)
-                        break;
-
-                wi = cfs_list_entry(sched->ws_runq.next,
-                                    cfs_workitem_t, wi_list);
-                cfs_list_del_init(&wi->wi_list);
-
-                LASSERT(sched->ws_nscheduled > 0);
-                sched->ws_nscheduled--;
-
-                LASSERT (wi->wi_scheduled);
-                wi->wi_scheduled = 0;
-                cfs_spin_unlock(&cfs_wi_data.wi_glock);
-
-                n++;
-                (*wi->wi_action) (wi);
-
-                cfs_spin_lock(&cfs_wi_data.wi_glock);
-        }
-
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
-        return n;
+        return 0;
 }
 
-#endif
-
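Note the batching above: a thread runs at most CFS_WI_RESCHED actions before cond_resched(), and never sleeps while ws_runq is non-empty, so individual actions should stay short. A hedged sketch of chunking a long job accordingly (all names and helpers invented):

/*
 * Hypothetical usage sketch (not from this patch): a long scan split
 * into bounded chunks so it cannot monopolise a scheduler thread.
 */
struct my_scan {
        struct cfs_workitem      ms_wi;
        struct cfs_wi_sched     *ms_sched;
        long                     ms_pos;        /* scan cursor */
};

static int my_scan_one_chunk(struct my_scan *scan); /* invented helper:
                                                     * returns non-zero while
                                                     * chunks remain */
static int
my_scan_action(struct cfs_workitem *wi)
{
        struct my_scan *scan = wi->wi_data;     /* set by cfs_wi_init() */

        if (my_scan_one_chunk(scan))
                cfs_wi_schedule(scan->ms_sched, wi);    /* more chunks left */

        return 0;                               /* keep the workitem alive */
}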
 void
 cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
 {
-        int     i;
-
         LASSERT(cfs_wi_data.wi_init);
         LASSERT(!cfs_wi_data.wi_stopping);
 
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
+        spin_lock(&cfs_wi_data.wi_glock);
         if (sched->ws_stopping) {
                 CDEBUG(D_INFO, "%s is in progress of stopping\n",
                        sched->ws_name);
-                cfs_spin_unlock(&cfs_wi_data.wi_glock);
+                spin_unlock(&cfs_wi_data.wi_glock);
                 return;
         }
 
-        LASSERT(!cfs_list_empty(&sched->ws_list));
+        LASSERT(!list_empty(&sched->ws_list));
         sched->ws_stopping = 1;
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
+        spin_unlock(&cfs_wi_data.wi_glock);
+
+        wake_up_all(&sched->ws_waitq);
 
-        i = 2;
-#ifdef __KERNEL__
-        cfs_waitq_broadcast(&sched->ws_waitq);
+        spin_lock(&cfs_wi_data.wi_glock);
+        {
+                int i = 2;
 
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
-        while (sched->ws_nthreads > 0) {
-                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
-                       "waiting for %d threads of WI sched[%s] to terminate\n",
-                       sched->ws_nthreads, sched->ws_name);
+                while (sched->ws_nthreads > 0) {
+                        CDEBUG(is_power_of_2(++i / 20) ? D_WARNING : D_NET,
+                               "waiting %us for %d %s worker threads to exit\n",
+                               i / 20, sched->ws_nthreads, sched->ws_name);
 
-                cfs_spin_unlock(&cfs_wi_data.wi_glock);
-                cfs_pause(cfs_time_seconds(1) / 20);
-                cfs_spin_lock(&cfs_wi_data.wi_glock);
+                        spin_unlock(&cfs_wi_data.wi_glock);
+                        set_current_state(TASK_UNINTERRUPTIBLE);
+                        schedule_timeout(cfs_time_seconds(1) / 20);
+                        spin_lock(&cfs_wi_data.wi_glock);
+                }
         }
 
-        cfs_list_del(&sched->ws_list);
+        list_del(&sched->ws_list);
+
+        spin_unlock(&cfs_wi_data.wi_glock);
 
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
-#endif
         LASSERT(sched->ws_nscheduled == 0);
 
         LIBCFS_FREE(sched, sizeof(*sched));
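A hedged create/use/destroy sketch for the pair of entry points around here (invented function; cfs_cpt_table is the global CPT table exported by libcfs, and CFS_CPT_ANY requests no affinity):

/*
 * Hypothetical usage sketch (not from this patch): a four-thread
 * scheduler without CPT affinity, torn down after use.
 */
static int
my_sched_demo(void)
{
        struct cfs_wi_sched *sched;
        int rc;

        rc = cfs_wi_sched_create("my_wi", cfs_cpt_table, CFS_CPT_ANY,
                                 4, &sched);
        if (rc != 0)
                return rc;

        /* ... queue workitems with cfs_wi_schedule() here, and wait for
         * them to finish or deschedule them before tearing down ... */

        /* blocks until every worker thread exits; all workitems must be
         * gone by then (ws_nscheduled == 0 is asserted) */
        cfs_wi_sched_destroy(sched);
        return 0;
}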
@@ -435,7 +336,6 @@ cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
                     int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
 {
         struct cfs_wi_sched     *sched;
-        int                     rc;
 
         LASSERT(cfs_wi_data.wi_init);
         LASSERT(!cfs_wi_data.wi_stopping);
@@ -446,55 +346,68 @@ cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
         if (sched == NULL)
                 return -ENOMEM;
 
-        strncpy(sched->ws_name, name, CFS_WS_NAME_LEN);
+        if (strlen(name) > sizeof(sched->ws_name)-1) {
+                LIBCFS_FREE(sched, sizeof(*sched));
+                return -E2BIG;
+        }
+        strlcpy(sched->ws_name, name, sizeof(sched->ws_name));
+
         sched->ws_cptab = cptab;
         sched->ws_cpt = cpt;
 
-#ifdef __KERNEL__
-        cfs_spin_lock_init(&sched->ws_lock);
-        cfs_waitq_init(&sched->ws_waitq);
-#endif
-        CFS_INIT_LIST_HEAD(&sched->ws_runq);
-        CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
-        CFS_INIT_LIST_HEAD(&sched->ws_list);
-
-        rc = 0;
-#ifdef __KERNEL__
-        while (nthrs > 0)  {
-                cfs_spin_lock(&cfs_wi_data.wi_glock);
+        spin_lock_init(&sched->ws_lock);
+        init_waitqueue_head(&sched->ws_waitq);
+
+        INIT_LIST_HEAD(&sched->ws_runq);
+        INIT_LIST_HEAD(&sched->ws_rerunq);
+        INIT_LIST_HEAD(&sched->ws_list);
+
+        for (; nthrs > 0; nthrs--) {
+                char    name[16];
+                struct task_struct *task;
+
+                spin_lock(&cfs_wi_data.wi_glock);
                 while (sched->ws_starting > 0) {
-                        cfs_spin_unlock(&cfs_wi_data.wi_glock);
-                        cfs_schedule();
-                        cfs_spin_lock(&cfs_wi_data.wi_glock);
+                        spin_unlock(&cfs_wi_data.wi_glock);
+                        schedule();
+                        spin_lock(&cfs_wi_data.wi_glock);
                 }
 
                 sched->ws_starting++;
-                cfs_spin_unlock(&cfs_wi_data.wi_glock);
-
-                rc = cfs_create_thread(cfs_wi_scheduler, sched, 0);
-                if (rc >= 0) {
-                        nthrs--;
-                        continue;
+                spin_unlock(&cfs_wi_data.wi_glock);
+
+                if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
+                        snprintf(name, sizeof(name), "%s_%02d_%02d",
+                                 sched->ws_name, sched->ws_cpt,
+                                 sched->ws_nthreads);
+                } else {
+                        snprintf(name, sizeof(name), "%s_%02d",
+                                 sched->ws_name, sched->ws_nthreads);
                 }
 
-                CERROR("Failed to create thread for WI scheduler %s: %d\n",
-                       name, rc);
+                task = kthread_run(cfs_wi_scheduler, sched, name);
+                if (IS_ERR(task)) {
+                        int rc = PTR_ERR(task);
 
-                cfs_spin_lock(&cfs_wi_data.wi_glock);
+                        CERROR("Failed to create thread for "
+                               "WI scheduler %s: %d\n", name, rc);
 
-                /* make up for cfs_wi_sched_destroy */
-                cfs_list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
-                sched->ws_starting--;
+                        spin_lock(&cfs_wi_data.wi_glock);
 
-                cfs_spin_unlock(&cfs_wi_data.wi_glock);
+                        /* make up for cfs_wi_sched_destroy */
+                        list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+                        sched->ws_starting--;
 
-                cfs_wi_sched_destroy(sched);
-                return rc;
+                        spin_unlock(&cfs_wi_data.wi_glock);
+
+                        cfs_wi_sched_destroy(sched);
+                        return rc;
+                }
         }
-#endif
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
-        cfs_list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
+
+        spin_lock(&cfs_wi_data.wi_glock);
+        list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+        spin_unlock(&cfs_wi_data.wi_glock);
 
         *sched_pp = sched;
         return 0;
@@ -504,10 +417,10 @@ EXPORT_SYMBOL(cfs_wi_sched_create);
 int
 cfs_wi_startup(void)
 {
-        memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
+        memset(&cfs_wi_data, 0, sizeof(struct cfs_workitem_data));
 
-        cfs_spin_lock_init(&cfs_wi_data.wi_glock);
-        CFS_INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
+        spin_lock_init(&cfs_wi_data.wi_glock);
+        INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
         cfs_wi_data.wi_init = 1;
 
         return 0;
@@ -518,32 +431,32 @@ cfs_wi_shutdown (void)
 {
         struct cfs_wi_sched     *sched;
 
-        cfs_spin_lock(&cfs_wi_data.wi_glock);
+        spin_lock(&cfs_wi_data.wi_glock);
         cfs_wi_data.wi_stopping = 1;
-        cfs_spin_unlock(&cfs_wi_data.wi_glock);
+        spin_unlock(&cfs_wi_data.wi_glock);
 
-#ifdef __KERNEL__
         /* nobody should contend on this list */
-        cfs_list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
+        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                 sched->ws_stopping = 1;
-                cfs_waitq_broadcast(&sched->ws_waitq);
+                wake_up_all(&sched->ws_waitq);
         }
 
-        cfs_list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
-                cfs_spin_lock(&cfs_wi_data.wi_glock);
+        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
+                spin_lock(&cfs_wi_data.wi_glock);
 
                 while (sched->ws_nthreads != 0) {
-                        cfs_spin_unlock(&cfs_wi_data.wi_glock);
-                        cfs_pause(cfs_time_seconds(1) / 20);
-                        cfs_spin_lock(&cfs_wi_data.wi_glock);
+                        spin_unlock(&cfs_wi_data.wi_glock);
+                        set_current_state(TASK_UNINTERRUPTIBLE);
+                        schedule_timeout(cfs_time_seconds(1) / 20);
+                        spin_lock(&cfs_wi_data.wi_glock);
                 }
-                cfs_spin_unlock(&cfs_wi_data.wi_glock);
+                spin_unlock(&cfs_wi_data.wi_glock);
         }
-#endif
-        while (!cfs_list_empty(&cfs_wi_data.wi_scheds)) {
-                sched = cfs_list_entry(cfs_wi_data.wi_scheds.next,
+
+        while (!list_empty(&cfs_wi_data.wi_scheds)) {
+                sched = list_entry(cfs_wi_data.wi_scheds.next,
                                    struct cfs_wi_sched, ws_list);
-                cfs_list_del(&sched->ws_list);
+                list_del(&sched->ws_list);
                 LIBCFS_FREE(sched, sizeof(*sched));
         }
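Finally, the bracket inside which all of the above is valid — libcfs appears to call these once at module load and unload; hedged sketch with an invented name:

/*
 * Hypothetical usage sketch (not from this patch): module-lifetime
 * bracket.  cfs_wi_shutdown() stops every remaining scheduler, waits for
 * its worker threads and frees it, even if cfs_wi_sched_destroy() was
 * never called for it.
 */
static int
my_wi_selftest(void)
{
        int rc;

        rc = cfs_wi_startup();          /* wi_init = 1, empty wi_scheds */
        if (rc != 0)
                return rc;

        rc = my_sched_demo();           /* from the previous sketch */

        cfs_wi_shutdown();
        return rc;
}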