EXTRA_DIST = \
bitmap.h \
libcfs.h \
- libcfs_cpu.h \
libcfs_crypto.h \
libcfs_debug.h \
libcfs_fail.h \
libcfs_hash.h \
libcfs_private.h \
- libcfs_string.h \
- libcfs_workitem.h
+ libcfs_string.h
#include <libcfs/libcfs_debug.h>
#include <libcfs/libcfs_private.h>
#include <libcfs/bitmap.h>
-#include <libcfs/libcfs_cpu.h>
#include <libcfs/libcfs_string.h>
-#include <libcfs/libcfs_workitem.h>
#include <libcfs/libcfs_hash.h>
#include <libcfs/libcfs_fail.h>
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * libcfs/include/libcfs/libcfs_workitem.h
- *
- * Author: Isaac Huang <he.h.huang@oracle.com>
- * Liang Zhen <zhen.liang@sun.com>
- *
- * A workitems is deferred work with these semantics:
- * - a workitem always runs in thread context.
- * - a workitem can be concurrent with other workitems but is strictly
- * serialized with respect to itself.
- * - no CPU affinity, a workitem does not necessarily run on the same CPU
- * that schedules it. However, this might change in the future.
- * - if a workitem is scheduled again before it has a chance to run, it
- * runs only once.
- * - if a workitem is scheduled while it runs, it runs again after it
- * completes; this ensures that events occurring while other events are
- * being processed receive due attention. This behavior also allows a
- * workitem to reschedule itself.
- *
- * Usage notes:
- * - a workitem can sleep but it should be aware of how that sleep might
- * affect others.
- * - a workitem runs inside a kernel thread so there's no user space to access.
- * - do not use a workitem if the scheduling latency can't be tolerated.
- *
- * When wi_action returns non-zero, it means the workitem has either been
- * freed or reused and workitem scheduler won't touch it any more.
- */
-
-#ifndef __LIBCFS_WORKITEM_H__
-#define __LIBCFS_WORKITEM_H__
-
-struct cfs_wi_sched;
-
-void cfs_wi_sched_destroy(struct cfs_wi_sched *);
-int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt,
- int nthrs, struct cfs_wi_sched **);
-
-struct cfs_workitem;
-
-typedef int (*cfs_wi_action_t) (struct cfs_workitem *);
-
-struct cfs_workitem {
- /** chain on runq or rerunq */
- struct list_head wi_list;
- /** working function */
- cfs_wi_action_t wi_action;
- /** in running */
- unsigned short wi_running:1;
- /** scheduled */
- unsigned short wi_scheduled:1;
-};
-
-static inline void
-cfs_wi_init(struct cfs_workitem *wi, cfs_wi_action_t action)
-{
- INIT_LIST_HEAD(&wi->wi_list);
-
- wi->wi_running = 0;
- wi->wi_scheduled = 0;
- wi->wi_action = action;
-}
-
-void cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi);
-int cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi);
-void cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi);
-
-int cfs_wi_startup(void);
-void cfs_wi_shutdown(void);
-
-/** # workitem scheduler loops before reschedule */
-#define CFS_WI_RESCHED 128
-
-#endif /* __LIBCFS_WORKITEM_H__ */
libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
libcfs-crypto-objs := $(addprefix crypto/,$(libcfs-crypto-objs))
-libcfs-objs-$(CONFIG_SMP) = libcfs_cpu.o
libcfs-all-objs := debug.o fail.o module.o tracefile.o \
libcfs_string.o hash.o \
- workitem.o \
- libcfs_mem.o \
- linux-crypto.o linux-crypto-adler.o \
- $(libcfs-objs-y)
+ linux-crypto.o linux-crypto-adler.o
libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
@LLCRYPT_TRUE@libcfs-objs += $(libcfs-crypto-objs)
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux/*.o libcfs crypto/*.o
EXTRA_DIST := $(libcfs-all-objs:%.o=%.c) tracefile.h \
- workitem.c fail.c libcfs_cpu.c \
- libcfs_mem.c linux-crypto.h
+ fail.c linux-crypto.h
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/workqueue.h>
-#include <libcfs/libcfs.h>
-
-struct cfs_var_array {
- unsigned int va_count; /* # of buffers */
- unsigned int va_size; /* size of each var */
- struct cfs_cpt_table *va_cptab; /* cpu partition table */
- void *va_ptrs[0]; /* buffer addresses */
-};
-
-/*
- * free per-cpu data, see more detail in cfs_percpt_free
- */
-void
-cfs_percpt_free(void *vars)
-{
- struct cfs_var_array *arr;
- int i;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- for (i = 0; i < arr->va_count; i++) {
- if (arr->va_ptrs[i] != NULL)
- LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
- }
-
- LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
- va_ptrs[arr->va_count]));
-}
-EXPORT_SYMBOL(cfs_percpt_free);
-
-/*
- * allocate per cpu-partition variables, returned value is an array of pointers,
- * variable can be indexed by CPU partition ID, i.e:
- *
- * arr = cfs_percpt_alloc(cfs_cpu_pt, size);
- * then caller can access memory block for CPU 0 by arr[0],
- * memory block for CPU 1 by arr[1]...
- * memory block for CPU N by arr[N]...
- *
- * cacheline aligned.
- */
-void *
-cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
-{
- struct cfs_var_array *arr;
- int count;
- int i;
-
- count = cfs_cpt_number(cptab);
-
- LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
- if (arr == NULL)
- return NULL;
-
- arr->va_size = size = L1_CACHE_ALIGN(size);
- arr->va_count = count;
- arr->va_cptab = cptab;
-
- for (i = 0; i < count; i++) {
- LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size);
- if (arr->va_ptrs[i] == NULL) {
- cfs_percpt_free((void *)&arr->va_ptrs[0]);
- return NULL;
- }
- }
-
- return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_percpt_alloc);
-
-/*
- * return number of CPUs (or number of elements in per-cpu data)
- * according to cptab of @vars
- */
-int
-cfs_percpt_number(void *vars)
-{
- struct cfs_var_array *arr;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- return arr->va_count;
-}
-EXPORT_SYMBOL(cfs_percpt_number);
#include <lustre_crypto.h>
#include "tracefile.h"
+/* Requested number of CPU partitions. Exposed as a module parameter with
+ * mode 0444 and exported so other code can reference it through an extern
+ * declaration.
+ * NOTE(review): the CPT source touched by this same change keeps its own
+ * module_param()/MODULE_PARM_DESC() lines for this name - confirm that
+ * registering the parameter in both places is intended.
+ */
+int cpu_npartitions;
+EXPORT_SYMBOL(cpu_npartitions);
+module_param(cpu_npartitions, int, 0444);
+MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
+
+/* CPU partition pattern string, defaulting to "N". Exposed and exported the
+ * same way as cpu_npartitions above.
+ * NOTE(review): the same duplicate module_param() question applies here.
+ */
+char *cpu_pattern = "N";
+EXPORT_SYMBOL(cpu_pattern);
+module_param(cpu_pattern, charp, 0444);
+MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
+
struct lnet_debugfs_symlink_def {
const char *name;
const char *target;
return len;
}
-static int proc_cpt_table(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- size_t nob = *lenp;
- loff_t pos = *ppos;
- char *buf = NULL;
- int len = 4096;
- int rc = 0;
-
- if (write)
- return -EPERM;
-
- while (1) {
- LIBCFS_ALLOC(buf, len);
- if (buf == NULL)
- return -ENOMEM;
-
- rc = cfs_cpt_table_print(cfs_cpt_tab, buf, len);
- if (rc >= 0)
- break;
-
- if (rc == -EFBIG) {
- LIBCFS_FREE(buf, len);
- len <<= 1;
- continue;
- }
- goto out;
- }
-
- if (pos >= rc) {
- rc = 0;
- goto out;
- }
-
- rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
-out:
- if (buf != NULL)
- LIBCFS_FREE(buf, len);
- return rc;
-}
-
-static int proc_cpt_distance(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- size_t nob = *lenp;
- loff_t pos = *ppos;
- char *buf = NULL;
- int len = 4096;
- int rc = 0;
-
- if (write)
- return -EPERM;
-
- while (1) {
- LIBCFS_ALLOC(buf, len);
- if (buf == NULL)
- return -ENOMEM;
-
- rc = cfs_cpt_distance_print(cfs_cpt_tab, buf, len);
- if (rc >= 0)
- break;
-
- if (rc == -EFBIG) {
- LIBCFS_FREE(buf, len);
- len <<= 1;
- continue;
- }
- goto out;
- }
-
- if (pos >= rc) {
- rc = 0;
- goto out;
- }
-
- rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
- out:
- if (buf != NULL)
- LIBCFS_FREE(buf, len);
- return rc;
-}
-
static struct ctl_table lnet_table[] = {
{
.procname = "debug",
.proc_handler = &proc_dobitmasks,
},
{
- .procname = "cpu_partition_table",
- .maxlen = 128,
- .mode = 0444,
- .proc_handler = &proc_cpt_table,
- },
- {
- .procname = "cpu_partition_distance",
- .maxlen = 128,
- .mode = 0444,
- .proc_handler = &proc_cpt_distance,
- },
- {
.procname = "debug_log_upcall",
.data = lnet_debug_log_upcall,
.maxlen = sizeof(lnet_debug_log_upcall),
return (rc);
}
- rc = cfs_cpu_init();
- if (rc != 0)
- goto cleanup_debug;
-
rc = misc_register(&libcfs_dev);
if (rc) {
CERROR("misc_register: error %d\n", rc);
- goto cleanup_cpu;
- }
-
- rc = cfs_wi_startup();
- if (rc) {
- CERROR("initialize workitem: error %d\n", rc);
- goto cleanup_deregister;
+ goto cleanup_debug;
}
cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4);
rc = cfs_crypto_register();
if (rc) {
CERROR("cfs_crypto_regster: error %d\n", rc);
- goto cleanup_wi;
+ goto cleanup_deregister;
}
lnet_insert_debugfs(lnet_table);
return 0;
cleanup_crypto:
cfs_crypto_unregister();
-cleanup_wi:
- cfs_wi_shutdown();
cleanup_deregister:
misc_deregister(&libcfs_dev);
-cleanup_cpu:
- cfs_cpu_fini();
cleanup_debug:
libcfs_debug_cleanup();
return rc;
}
cfs_crypto_unregister();
- cfs_wi_shutdown();
misc_deregister(&libcfs_dev);
- cfs_cpu_fini();
-
/* the below message is checked in test-framework.sh check_mem_leak() */
if (libcfs_kmem_read() != 0)
CERROR("Portals memory leaked: %lld bytes\n",
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * libcfs/libcfs/workitem.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- * Liang Zhen <zhen.liang@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/kthread.h>
-#include <libcfs/libcfs.h>
-
-#define CFS_WS_NAME_LEN 16
-
-struct cfs_wi_sched {
- struct list_head ws_list; /* chain on global list */
- /** serialised workitems */
- spinlock_t ws_lock;
- /** where schedulers sleep */
- wait_queue_head_t ws_waitq;
- /** concurrent workitems */
- struct list_head ws_runq;
- /** rescheduled running-workitems, a workitem can be rescheduled
- * while running in wi_action(), but we don't to execute it again
- * unless it returns from wi_action(), so we put it on ws_rerunq
- * while rescheduling, and move it to runq after it returns
- * from wi_action() */
- struct list_head ws_rerunq;
- /** CPT-table for this scheduler */
- struct cfs_cpt_table *ws_cptab;
- /** CPT id for affinity */
- int ws_cpt;
- /** number of scheduled workitems */
- int ws_nscheduled;
- /** started scheduler thread, protected by cfs_wi_data::wi_glock */
- unsigned int ws_nthreads:30;
- /** shutting down, protected by cfs_wi_data::wi_glock */
- unsigned int ws_stopping:1;
- /** serialize starting thread, protected by cfs_wi_data::wi_glock */
- unsigned int ws_starting:1;
- /** scheduler name */
- char ws_name[CFS_WS_NAME_LEN];
-};
-
-static struct cfs_workitem_data {
- /** serialize */
- spinlock_t wi_glock;
- /** list of all schedulers */
- struct list_head wi_scheds;
- /** WI module is initialized */
- int wi_init;
- /** shutting down the whole WI module */
- int wi_stopping;
-} cfs_wi_data;
-
-static inline int
-cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
-{
- spin_lock(&sched->ws_lock);
- if (sched->ws_stopping) {
- spin_unlock(&sched->ws_lock);
- return 0;
- }
-
- if (!list_empty(&sched->ws_runq)) {
- spin_unlock(&sched->ws_lock);
- return 0;
- }
- spin_unlock(&sched->ws_lock);
- return 1;
-}
-
-/* XXX:
- * 0. it only works when called from wi->wi_action.
- * 1. when it returns no one shall try to schedule the workitem.
- */
-void
-cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
-{
- LASSERT(!in_interrupt()); /* because we use plain spinlock */
- LASSERT(!sched->ws_stopping);
-
- spin_lock(&sched->ws_lock);
-
- LASSERT(wi->wi_running);
-
- if (wi->wi_scheduled) { /* cancel pending schedules */
- LASSERT(!list_empty(&wi->wi_list));
- list_del_init(&wi->wi_list);
-
- LASSERT(sched->ws_nscheduled > 0);
- sched->ws_nscheduled--;
- }
-
- LASSERT(list_empty(&wi->wi_list));
-
- wi->wi_scheduled = 1; /* LBUG future schedule attempts */
- spin_unlock(&sched->ws_lock);
-}
-EXPORT_SYMBOL(cfs_wi_exit);
-
-/**
- * cancel schedule request of workitem \a wi
- */
-int
-cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
-{
- int rc;
-
- LASSERT(!in_interrupt()); /* because we use plain spinlock */
- LASSERT(!sched->ws_stopping);
-
- /*
- * return 0 if it's running already, otherwise return 1, which
- * means the workitem will not be scheduled and will not have
- * any race with wi_action.
- */
- spin_lock(&sched->ws_lock);
-
- rc = !(wi->wi_running);
-
- if (wi->wi_scheduled) { /* cancel pending schedules */
- LASSERT(!list_empty(&wi->wi_list));
- list_del_init(&wi->wi_list);
-
- LASSERT(sched->ws_nscheduled > 0);
- sched->ws_nscheduled--;
-
- wi->wi_scheduled = 0;
- }
-
- LASSERT (list_empty(&wi->wi_list));
-
- spin_unlock(&sched->ws_lock);
- return rc;
-}
-EXPORT_SYMBOL(cfs_wi_deschedule);
-
-/*
- * Workitem scheduled with (serial == 1) is strictly serialised not only with
- * itself, but also with others scheduled this way.
- *
- * Now there's only one static serialised queue, but in the future more might
- * be added, and even dynamic creation of serialised queues might be supported.
- */
-void
-cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
-{
- LASSERT(!in_interrupt()); /* because we use plain spinlock */
- LASSERT(!sched->ws_stopping);
-
- spin_lock(&sched->ws_lock);
-
- if (!wi->wi_scheduled) {
- LASSERT (list_empty(&wi->wi_list));
-
- wi->wi_scheduled = 1;
- sched->ws_nscheduled++;
- if (!wi->wi_running) {
- list_add_tail(&wi->wi_list, &sched->ws_runq);
- wake_up(&sched->ws_waitq);
- } else {
- list_add(&wi->wi_list, &sched->ws_rerunq);
- }
- }
-
- LASSERT (!list_empty(&wi->wi_list));
- spin_unlock(&sched->ws_lock);
-}
-EXPORT_SYMBOL(cfs_wi_schedule);
-
-static int
-cfs_wi_scheduler(void *arg)
-{
- struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;
-
- /* CPT affinity scheduler? */
- if (sched->ws_cptab != NULL)
- if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
- CWARN("Unable to bind %s on CPU partition %d\n",
- sched->ws_name, sched->ws_cpt);
-
- spin_lock(&cfs_wi_data.wi_glock);
-
- LASSERT(sched->ws_starting == 1);
- sched->ws_starting--;
- sched->ws_nthreads++;
-
- spin_unlock(&cfs_wi_data.wi_glock);
-
- spin_lock(&sched->ws_lock);
-
- while (!sched->ws_stopping) {
- int nloops = 0;
- int rc;
- struct cfs_workitem *wi;
-
- while (!list_empty(&sched->ws_runq) &&
- nloops < CFS_WI_RESCHED) {
- wi = list_entry(sched->ws_runq.next,
- struct cfs_workitem, wi_list);
- LASSERT(wi->wi_scheduled && !wi->wi_running);
-
- list_del_init(&wi->wi_list);
-
- LASSERT(sched->ws_nscheduled > 0);
- sched->ws_nscheduled--;
-
- wi->wi_running = 1;
- wi->wi_scheduled = 0;
-
- spin_unlock(&sched->ws_lock);
- nloops++;
-
- rc = (*wi->wi_action) (wi);
-
- spin_lock(&sched->ws_lock);
- if (rc != 0) /* WI should be dead, even be freed! */
- continue;
-
- wi->wi_running = 0;
- if (list_empty(&wi->wi_list))
- continue;
-
- LASSERT(wi->wi_scheduled);
- /* wi is rescheduled, should be on rerunq now, we
- * move it to runq so it can run action now */
- list_move_tail(&wi->wi_list, &sched->ws_runq);
- }
-
- if (!list_empty(&sched->ws_runq)) {
- spin_unlock(&sched->ws_lock);
- /* don't sleep because some workitems still
- * expect me to come back soon */
- cond_resched();
- spin_lock(&sched->ws_lock);
- continue;
- }
-
- spin_unlock(&sched->ws_lock);
- rc = wait_event_interruptible_exclusive(sched->ws_waitq,
- !cfs_wi_sched_cansleep(sched));
- spin_lock(&sched->ws_lock);
- }
-
- spin_unlock(&sched->ws_lock);
-
- spin_lock(&cfs_wi_data.wi_glock);
- sched->ws_nthreads--;
- spin_unlock(&cfs_wi_data.wi_glock);
-
- return 0;
-}
-
-void
-cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
-{
- LASSERT(cfs_wi_data.wi_init);
- LASSERT(!cfs_wi_data.wi_stopping);
-
- spin_lock(&cfs_wi_data.wi_glock);
- if (sched->ws_stopping) {
- CDEBUG(D_INFO, "%s is in progress of stopping\n",
- sched->ws_name);
- spin_unlock(&cfs_wi_data.wi_glock);
- return;
- }
-
- LASSERT(!list_empty(&sched->ws_list));
- sched->ws_stopping = 1;
-
- spin_unlock(&cfs_wi_data.wi_glock);
-
- wake_up_all(&sched->ws_waitq);
-
- spin_lock(&cfs_wi_data.wi_glock);
- {
- int i = 2;
-
- while (sched->ws_nthreads > 0) {
- CDEBUG(is_power_of_2(++i / 20) ? D_WARNING : D_NET,
- "waiting %us for %d %s worker threads to exit\n",
- i / 20, sched->ws_nthreads, sched->ws_name);
-
- spin_unlock(&cfs_wi_data.wi_glock);
- schedule_timeout_uninterruptible(cfs_time_seconds(1)
- / 20);
- spin_lock(&cfs_wi_data.wi_glock);
- }
- }
-
- list_del(&sched->ws_list);
-
- spin_unlock(&cfs_wi_data.wi_glock);
-
- LASSERT(sched->ws_nscheduled == 0);
-
- LIBCFS_FREE(sched, sizeof(*sched));
-}
-EXPORT_SYMBOL(cfs_wi_sched_destroy);
-
-int
-cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
- int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
-{
- struct cfs_wi_sched *sched;
-
- LASSERT(cfs_wi_data.wi_init);
- LASSERT(!cfs_wi_data.wi_stopping);
- LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
- (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
-
- LIBCFS_ALLOC(sched, sizeof(*sched));
- if (sched == NULL)
- return -ENOMEM;
-
- if (strlen(name) > sizeof(sched->ws_name)-1) {
- LIBCFS_FREE(sched, sizeof(*sched));
- return -E2BIG;
- }
- strlcpy(sched->ws_name, name, sizeof(sched->ws_name));
-
- sched->ws_cptab = cptab;
- sched->ws_cpt = cpt;
-
- spin_lock_init(&sched->ws_lock);
- init_waitqueue_head(&sched->ws_waitq);
-
- INIT_LIST_HEAD(&sched->ws_runq);
- INIT_LIST_HEAD(&sched->ws_rerunq);
- INIT_LIST_HEAD(&sched->ws_list);
-
- for (; nthrs > 0; nthrs--) {
- char name[16];
- struct task_struct *task;
-
- spin_lock(&cfs_wi_data.wi_glock);
- while (sched->ws_starting > 0) {
- spin_unlock(&cfs_wi_data.wi_glock);
- schedule();
- spin_lock(&cfs_wi_data.wi_glock);
- }
-
- sched->ws_starting++;
- spin_unlock(&cfs_wi_data.wi_glock);
-
- if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
- snprintf(name, sizeof(name), "%s_%02d_%02d",
- sched->ws_name, sched->ws_cpt,
- sched->ws_nthreads);
- } else {
- snprintf(name, sizeof(name), "%s_%02d",
- sched->ws_name, sched->ws_nthreads);
- }
-
- task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
- if (IS_ERR(task)) {
- int rc = PTR_ERR(task);
-
- CERROR("Failed to create thread for "
- "WI scheduler %s: %d\n", name, rc);
-
- spin_lock(&cfs_wi_data.wi_glock);
-
- /* make up for cfs_wi_sched_destroy */
- list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
- sched->ws_starting--;
-
- spin_unlock(&cfs_wi_data.wi_glock);
-
- cfs_wi_sched_destroy(sched);
- return rc;
- }
- }
-
- spin_lock(&cfs_wi_data.wi_glock);
- list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
- spin_unlock(&cfs_wi_data.wi_glock);
-
- *sched_pp = sched;
- return 0;
-}
-EXPORT_SYMBOL(cfs_wi_sched_create);
-
-int
-cfs_wi_startup(void)
-{
- memset(&cfs_wi_data, 0, sizeof(struct cfs_workitem_data));
-
- spin_lock_init(&cfs_wi_data.wi_glock);
- INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
- cfs_wi_data.wi_init = 1;
-
- return 0;
-}
-
-void
-cfs_wi_shutdown (void)
-{
- struct cfs_wi_sched *sched;
-
- spin_lock(&cfs_wi_data.wi_glock);
- cfs_wi_data.wi_stopping = 1;
- spin_unlock(&cfs_wi_data.wi_glock);
-
- /* nobody should contend on this list */
- list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
- sched->ws_stopping = 1;
- wake_up_all(&sched->ws_waitq);
- }
-
- list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
- spin_lock(&cfs_wi_data.wi_glock);
-
- while (sched->ws_nthreads != 0) {
- spin_unlock(&cfs_wi_data.wi_glock);
- schedule_timeout_uninterruptible(cfs_time_seconds(1)
- / 20);
- spin_lock(&cfs_wi_data.wi_glock);
- }
- spin_unlock(&cfs_wi_data.wi_glock);
- }
-
- while (!list_empty(&cfs_wi_data.wi_scheds)) {
- sched = list_entry(cfs_wi_data.wi_scheds.next,
- struct cfs_wi_sched, ws_list);
- list_del(&sched->ws_list);
- LIBCFS_FREE(sched, sizeof(*sched));
- }
-
- cfs_wi_data.wi_stopping = 0;
- cfs_wi_data.wi_init = 0;
-}
EXTRA_DIST = \
api.h \
+ lib-cpt.h \
lib-lnet.h \
lib-types.h \
udsp.h \
*
* GPL HEADER END
*/
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2012, 2017, Intel Corporation.
*/
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * libcfs/include/libcfs/libcfs_cpu.h
+/* This file is part of Lustre, http://www.lustre.org/
*
* CPU partition
* . CPU partition is virtual processing unit
#endif /* CONFIG_SMP */
+/* Module parameters */
+extern int cpu_npartitions;
+extern char *cpu_pattern;
+
static inline
struct workqueue_struct *cfs_cpt_bind_workqueue(const char *wq_name,
struct cfs_cpt_table *tbl,
return wq;
}
-/*
- * allocate per-cpu-partition data, returned value is an array of pointers,
+/* allocate per-cpu-partition data, returned value is an array of pointers,
* variable can be indexed by CPU ID.
* cptab != NULL: size of array is number of CPU partitions
* cptab == NULL: size of array is number of HW cores
*/
void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
-/*
- * destroy per-cpu-partition variable
- */
+/* destroy per-cpu-partition variable */
void cfs_percpt_free(void *vars);
int cfs_percpt_number(void *vars);
#include <libcfs/libcfs.h>
#include <lnet/api.h>
+#include <lnet/lib-cpt.h>
#include <lnet/lib-types.h>
#include <uapi/linux/lnet/lnet-dlc.h>
#include <uapi/linux/lnet/lnet-types.h>
MODULES := lnet
+lnet-objs-$(CONFIG_SMP) = lib-cpt.o
lnet-objs := api-ni.o config.o nidstrings.o lnet_rdma.o lock.o
lnet-objs += lib-me.o lib-msg.o lib-md.o lib-ptl.o
lnet-objs += lib-socket.o lib-move.o module.o lo.o
-lnet-objs += router.o router_proc.o acceptor.o peer.o net_fault.o udsp.o
+lnet-objs += router.o lnet_debugfs.o acceptor.o peer.o net_fault.o udsp.o
+lnet-objs += $(lnet-objs-y)
default: all
endif # MODULES
-EXTRA_DIST := $(lnet-objs:%.o=%.c)
+EXTRA_DIST := $(lnet-objs:%.o=%.c) lib-cpt.c
MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ lnet
*
* GPL HEADER END
*/
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, Intel Corporation.
*/
-/*
- * This file is part of Lustre, http://www.lustre.org/
+/* This file is part of Lustre, http://www.lustre.org/
*
- * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
+ * Please see comments in include/lnet/lib-cpt.h for introduction
*
* Author: liang@whamcloud.com
*/
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <libcfs/libcfs_string.h>
#include <libcfs/libcfs.h>
-#include <libcfs/libcfs_cpu.h>
+#include <lnet/lib-cpt.h>
/** virtual processing unit */
struct cfs_cpu_partition {
* 1 : disable multiple partitions
* >1 : specify number of partitions
*/
-static int cpu_npartitions;
module_param(cpu_npartitions, int, 0444);
MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
* modparam for setting CPU partitions patterns:
*
* i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
- * number in bracket is processor ID (core or HT)
+ * number in bracket is processor ID (core or HT)
*
* i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
- * are NUMA node ID, number before bracket is CPU partition ID.
+ * are NUMA node ID, number before bracket is CPU partition ID.
*
* i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
*
* NB: If user specified cpu_pattern, cpu_npartitions will be ignored
*/
-static char *cpu_pattern = "N";
module_param(cpu_pattern, charp, 0444);
MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
}
EXPORT_SYMBOL(cfs_cpt_distance);
-/*
- * Calculate the maximum NUMA distance between all nodes in the
+/* Calculate the maximum NUMA distance between all nodes in the
* from_mask and all nodes in the to_mask.
*/
static unsigned int cfs_cpt_distance_calculate(nodemask_t *from_mask,
return 0;
}
- /*
- * Allocate scratch buffers
+ /* Allocate scratch buffers
* As we cannot initialize a cpumask_var_t, we need
* to alloc both before we can risk trying to free either
*/
return ERR_PTR(rc);
}
+/* Bookkeeping header placed in front of the pointer array handed out by
+ * cfs_percpt_alloc(); callers only ever see the address of va_ptrs[0].
+ * va_ptrs is a C99 flexible array member rather than a zero-length array,
+ * so the trailing storage is sized at allocation time with offsetof().
+ */
+struct cfs_var_array {
+	unsigned int		va_count;	/* # of buffers */
+	unsigned int		va_size;	/* size of each var */
+	struct cfs_cpt_table	*va_cptab;	/* cpu partition table */
+	void			*va_ptrs[];	/* buffer addresses */
+};
+
+/* Release an array obtained from cfs_percpt_alloc(): every non-NULL
+ * per-partition buffer first, then the bookkeeping header itself.
+ *
+ * @vars is the pointer previously returned by cfs_percpt_alloc().
+ */
+void
+cfs_percpt_free(void *vars)
+{
+	struct cfs_var_array *arr;
+	unsigned int i;	/* unsigned to match va_count */
+
+	/* @vars points at va_ptrs[0]; step back to the enclosing header */
+	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+	for (i = 0; i < arr->va_count; i++) {
+		/* a slot is NULL when cfs_percpt_alloc() failed to fill it */
+		if (arr->va_ptrs[i])
+			LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
+	}
+
+	/* the header was sized as "fixed fields + va_count pointers" */
+	LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
+				  va_ptrs[arr->va_count]));
+}
+EXPORT_SYMBOL(cfs_percpt_free);
+
+/* Allocate one buffer per CPU partition of @cptab.
+ *
+ * The value handed back is an array of pointers that can be indexed by
+ * CPU partition ID, i.e:
+ *
+ *	arr = cfs_percpt_alloc(cfs_cpu_pt, size);
+ *	arr[0] is the memory block for partition 0,
+ *	arr[1] is the memory block for partition 1,
+ *	...
+ *	arr[N] is the memory block for partition N.
+ *
+ * @size is rounded up with L1_CACHE_ALIGN() before each block is allocated.
+ * Returns NULL when the header or any block cannot be allocated; in the
+ * latter case everything is released again through cfs_percpt_free().
+ */
+void *
+cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
+{
+	int nparts = cfs_cpt_number(cptab);
+	struct cfs_var_array *va;
+	void **slots;
+	int cpt;
+
+	/* header plus one pointer slot per partition */
+	LIBCFS_ALLOC(va, offsetof(struct cfs_var_array, va_ptrs[nparts]));
+	if (!va)
+		return NULL;
+
+	slots = &va->va_ptrs[0];
+	size = L1_CACHE_ALIGN(size);
+
+	va->va_cptab = cptab;
+	va->va_count = nparts;
+	va->va_size = size;
+
+	for (cpt = 0; cpt < nparts; cpt++) {
+		LIBCFS_CPT_ALLOC(va->va_ptrs[cpt], cptab, cpt, size);
+		if (va->va_ptrs[cpt])
+			continue;
+
+		/* undo the partial allocation */
+		cfs_percpt_free((void *)slots);
+		return NULL;
+	}
+
+	return (void *)slots;
+}
+EXPORT_SYMBOL(cfs_percpt_alloc);
+
+/* Report how many buffers back @vars, i.e. the va_count recorded in the
+ * array header that contains it.
+ */
+int
+cfs_percpt_number(void *vars)
+{
+	struct cfs_var_array *hdr = container_of(vars, struct cfs_var_array,
+						 va_ptrs[0]);
+
+	return hdr->va_count;
+}
+EXPORT_SYMBOL(cfs_percpt_number);
+
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
static enum cpuhp_state lustre_cpu_online;
#include <libcfs/libcfs.h>
#include <lnet/lib-lnet.h>
-/* This is really lnet_proc.c. You might need to update sanity test 215
- * if any file format is changed. */
-
#define LNET_LOFFT_BITS (sizeof(loff_t) * 8)
-/*
- * NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system
+/* NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system
*/
#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1)
/* change version, 16 bits or 8 bits */
clamp_t(int, LNET_LOFFT_BITS / 4, 8, 16)
#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS
-/*
- * bits for peer hash offset
+/* bits for peer hash offset
* NB: we don't use the highest bit of *ppos because it's signed
*/
#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \
#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK))
+/* Read handler behind the "cpu_partition_table" entry: formats cfs_cpt_tab
+ * with cfs_cpt_table_print() and copies the text starting at offset *ppos
+ * out to @buffer. Writes are rejected with -EPERM.
+ */
+static int proc_cpt_table(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	loff_t pos = *ppos;
+	size_t nob = *lenp;
+	char *buf = NULL;
+	int len = 4096;
+	int rc;
+
+	if (write)
+		return -EPERM;
+
+	/* keep doubling the scratch buffer while the printer reports -EFBIG */
+	for (;;) {
+		LIBCFS_ALLOC(buf, len);
+		if (!buf)
+			return -ENOMEM;
+
+		rc = cfs_cpt_table_print(cfs_cpt_tab, buf, len);
+		if (rc != -EFBIG)
+			break;
+
+		LIBCFS_FREE(buf, len);
+		len <<= 1;
+	}
+
+	/* negative rc is passed up as is; otherwise it is compared against
+	 * the read offset to decide whether anything is left to copy
+	 */
+	if (rc >= 0) {
+		if (pos >= rc)
+			rc = 0;
+		else
+			rc = cfs_trace_copyout_string(buffer, nob,
+						      buf + pos, NULL);
+	}
+
+	LIBCFS_FREE(buf, len);
+	return rc;
+}
+
+/* Read handler behind the "cpu_partition_distance" entry: formats
+ * cfs_cpt_tab with cfs_cpt_distance_print() and copies the text starting at
+ * offset *ppos out to @buffer. Writes are rejected with -EPERM.
+ */
+static int proc_cpt_distance(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	loff_t pos = *ppos;
+	size_t nob = *lenp;
+	char *buf = NULL;
+	int len = 4096;
+	int rc;
+
+	if (write)
+		return -EPERM;
+
+	/* keep doubling the scratch buffer while the printer reports -EFBIG */
+	for (;;) {
+		LIBCFS_ALLOC(buf, len);
+		if (!buf)
+			return -ENOMEM;
+
+		rc = cfs_cpt_distance_print(cfs_cpt_tab, buf, len);
+		if (rc != -EFBIG)
+			break;
+
+		LIBCFS_FREE(buf, len);
+		len <<= 1;
+	}
+
+	/* negative rc is passed up as is; otherwise it is compared against
+	 * the read offset to decide whether anything is left to copy
+	 */
+	if (rc >= 0) {
+		if (pos >= rc)
+			rc = 0;
+		else
+			rc = cfs_trace_copyout_string(buffer, nob,
+						      buf + pos, NULL);
+	}
+
+	LIBCFS_FREE(buf, len);
+	return rc;
+}
+
static int proc_lnet_stats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
* to go via /proc for portability.
*/
{
+ .procname = "cpu_partition_table",
+ .maxlen = 128,
+ .mode = 0444,
+ .proc_handler = &proc_cpt_table,
+ },
+ {
+ .procname = "cpu_partition_distance",
+ .maxlen = 128,
+ .mode = 0444,
+ .proc_handler = &proc_cpt_distance,
+ },
+ {
.procname = "stats",
.mode = 0644,
.proc_handler = &proc_lnet_stats,
int rc;
ENTRY;
+ rc = cfs_cpu_init();
+ if (rc < 0) {
+ CERROR("cfs_cpu_init: rc = %d\n", rc);
+ RETURN(rc);
+ }
+
rc = lnet_lib_init();
if (rc != 0) {
CERROR("lnet_lib_init: error %d\n", rc);
+ cfs_cpu_fini();
RETURN(rc);
}
LASSERT(rc == 0);
lnet_lib_exit();
+ cfs_cpu_fini();
}
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
#include <linux/types.h>
#include <libcfs/libcfs.h>
+#include <lnet/lib-cpt.h>
#include <lprocfs_status.h>
#include <lustre_handles.h>