X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fclass_obd.c;h=3470ba6632037975773aaabb9ae344107be15881;hb=21ec386df7314348e9d483008648524192b51054;hp=2efee5b7e34d5e8d1f6fd926aa0c022d8d89d126;hpb=0ae0abca98d7af25898c9996464a8ab603fb7f80;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 2efee5b..3470ba6 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -1,169 +1,190 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Object Devices Class Driver + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * This file is part of Lustre, http://www.lustre.org. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * GPL HEADER END + */ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. * - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. */ #define DEBUG_SUBSYSTEM S_CLASS -#define EXPORT_SYMTAB -#ifdef __KERNEL__ -#include /* for CONFIG_PROC_FS */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#else +#ifndef __KERNEL__ # include +#else +# include #endif -#include -#include -#include -#include -#include /* for PTL_MD_MAX_IOV */ -#include -#include - -struct semaphore obd_conf_sem; /* serialize configuration commands */ -struct obd_device obd_dev[MAX_OBD_DEVICES]; -struct list_head obd_types; -atomic_t obd_memory; -int obd_memmax; - -/* Root for /proc/lustre */ -struct proc_dir_entry *proc_lustre_root = NULL; -int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data); -struct lprocfs_vars lprocfs_version[] = {{"version", obd_proc_read_version, NULL, NULL },{NULL,NULL,NULL,NULL}}; -int proc_version; - -/* The following are visible and mutable through /proc/sys/lustre/. */ -unsigned long obd_fail_loc; -unsigned long obd_timeout = 100; -unsigned long obd_bulk_timeout = 1; -char obd_lustre_upcall[128] = "/usr/lib/lustre/lustre_upcall"; -unsigned long obd_sync_filter; /* = 0, don't sync by default */ - -#ifdef __KERNEL__ -/* opening /dev/obd */ -static int obd_class_open(struct inode * inode, struct file * file) -{ - struct obd_class_user_state *ocus; - ENTRY; - - OBD_ALLOC(ocus, sizeof(*ocus)); - if (ocus == NULL) - return (-ENOMEM); - - INIT_LIST_HEAD(&ocus->ocus_conns); - file->private_data = ocus; - - PORTAL_MODULE_USE; - RETURN(0); -} - -/* closing /dev/obd */ -static int obd_class_release(struct inode * inode, struct file * file) -{ - struct obd_class_user_state *ocus = file->private_data; - struct obd_class_user_conn *c; - ENTRY; - - while (!list_empty (&ocus->ocus_conns)) { - c = list_entry (ocus->ocus_conns.next, - struct obd_class_user_conn, ocuc_chain); - list_del (&c->ocuc_chain); - - CDEBUG (D_IOCTL, "Auto-disconnect %p\n", &c->ocuc_conn); - - down (&obd_conf_sem); - obd_disconnect (&c->ocuc_conn, 0); - up (&obd_conf_sem); - - OBD_FREE (c, sizeof (*c)); - } +#include +#include +#include +#include +#include +#include +#include +#include "llog_internal.h" + +#ifndef __KERNEL__ +/* liblustre workaround */ +cfs_atomic_t libcfs_kmemory = {0}; +#endif - OBD_FREE (ocus, sizeof (*ocus)); +struct obd_device *obd_devs[MAX_OBD_DEVICES]; +EXPORT_SYMBOL(obd_devs); +cfs_list_t obd_types; +DEFINE_RWLOCK(obd_dev_lock); - PORTAL_MODULE_UNUSE; - RETURN(0); -} +__u64 obd_max_pages = 0; +__u64 obd_max_alloc = 0; +#ifndef __KERNEL__ +__u64 obd_alloc; +__u64 obd_pages; #endif +DEFINE_SPINLOCK(obd_updatemax_lock); -static int -obd_class_add_user_conn (struct obd_class_user_state *ocus, - struct lustre_handle *conn) +/* The following are visible and mutable through /proc/sys/lustre/. */ +unsigned int obd_alloc_fail_rate = 0; +EXPORT_SYMBOL(obd_alloc_fail_rate); +unsigned int obd_debug_peer_on_timeout; +EXPORT_SYMBOL(obd_debug_peer_on_timeout); +unsigned int obd_dump_on_timeout; +EXPORT_SYMBOL(obd_dump_on_timeout); +unsigned int obd_dump_on_eviction; +EXPORT_SYMBOL(obd_dump_on_eviction); +unsigned int obd_max_dirty_pages = 256; +EXPORT_SYMBOL(obd_max_dirty_pages); +cfs_atomic_t obd_dirty_pages; +EXPORT_SYMBOL(obd_dirty_pages); +unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ +EXPORT_SYMBOL(obd_timeout); +unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ +EXPORT_SYMBOL(ldlm_timeout); +unsigned int obd_timeout_set; +EXPORT_SYMBOL(obd_timeout_set); +unsigned int ldlm_timeout_set; +EXPORT_SYMBOL(ldlm_timeout_set); +/* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */ +unsigned int at_min = 0; +EXPORT_SYMBOL(at_min); +unsigned int at_max = 600; +EXPORT_SYMBOL(at_max); +unsigned int at_history = 600; +EXPORT_SYMBOL(at_history); +int at_early_margin = 5; +EXPORT_SYMBOL(at_early_margin); +int at_extra = 30; +EXPORT_SYMBOL(at_extra); + +cfs_atomic_t obd_dirty_transit_pages; +EXPORT_SYMBOL(obd_dirty_transit_pages); + +char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE; +EXPORT_SYMBOL(obd_jobid_var); + +/* Get jobid of current process by reading the environment variable + * stored in between the "env_start" & "env_end" of task struct. + * + * TODO: + * It's better to cache the jobid for later use if there is any + * efficient way, the cl_env code probably could be reused for this + * purpose. + * + * If some job scheduler doesn't store jobid in the "env_start/end", + * then an upcall could be issued here to get the jobid by utilizing + * the userspace tools/api. Then, the jobid must be cached. + */ +int lustre_get_jobid(char *jobid) { - struct obd_class_user_conn *c; - - /* NB holding obd_conf_sem */ - - OBD_ALLOC (c, sizeof (*c)); - if (ocus == NULL) - return (-ENOMEM); - - c->ocuc_conn = *conn; - list_add (&c->ocuc_chain, &ocus->ocus_conns); - return (0); + int jobid_len = JOBSTATS_JOBID_SIZE; + int rc = 0; + ENTRY; + + memset(jobid, 0, JOBSTATS_JOBID_SIZE); + /* Jobstats isn't enabled */ + if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) + RETURN(0); + + /* Use process name + fsuid as jobid */ + if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { + snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u", + cfs_curproc_comm(), cfs_curproc_fsuid()); + RETURN(0); + } + + rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len); + if (rc) { + if (rc == -EOVERFLOW) { + /* For the PBS_JOBID and LOADL_STEP_ID keys (which are + * variable length strings instead of just numbers), it + * might make sense to keep the unique parts for JobID, + * instead of just returning an error. That means a + * larger temp buffer for cfs_get_environ(), then + * truncating the string at some separator to fit into + * the specified jobid_len. Fix later if needed. */ + static bool printed; + if (unlikely(!printed)) { + LCONSOLE_ERROR_MSG(0x16b, "%s value too large " + "for JobID buffer (%d)\n", + obd_jobid_var, jobid_len); + printed = true; + } + } else { + CDEBUG((rc == -ENOENT || rc == -EINVAL || + rc == -EDEADLK) ? D_INFO : D_ERROR, + "Get jobid for (%s) failed: rc = %d\n", + obd_jobid_var, rc); + } + } + RETURN(rc); } +EXPORT_SYMBOL(lustre_get_jobid); -static void -obd_class_remove_user_conn (struct obd_class_user_state *ocus, - struct lustre_handle *conn) +int obd_alloc_fail(const void *ptr, const char *name, const char *type, + size_t size, const char *file, int line) { - struct list_head *e; - struct obd_class_user_conn *c; - - /* NB holding obd_conf_sem or last reference */ - - list_for_each (e, &ocus->ocus_conns) { - c = list_entry (e, struct obd_class_user_conn, ocuc_chain); - if (conn->cookie == c->ocuc_conn.cookie) { - list_del (&c->ocuc_chain); - OBD_FREE (c, sizeof (*c)); - return; - } - } + if (ptr == NULL || + (cfs_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) { + CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n", + ptr ? "force " :"", type, name, (__u64)size, file, + line); + CERROR(LPU64" total bytes and "LPU64" total pages " + "("LPU64" bytes) allocated by Lustre, " + "%d total bytes by LNET\n", + obd_memory_sum(), + obd_pages_sum() << CFS_PAGE_SHIFT, + obd_pages_sum(), + cfs_atomic_read(&libcfs_kmemory)); + return 1; + } + return 0; } +EXPORT_SYMBOL(obd_alloc_fail); static inline void obd_data2conn(struct lustre_handle *conn, struct obd_ioctl_data *data) @@ -178,123 +199,78 @@ static inline void obd_conn2data(struct obd_ioctl_data *data, data->ioc_cookie = conn->cookie; } -static void dump_exports(struct obd_device *obd) +int class_resolve_dev_name(__u32 len, const char *name) { - struct obd_export *exp, *n; + int rc; + int dev; + + ENTRY; + if (!len || !name) { + CERROR("No name passed,!\n"); + GOTO(out, rc = -EINVAL); + } + if (name[len - 1] != 0) { + CERROR("Name not nul terminated!\n"); + GOTO(out, rc = -EINVAL); + } - list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) { - CERROR("%s: %p %s %d %d %p\n", - obd->obd_name, exp, exp->exp_client_uuid.uuid, - atomic_read(&exp->exp_refcount), - exp->exp_failed, exp->exp_outstanding_reply ); + CDEBUG(D_IOCTL, "device name %s\n", name); + dev = class_name2dev(name); + if (dev == -1) { + CDEBUG(D_IOCTL, "No device for name %s!\n", name); + GOTO(out, rc = -EINVAL); } + + CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev); + rc = dev; + +out: + RETURN(rc); } -int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, - unsigned long arg) +int class_handle_ioctl(unsigned int cmd, unsigned long arg) { char *buf = NULL; struct obd_ioctl_data *data; - struct portals_debug_ioctl_data *debug_data; - struct obd_device *obd = ocus->ocus_current_obd; - struct lustre_handle conn; - int err = 0, len = 0, serialised = 0; + struct libcfs_debug_ioctl_data *debug_data; + struct obd_device *obd = NULL; + int err = 0, len = 0; ENTRY; - if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ - RETURN(err = -ENOTTY); - /* only for debugging */ - if (cmd == PTL_IOC_DEBUG_MASK) { - debug_data = (struct portals_debug_ioctl_data*)arg; - portal_subsystem_debug = debug_data->subs; - portal_debug = debug_data->debug; + if (cmd == LIBCFS_IOC_DEBUG_MASK) { + debug_data = (struct libcfs_debug_ioctl_data*)arg; + libcfs_subsystem_debug = debug_data->subs; + libcfs_debug = debug_data->debug; return 0; } - switch (cmd) { - case OBD_IOC_BRW_WRITE: - case OBD_IOC_BRW_READ: - case OBD_IOC_GETATTR: - case ECHO_IOC_ENQUEUE: - case ECHO_IOC_CANCEL: - break; - default: - down(&obd_conf_sem); - serialised = 1; - break; - } - - CDEBUG(D_IOCTL, "cmd = %x, obd = %p\n", cmd, obd); - if (!obd && cmd != OBD_IOC_DEVICE && - cmd != OBD_IOC_LIST && cmd != OBD_GET_VERSION && - cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_UUID2DEV && - cmd != OBD_IOC_NEWDEV && cmd != OBD_IOC_ADD_UUID && - cmd != OBD_IOC_DEL_UUID && cmd != OBD_IOC_CLOSE_UUID) { - CERROR("OBD ioctl: No device\n"); - GOTO(out, err = -EINVAL); - } + CDEBUG(D_IOCTL, "cmd = %x\n", cmd); if (obd_ioctl_getdata(&buf, &len, (void *)arg)) { CERROR("OBD ioctl: data error\n"); - GOTO(out, err = -EINVAL); + RETURN(-EINVAL); } data = (struct obd_ioctl_data *)buf; switch (cmd) { - case OBD_IOC_DEVICE: { - CDEBUG(D_IOCTL, "\n"); - if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) { - CERROR("OBD ioctl: DEVICE invalid device %d\n", - data->ioc_dev); - GOTO(out, err = -EINVAL); - } - CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev); - - ocus->ocus_current_obd = &obd_dev[data->ioc_dev]; - GOTO(out, err = 0); - } + case OBD_IOC_PROCESS_CFG: { + struct lustre_cfg *lcfg; - case OBD_IOC_LIST: { - int i; - char *buf2 = data->ioc_bulk; - int remains = data->ioc_inllen1; - - if (!data->ioc_inlbuf1) { - CERROR("No buffer passed!\n"); + if (!data->ioc_plen1 || !data->ioc_pbuf1) { + CERROR("No config buffer passed!\n"); GOTO(out, err = -EINVAL); } - - - for (i = 0 ; i < MAX_OBD_DEVICES ; i++) { - int l; - char *status; - struct obd_device *obd = &obd_dev[i]; - - if (!obd->obd_type) - continue; - if (obd->obd_stopping) - status = "ST"; - else if (obd->obd_set_up) - status = "UP"; - else if (obd->obd_attached) - status = "AT"; - else - status = "-"; - l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n", - i, status, obd->obd_type->typ_name, - obd->obd_name, obd->obd_uuid.uuid, - obd->obd_type->typ_refcnt); - buf2 +=l; - remains -=l; - if (remains <= 0) { - CERROR("not enough space for device listing\n"); - break; - } - } - - err = copy_to_user((void *)arg, data, len); - if (err) - err = -EFAULT; + OBD_ALLOC(lcfg, data->ioc_plen1); + if (lcfg == NULL) + GOTO(out, err = -ENOMEM); + err = cfs_copy_from_user(lcfg, data->ioc_pbuf1, + data->ioc_plen1); + if (!err) + err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1); + if (!err) + err = class_process_config(lcfg); + + OBD_FREE(lcfg, data->ioc_plen1); GOTO(out, err); } @@ -312,7 +288,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, memcpy(data->ioc_bulk, BUILD_VERSION, strlen(BUILD_VERSION) + 1); - err = copy_to_user((void *)arg, data, len); + err = obd_ioctl_popdata((void *)arg, data, len); if (err) err = -EFAULT; GOTO(out, err); @@ -323,27 +299,13 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, */ int dev; - if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) { - CERROR("No name passed,!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("Name not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1); - dev = class_name2dev(data->ioc_inlbuf1); + dev = class_resolve_dev_name(data->ioc_inllen1, + data->ioc_inlbuf1); data->ioc_dev = dev; - if (dev == -1) { - CDEBUG(D_IOCTL, "No device for name %s!\n", - data->ioc_inlbuf1); + if (dev < 0) GOTO(out, err = -EINVAL); - } - CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1, - dev); - err = copy_to_user((void *)arg, data, sizeof(*data)); + err = obd_ioctl_popdata((void *)arg, data, sizeof(*data)); if (err) err = -EFAULT; GOTO(out, err); @@ -377,356 +339,97 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1, dev); - err = copy_to_user((void *)arg, data, sizeof(*data)); + err = obd_ioctl_popdata((void *)arg, data, sizeof(*data)); if (err) err = -EFAULT; GOTO(out, err); } - - - case OBD_IOC_NEWDEV: { - int dev = -1; - int i; - - ocus->ocus_current_obd = NULL; - for (i = 0 ; i < MAX_OBD_DEVICES ; i++) { - struct obd_device *obd = &obd_dev[i]; - if (!obd->obd_type) { - ocus->ocus_current_obd = obd; - dev = i; - break; - } - } - - - data->ioc_dev = dev; - if (dev == -1) - GOTO(out, err = -EINVAL); - - err = copy_to_user((void *)arg, data, sizeof(*data)); - if (err) - err = -EFAULT; - GOTO(out, err); + case OBD_IOC_CLOSE_UUID: { + CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n", + data->ioc_inlbuf1); + GOTO(out, err = 0); } - case OBD_IOC_ATTACH: { - struct obd_type *type; - int minor, len; + case OBD_IOC_GETDEVICE: { + int index = data->ioc_count; + char *status, *str; - /* have we attached a type to this device */ - if (obd->obd_attached|| obd->obd_type) { - CERROR("OBD: Device %d already typed as %s.\n", - obd->obd_minor, MKSTR(obd->obd_type->typ_name)); - GOTO(out, err = -EBUSY); - } - - if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { - CERROR("No type passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("Type not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - if (!data->ioc_inllen2 || !data->ioc_inlbuf2) { - CERROR("No name passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) { - CERROR("Name not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - if (!data->ioc_inllen3 || !data->ioc_inlbuf3) { - CERROR("No UUID passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf3[data->ioc_inllen3 - 1] != 0) { - CERROR("UUID not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n", - MKSTR(data->ioc_inlbuf1), - MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3)); - - /* find the type */ - type = class_get_type(data->ioc_inlbuf1); - if (!type) { - CERROR("OBD: unknown type dev %d\n", obd->obd_minor); + if (!data->ioc_inlbuf1) { + CERROR("No buffer passed in ioctl\n"); GOTO(out, err = -EINVAL); } - - minor = obd->obd_minor; - memset(obd, 0, sizeof(*obd)); - obd->obd_minor = minor; - obd->obd_type = type; - INIT_LIST_HEAD(&obd->obd_exports); - obd->obd_num_exports = 0; - INIT_LIST_HEAD(&obd->obd_imports); - spin_lock_init(&obd->obd_dev_lock); - init_waitqueue_head(&obd->obd_refcount_waitq); - - /* XXX belong ins setup not attach */ - /* recovery data */ - spin_lock_init(&obd->obd_processing_task_lock); - init_waitqueue_head(&obd->obd_next_transno_waitq); - INIT_LIST_HEAD(&obd->obd_recovery_queue); - INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); - - init_waitqueue_head(&obd->obd_commit_waitq); - - len = strlen(data->ioc_inlbuf2) + 1; - OBD_ALLOC(obd->obd_name, len); - if (!obd->obd_name) { - class_put_type(obd->obd_type); - obd->obd_type = NULL; - GOTO(out, err = -ENOMEM); - } - memcpy(obd->obd_name, data->ioc_inlbuf2, len); - - len = strlen(data->ioc_inlbuf3); - if (len >= sizeof(obd->obd_uuid)) { - CERROR("uuid must be < "LPSZ" bytes long\n", - sizeof(obd->obd_uuid)); - if (obd->obd_name) - OBD_FREE(obd->obd_name, - strlen(obd->obd_name) + 1); - class_put_type(obd->obd_type); - obd->obd_type = NULL; + if (data->ioc_inllen1 < 128) { + CERROR("ioctl buffer too small to hold version\n"); GOTO(out, err = -EINVAL); } - memcpy(obd->obd_uuid.uuid, data->ioc_inlbuf3, len); - - /* do the attach */ - if (OBP(obd, attach)) - err = OBP(obd,attach)(obd, sizeof(*data), data); - if (err) { - if(data->ioc_inlbuf2) - OBD_FREE(obd->obd_name, - strlen(obd->obd_name) + 1); - class_put_type(obd->obd_type); - obd->obd_type = NULL; - } else { - obd->obd_attached = 1; - - type->typ_refcnt++; - CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n", - obd->obd_minor, data->ioc_inlbuf1); - } - GOTO(out, err); - } - - case OBD_IOC_DETACH: { - ENTRY; - if (obd->obd_set_up) { - CERROR("OBD device %d still set up\n", obd->obd_minor); - GOTO(out, err = -EBUSY); - } - if (!obd->obd_attached) { - CERROR("OBD device %d not attached\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - if (OBP(obd, detach)) - err = OBP(obd,detach)(obd); - - if (obd->obd_name) { - OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1); - obd->obd_name = NULL; - } + obd = class_num2obd(index); + if (!obd) + GOTO(out, err = -ENOENT); + + if (obd->obd_stopping) + status = "ST"; + else if (obd->obd_set_up) + status = "UP"; + else if (obd->obd_attached) + status = "AT"; + else + status = "--"; + str = (char *)data->ioc_bulk; + snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d", + (int)index, status, obd->obd_type->typ_name, + obd->obd_name, obd->obd_uuid.uuid, + cfs_atomic_read(&obd->obd_refcount)); + err = obd_ioctl_popdata((void *)arg, data, len); - obd->obd_attached = 0; - obd->obd_type->typ_refcnt--; - class_put_type(obd->obd_type); - obd->obd_type = NULL; - memset(obd, 0, sizeof(*obd)); GOTO(out, err = 0); } - case OBD_IOC_SETUP: { - /* have we attached a type to this device? */ - if (!obd->obd_attached) { - CERROR("Device %d not attached\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - - /* has this been done already? */ - if (obd->obd_set_up) { - CERROR("Device %d already setup (type %s)\n", - obd->obd_minor, obd->obd_type->typ_name); - GOTO(out, err = -EBUSY); - } - - atomic_set(&obd->obd_refcount, 0); - - if (OBT(obd) && OBP(obd, setup)) - err = obd_setup(obd, sizeof(*data), data); - - if (!err) { - obd->obd_type->typ_refcnt++; - obd->obd_set_up = 1; - atomic_inc(&obd->obd_refcount); - } - - GOTO(out, err); } - case OBD_IOC_CLEANUP: { - int flags = 0; - char *flag; - - if (!obd->obd_set_up) { - CERROR("Device %d not setup\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - - if (data->ioc_inlbuf1) { - for (flag = data->ioc_inlbuf1; *flag != 0; flag++) - switch (*flag) { - case 'F': - flags |= OBD_OPT_FORCE; - break; - case 'A': - flags |= OBD_OPT_FAILOVER; - break; - default: - CERROR("unrecognised flag '%c'\n", - *flag); - } - } - - if (atomic_read(&obd->obd_refcount) == 1 || - flags & OBD_OPT_FORCE) { - /* this will stop new connections, and need to - do it before class_disconnect_exports() */ - obd->obd_stopping = 1; - } - - if (atomic_read(&obd->obd_refcount) > 1) { - struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL, - NULL, NULL); - int rc; - - if (!(flags & OBD_OPT_FORCE)) { - CERROR("OBD device %d (%p) has refcount %d\n", - obd->obd_minor, obd, - atomic_read(&obd->obd_refcount)); - dump_exports(obd); - GOTO(out, err = -EBUSY); - } - class_disconnect_exports(obd, flags); - CDEBUG(D_IOCTL, - "%s: waiting for obd refs to go away: %d\n", - obd->obd_name, atomic_read(&obd->obd_refcount)); - - rc = l_wait_event(obd->obd_refcount_waitq, - atomic_read(&obd->obd_refcount) < 2, &lwi); - if (rc == 0) { - LASSERT(atomic_read(&obd->obd_refcount) == 1); - } else { - CERROR("wait cancelled cleaning anyway. " - "refcount: %d\n", - atomic_read(&obd->obd_refcount)); - dump_exports(obd); - } - CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", - obd->obd_name); - } - - if (OBT(obd) && OBP(obd, cleanup)) - err = obd_cleanup(obd, flags); - - if (!err) { - obd->obd_set_up = obd->obd_stopping = 0; - obd->obd_type->typ_refcnt--; - atomic_dec(&obd->obd_refcount); - /* XXX this should be an LASSERT */ - if (atomic_read(&obd->obd_refcount) > 0) - CERROR("%s still has refcount %d after " - "cleanup.\n", obd->obd_name, - atomic_read(&obd->obd_refcount)); - } - GOTO(out, err); + if (data->ioc_dev == OBD_DEV_BY_DEVNAME) { + if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL) + GOTO(out, err = -EINVAL); + if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME) + GOTO(out, err = -EINVAL); + obd = class_name2obd(data->ioc_inlbuf4); + } else if (data->ioc_dev < class_devno_max()) { + obd = class_num2obd(data->ioc_dev); + } else { + CERROR("OBD ioctl: No device\n"); + GOTO(out, err = -EINVAL); } - case OBD_IOC_CONNECT: { - struct obd_uuid cluuid = { "OBD_CLASS_UUID" }; - obd_data2conn(&conn, data); - - err = obd_connect(&conn, obd, &cluuid); - - CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.cookie); - obd_conn2data(data, &conn); - if (err) - GOTO(out, err); - - err = obd_class_add_user_conn (ocus, &conn); - if (err != 0) { - obd_disconnect (&conn, 0); - GOTO (out, err); - } - - err = copy_to_user((void *)arg, data, sizeof(*data)); - if (err != 0) { - obd_class_remove_user_conn (ocus, &conn); - obd_disconnect (&conn, 0); - GOTO (out, err = -EFAULT); - } - GOTO(out, err); + if (obd == NULL) { + CERROR("OBD ioctl : No Device %d\n", data->ioc_dev); + GOTO(out, err = -EINVAL); } + LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - case OBD_IOC_DISCONNECT: { - obd_data2conn(&conn, data); - obd_class_remove_user_conn (ocus, &conn); - err = obd_disconnect(&conn, 0); - GOTO(out, err); + if (!obd->obd_set_up || obd->obd_stopping) { + CERROR("OBD ioctl: device not setup %d \n", data->ioc_dev); + GOTO(out, err = -EINVAL); } + switch(cmd) { case OBD_IOC_NO_TRANSNO: { if (!obd->obd_attached) { CERROR("Device %d not attached\n", obd->obd_minor); GOTO(out, err = -ENODEV); } - CDEBUG(D_IOCTL, - "disabling committed-transno notifications on %d\n", - obd->obd_minor); + CDEBUG(D_HA, "%s: disabling committed-transno notification\n", + obd->obd_name); obd->obd_no_transno = 1; GOTO(out, err = 0); } - case OBD_IOC_CLOSE_UUID: { - struct lustre_peer peer; - CDEBUG(D_IOCTL, "closing all connections to uuid %s\n", - data->ioc_inlbuf1); - lustre_uuid_to_peer(data->ioc_inlbuf1, &peer); - GOTO(out, err = 0); - } - case OBD_IOC_ADD_UUID: { - CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64 - ", nal %d\n", data->ioc_inlbuf1, data->ioc_nid, - data->ioc_nal); - - err = class_add_uuid(data->ioc_inlbuf1, data->ioc_nid, - data->ioc_nal); - GOTO(out, err); - } - case OBD_IOC_DEL_UUID: { - CDEBUG(D_IOCTL, "removing mappings for uuid %s\n", - data->ioc_inlbuf1 == NULL ? "" : - data->ioc_inlbuf1); - - err = class_del_uuid(data->ioc_inlbuf1); - GOTO(out, err); - } - default: { - // obd_data2conn(&conn, data); - struct obd_class_user_conn *oconn = list_entry(ocus->ocus_conns.next, struct obd_class_user_conn, ocuc_chain); - err = obd_iocontrol(cmd, &oconn->ocuc_conn, len, data, NULL); + default: { + err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL); if (err) GOTO(out, err); - err = copy_to_user((void *)arg, data, len); + err = obd_ioctl_popdata((void *)arg, data, len); if (err) err = -EFAULT; GOTO(out, err); @@ -736,85 +439,91 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, out: if (buf) obd_ioctl_freedata(buf, len); - if (serialised) - up(&obd_conf_sem); RETURN(err); } /* class_handle_ioctl */ - - -#define OBD_MINOR 241 #ifdef __KERNEL__ -/* to control /dev/obd */ -static int obd_class_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +extern cfs_psdev_t obd_psdev; +#else +void *obd_psdev = NULL; +#endif + +#define OBD_INIT_CHECK +#ifdef OBD_INIT_CHECK +int obd_init_checks(void) { - return class_handle_ioctl(filp->private_data, cmd, arg); -} + __u64 u64val, div64val; + char buf[64]; + int len, ret = 0; + + CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", LPU64, LPD64, LPX64); + + CDEBUG(D_INFO, "OBD_OBJECT_EOF = "LPX64"\n", (__u64)OBD_OBJECT_EOF); + + u64val = OBD_OBJECT_EOF; + CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val); + if (u64val != OBD_OBJECT_EOF) { + CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n", + u64val, (int)sizeof(u64val)); + ret = -EINVAL; + } + len = snprintf(buf, sizeof(buf), LPX64, u64val); + if (len != 18) { + CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len); + ret = -EINVAL; + } -/* declare character device */ -static struct file_operations obd_psdev_fops = { - ioctl: obd_class_ioctl, /* ioctl */ - open: obd_class_open, /* open */ - release: obd_class_release, /* release */ -}; - -/* modules setup */ -static struct miscdevice obd_psdev = { - OBD_MINOR, - "obd_psdev", - &obd_psdev_fops -}; + div64val = OBD_OBJECT_EOF; + CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val); + if (u64val != OBD_OBJECT_EOF) { + CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n", + u64val, (int)sizeof(u64val)); + ret = -EOVERFLOW; + } + if (u64val >> 8 != OBD_OBJECT_EOF >> 8) { + CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n", + u64val, (int)sizeof(u64val)); + return -EOVERFLOW; + } + if (do_div(div64val, 256) != (u64val & 255)) { + CERROR("do_div("LPX64",256) != "LPU64"\n", u64val, u64val &255); + return -EOVERFLOW; + } + if (u64val >> 8 != div64val) { + CERROR("do_div("LPX64",256) "LPU64" != "LPU64"\n", + u64val, div64val, u64val >> 8); + return -EOVERFLOW; + } + len = snprintf(buf, sizeof(buf), LPX64, u64val); + if (len != 18) { + CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len); + ret = -EINVAL; + } + len = snprintf(buf, sizeof(buf), LPU64, u64val); + if (len != 20) { + CWARN("LPU64 wrong length! strlen(%s)=%d != 20\n", buf, len); + ret = -EINVAL; + } + len = snprintf(buf, sizeof(buf), LPD64, u64val); + if (len != 2) { + CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len); + ret = -EINVAL; + } + if ((u64val & ~CFS_PAGE_MASK) >= CFS_PAGE_SIZE) { + CWARN("mask failed: u64val "LPU64" >= "LPU64"\n", u64val, + (__u64)CFS_PAGE_SIZE); + ret = -EINVAL; + } + + return ret; +} #else -void *obd_psdev = NULL; +#define obd_init_checks() do {} while(0) #endif -EXPORT_SYMBOL(obd_dev); -EXPORT_SYMBOL(obdo_cachep); -EXPORT_SYMBOL(obd_memory); -EXPORT_SYMBOL(obd_memmax); -EXPORT_SYMBOL(obd_fail_loc); -EXPORT_SYMBOL(obd_timeout); -EXPORT_SYMBOL(obd_lustre_upcall); -EXPORT_SYMBOL(obd_sync_filter); -EXPORT_SYMBOL(ptlrpc_put_connection_superhack); -EXPORT_SYMBOL(ptlrpc_abort_inflight_superhack); -EXPORT_SYMBOL(proc_lustre_root); - -EXPORT_SYMBOL(lctl_fake_uuid); - -EXPORT_SYMBOL(class_register_type); -EXPORT_SYMBOL(class_unregister_type); -EXPORT_SYMBOL(class_get_type); -EXPORT_SYMBOL(class_put_type); -EXPORT_SYMBOL(class_name2dev); -EXPORT_SYMBOL(class_name2obd); -EXPORT_SYMBOL(class_uuid2dev); -EXPORT_SYMBOL(class_uuid2obd); -EXPORT_SYMBOL(class_export_get); -EXPORT_SYMBOL(class_export_put); -EXPORT_SYMBOL(class_new_export); -EXPORT_SYMBOL(class_unlink_export); -EXPORT_SYMBOL(class_import_get); -EXPORT_SYMBOL(class_import_put); -EXPORT_SYMBOL(class_new_import); -EXPORT_SYMBOL(class_destroy_import); -EXPORT_SYMBOL(class_connect); -EXPORT_SYMBOL(class_conn2export); -EXPORT_SYMBOL(class_conn2obd); -EXPORT_SYMBOL(class_conn2cliimp); -EXPORT_SYMBOL(class_conn2ldlmimp); -EXPORT_SYMBOL(class_disconnect); -EXPORT_SYMBOL(class_disconnect_exports); - -/* uuid.c */ -EXPORT_SYMBOL(class_uuid_unparse); -EXPORT_SYMBOL(lustre_uuid_to_peer); -EXPORT_SYMBOL(client_tgtuuid2obd); - -EXPORT_SYMBOL(class_handle_hash); -EXPORT_SYMBOL(class_handle_unhash); -EXPORT_SYMBOL(class_handle2object); +extern spinlock_t obd_types_lock; +extern int class_procfs_init(void); +extern int class_procfs_clean(void); #ifdef __KERNEL__ static int __init init_obdclass(void) @@ -822,116 +531,184 @@ static int __init init_obdclass(void) int init_obdclass(void) #endif { - struct obd_device *obd; - int err; - int i; + int i, err; +#ifdef __KERNEL__ + int lustre_register_fs(void); + + for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++) + CFS_INIT_LIST_HEAD(&capa_list[i]); +#endif - printk(KERN_INFO "OBD class driver Build Version: " BUILD_VERSION - ", info@clusterfs.com\n"); + LCONSOLE_INFO("Lustre: Build Version: "BUILD_VERSION"\n"); + + spin_lock_init(&obd_types_lock); + obd_zombie_impexp_init(); +#ifdef LPROCFS + obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM, + LPROCFS_STATS_FLAG_NONE | + LPROCFS_STATS_FLAG_IRQ_SAFE); + if (obd_memory == NULL) { + CERROR("kmalloc of 'obd_memory' failed\n"); + RETURN(-ENOMEM); + } + + lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT, + LPROCFS_CNTR_AVGMINMAX, + "memused", "bytes"); + lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT, + LPROCFS_CNTR_AVGMINMAX, + "pagesused", "pages"); +#endif + err = obd_init_checks(); + if (err == -EOVERFLOW) + return err; class_init_uuidlist(); err = class_handle_init(); if (err) return err; - sema_init(&obd_conf_sem, 1); - INIT_LIST_HEAD(&obd_types); + CFS_INIT_LIST_HEAD(&obd_types); - err = misc_register(&obd_psdev); + err = cfs_psdev_register(&obd_psdev); if (err) { - CERROR("cannot register %d err %d\n", OBD_MINOR, err); + CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err); return err; } - /* This struct is already zerod for us (static global) */ - for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++) - obd->obd_minor = i; + /* This struct is already zeroed for us (static global) */ + for (i = 0; i < class_devno_max(); i++) + obd_devs[i] = NULL; + + /* Default the dirty page cache cap to 1/2 of system memory. + * For clients with less memory, a larger fraction is needed + * for other purposes (mostly for BGL). */ + if (cfs_num_physpages <= 512 << (20 - CFS_PAGE_SHIFT)) + obd_max_dirty_pages = cfs_num_physpages / 4; + else + obd_max_dirty_pages = cfs_num_physpages / 2; err = obd_init_caches(); if (err) return err; - #ifdef __KERNEL__ - obd_sysctl_init(); + err = class_procfs_init(); + if (err) + return err; #endif -#ifdef LPROCFS - proc_lustre_root = proc_mkdir("lustre", proc_root_fs); - if (!proc_lustre_root) - printk(KERN_ERR "error registering /proc/fs/lustre\n"); - proc_version = lprocfs_add_vars(proc_lustre_root,lprocfs_version,NULL); -#else - proc_lustre_root = NULL; - proc_version = -1; + err = lu_global_init(); + if (err) + return err; + + err = llog_info_init(); + if (err) + return err; + +#ifdef __KERNEL__ + err = lustre_register_fs(); #endif - return 0; + + return err; +} + +void obd_update_maxusage(void) +{ + __u64 max1, max2; + + max1 = obd_pages_sum(); + max2 = obd_memory_sum(); + + spin_lock(&obd_updatemax_lock); + if (max1 > obd_max_pages) + obd_max_pages = max1; + if (max2 > obd_max_alloc) + obd_max_alloc = max2; + spin_unlock(&obd_updatemax_lock); } +EXPORT_SYMBOL(obd_update_maxusage); #ifdef LPROCFS -int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data) { - *eof = 1; - return snprintf(page, count, "%s\n", BUILD_VERSION); +__u64 obd_memory_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_alloc; + spin_unlock(&obd_updatemax_lock); + + return ret; } -#else -int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } +EXPORT_SYMBOL(obd_memory_max); + +__u64 obd_pages_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_pages; + spin_unlock(&obd_updatemax_lock); + + return ret; +} +EXPORT_SYMBOL(obd_pages_max); #endif +/* liblustre doesn't call cleanup_obdclass, apparently. we carry on in this + * ifdef to the end of the file to cover module and versioning goo.*/ #ifdef __KERNEL__ -static void /*__exit*/ cleanup_obdclass(void) -#else static void cleanup_obdclass(void) -#endif { int i; + int lustre_unregister_fs(void); + __u64 memory_leaked, pages_leaked; + __u64 memory_max, pages_max; ENTRY; - misc_deregister(&obd_psdev); - for (i = 0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (obd->obd_type && obd->obd_set_up && + lustre_unregister_fs(); + + cfs_psdev_deregister(&obd_psdev); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd && obd->obd_set_up && OBT(obd) && OBP(obd, detach)) { /* XXX should this call generic detach otherwise? */ + LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); OBP(obd, detach)(obd); } } + llog_info_fini(); + lu_global_fini(); obd_cleanup_caches(); -#ifdef __KERNEL__ obd_sysctl_clean(); -#endif - if (proc_lustre_root) { - lprocfs_remove(proc_lustre_root); - proc_lustre_root = NULL; - } + + class_procfs_clean(); class_handle_cleanup(); class_exit_uuidlist(); + obd_zombie_impexp_stop(); + + memory_leaked = obd_memory_sum(); + pages_leaked = obd_pages_sum(); + + memory_max = obd_memory_max(); + pages_max = obd_pages_max(); + + lprocfs_free_stats(&obd_memory); + CDEBUG((memory_leaked) ? D_ERROR : D_INFO, + "obd_memory max: "LPU64", leaked: "LPU64"\n", + memory_max, memory_leaked); + CDEBUG((pages_leaked) ? D_ERROR : D_INFO, + "obd_memory_pages max: "LPU64", leaked: "LPU64"\n", + pages_max, pages_leaked); - CERROR("obd mem max: %d leaked: %d\n", obd_memmax, - atomic_read(&obd_memory)); EXIT; } -/* Check that we're building against the appropriate version of the Lustre - * kernel patch */ -#ifdef __KERNEL__ -#include -#define LUSTRE_MIN_VERSION 23 -#define LUSTRE_MAX_VERSION 23 -#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION) -# error Cannot continue: Your Lustre kernel patch is older than the sources -#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION) -# error Cannot continue: Your Lustre sources are older than the kernel patch -#endif - #else -# warning "Lib Lustre - no versioning information" -#endif - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_AUTHOR("Sun Microsystems, Inc. "); MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION); MODULE_LICENSE("GPL"); -module_init(init_obdclass); -module_exit(cleanup_obdclass); +cfs_module(obdclass, LUSTRE_VERSION_STRING, init_obdclass, cleanup_obdclass); #endif