X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fclass_obd.c;h=71b479ae2a92a3456332a72c4b22fdc9aeec28ef;hb=19fff6ccfbd2889bc140fb5a8ee74902f8a35cc0;hp=4f17e42121edcee63fb8cc711a430d9dfbccf014;hpb=c0349df81f9b0857686ecc29b026c8bc523086e6;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 4f17e42..71b479a 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -35,9 +35,6 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #ifndef __KERNEL__ # include #else @@ -46,10 +43,16 @@ #include #include +#include #include #include #include #include +#include +#ifdef HAVE_SERVER_SUPPORT +# include +# include +#endif /* HAVE_SERVER_SUPPORT */ #include "llog_internal.h" #ifndef __KERNEL__ @@ -58,58 +61,142 @@ atomic_t libcfs_kmemory = {0}; #endif struct obd_device *obd_devs[MAX_OBD_DEVICES]; -struct list_head obd_types; -spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED; +EXPORT_SYMBOL(obd_devs); +cfs_list_t obd_types; +DEFINE_RWLOCK(obd_dev_lock); -#ifndef __KERNEL__ __u64 obd_max_pages = 0; __u64 obd_max_alloc = 0; +#ifndef __KERNEL__ __u64 obd_alloc; __u64 obd_pages; #endif +DEFINE_SPINLOCK(obd_updatemax_lock); /* The following are visible and mutable through /proc/sys/lustre/. */ +unsigned int obd_alloc_fail_rate = 0; +EXPORT_SYMBOL(obd_alloc_fail_rate); unsigned int obd_debug_peer_on_timeout; +EXPORT_SYMBOL(obd_debug_peer_on_timeout); unsigned int obd_dump_on_timeout; +EXPORT_SYMBOL(obd_dump_on_timeout); unsigned int obd_dump_on_eviction; -unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ -unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ +EXPORT_SYMBOL(obd_dump_on_eviction); unsigned int obd_max_dirty_pages = 256; +EXPORT_SYMBOL(obd_max_dirty_pages); +atomic_t obd_unstable_pages; +EXPORT_SYMBOL(obd_unstable_pages); atomic_t obd_dirty_pages; +EXPORT_SYMBOL(obd_dirty_pages); +unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ +EXPORT_SYMBOL(obd_timeout); +unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ +EXPORT_SYMBOL(ldlm_timeout); +unsigned int obd_timeout_set; +EXPORT_SYMBOL(obd_timeout_set); +unsigned int ldlm_timeout_set; +EXPORT_SYMBOL(ldlm_timeout_set); +/* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */ +unsigned int at_min = 0; +EXPORT_SYMBOL(at_min); +unsigned int at_max = 600; +EXPORT_SYMBOL(at_max); +unsigned int at_history = 600; +EXPORT_SYMBOL(at_history); +int at_early_margin = 5; +EXPORT_SYMBOL(at_early_margin); +int at_extra = 30; +EXPORT_SYMBOL(at_extra); + +atomic_t obd_dirty_transit_pages; +EXPORT_SYMBOL(obd_dirty_transit_pages); + +char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE; +EXPORT_SYMBOL(obd_jobid_var); -cfs_waitq_t obd_race_waitq; -int obd_race_state; - -#ifdef __KERNEL__ -unsigned long obd_print_fail_loc(void) -{ - CWARN("obd_fail_loc = %lx\n", obd_fail_loc); - return obd_fail_loc; -} - -void obd_set_fail_loc(unsigned int fl) -{ - obd_fail_loc = fl; -} +#ifdef LPROCFS +struct lprocfs_stats *obd_memory = NULL; +EXPORT_SYMBOL(obd_memory); +#endif -/* opening /dev/obd */ -static int obd_class_open(unsigned long flags, void *args) +/* Get jobid of current process by reading the environment variable + * stored in between the "env_start" & "env_end" of task struct. + * + * TODO: + * It's better to cache the jobid for later use if there is any + * efficient way, the cl_env code probably could be reused for this + * purpose. + * + * If some job scheduler doesn't store jobid in the "env_start/end", + * then an upcall could be issued here to get the jobid by utilizing + * the userspace tools/api. Then, the jobid must be cached. + */ +int lustre_get_jobid(char *jobid) { - ENTRY; - - PORTAL_MODULE_USE; - RETURN(0); + int jobid_len = JOBSTATS_JOBID_SIZE; + int rc = 0; + ENTRY; + + memset(jobid, 0, JOBSTATS_JOBID_SIZE); + /* Jobstats isn't enabled */ + if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) + RETURN(0); + + /* Use process name + fsuid as jobid */ + if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { + snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u", + current_comm(), current_fsuid()); + RETURN(0); + } + + rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len); + if (rc) { + if (rc == -EOVERFLOW) { + /* For the PBS_JOBID and LOADL_STEP_ID keys (which are + * variable length strings instead of just numbers), it + * might make sense to keep the unique parts for JobID, + * instead of just returning an error. That means a + * larger temp buffer for cfs_get_environ(), then + * truncating the string at some separator to fit into + * the specified jobid_len. Fix later if needed. */ + static bool printed; + if (unlikely(!printed)) { + LCONSOLE_ERROR_MSG(0x16b, "%s value too large " + "for JobID buffer (%d)\n", + obd_jobid_var, jobid_len); + printed = true; + } + } else { + CDEBUG((rc == -ENOENT || rc == -EINVAL || + rc == -EDEADLK) ? D_INFO : D_ERROR, + "Get jobid for (%s) failed: rc = %d\n", + obd_jobid_var, rc); + } + } + RETURN(rc); } +EXPORT_SYMBOL(lustre_get_jobid); -/* closing /dev/obd */ -static int obd_class_release(unsigned long flags, void *args) +int obd_alloc_fail(const void *ptr, const char *name, const char *type, + size_t size, const char *file, int line) { - ENTRY; - - PORTAL_MODULE_UNUSE; - RETURN(0); + if (ptr == NULL || + (cfs_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) { + CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n", + ptr ? "force " :"", type, name, (__u64)size, file, + line); + CERROR(LPU64" total bytes and "LPU64" total pages " + "("LPU64" bytes) allocated by Lustre, " + "%d total bytes by LNET\n", + obd_memory_sum(), + obd_pages_sum() << PAGE_CACHE_SHIFT, + obd_pages_sum(), + atomic_read(&libcfs_kmemory)); + return 1; + } + return 0; } -#endif +EXPORT_SYMBOL(obd_alloc_fail); static inline void obd_data2conn(struct lustre_handle *conn, struct obd_ioctl_data *data) @@ -173,7 +260,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) CDEBUG(D_IOCTL, "cmd = %x\n", cmd); if (obd_ioctl_getdata(&buf, &len, (void *)arg)) { CERROR("OBD ioctl: data error\n"); - GOTO(out, err = -EINVAL); + RETURN(-EINVAL); } data = (struct obd_ioctl_data *)buf; @@ -188,7 +275,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) OBD_ALLOC(lcfg, data->ioc_plen1); if (lcfg == NULL) GOTO(out, err = -ENOMEM); - err = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1); + err = copy_from_user(lcfg, data->ioc_pbuf1, + data->ioc_plen1); if (!err) err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1); if (!err) @@ -304,7 +392,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d", (int)index, status, obd->obd_type->typ_name, obd->obd_name, obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); + atomic_read(&obd->obd_refcount)); err = obd_ioctl_popdata((void *)arg, data, len); GOTO(out, err = 0); @@ -312,12 +400,19 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) } - if (data->ioc_dev >= class_devno_max()) { + if (data->ioc_dev == OBD_DEV_BY_DEVNAME) { + if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL) + GOTO(out, err = -EINVAL); + if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME) + GOTO(out, err = -EINVAL); + obd = class_name2obd(data->ioc_inlbuf4); + } else if (data->ioc_dev < class_devno_max()) { + obd = class_num2obd(data->ioc_dev); + } else { CERROR("OBD ioctl: No device\n"); GOTO(out, err = -EINVAL); } - obd = class_num2obd(data->ioc_dev); if (obd == NULL) { CERROR("OBD ioctl : No Device %d\n", data->ioc_dev); GOTO(out, err = -EINVAL); @@ -359,95 +454,12 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) RETURN(err); } /* class_handle_ioctl */ - - -#define OBD_MINOR 241 #ifdef __KERNEL__ -/* to control /dev/obd */ -static int obd_class_ioctl (struct cfs_psdev_file *pfile, unsigned long cmd, - void *arg) -{ - return class_handle_ioctl(cmd, (unsigned long)arg); -} - -/* declare character device */ -struct cfs_psdev_ops obd_psdev_ops = { - /* .p_open = */ obd_class_open, /* open */ - /* .p_close = */ obd_class_release, /* release */ - /* .p_read = */ NULL, - /* .p_write = */ NULL, - /* .p_ioctl = */ obd_class_ioctl /* ioctl */ -}; - -extern cfs_psdev_t obd_psdev; +extern struct miscdevice obd_psdev; #else -void *obd_psdev = NULL; +struct miscdevice obd_psdev; #endif -EXPORT_SYMBOL(obd_devs); -EXPORT_SYMBOL(obd_print_fail_loc); -EXPORT_SYMBOL(obd_race_waitq); -EXPORT_SYMBOL(obd_race_state); -EXPORT_SYMBOL(obd_debug_peer_on_timeout); -EXPORT_SYMBOL(obd_dump_on_timeout); -EXPORT_SYMBOL(obd_dump_on_eviction); -EXPORT_SYMBOL(obd_timeout); -EXPORT_SYMBOL(ldlm_timeout); -EXPORT_SYMBOL(obd_max_dirty_pages); -EXPORT_SYMBOL(obd_dirty_pages); -EXPORT_SYMBOL(ptlrpc_put_connection_superhack); - -EXPORT_SYMBOL(proc_lustre_root); - -EXPORT_SYMBOL(class_register_type); -EXPORT_SYMBOL(class_unregister_type); -EXPORT_SYMBOL(class_get_type); -EXPORT_SYMBOL(class_put_type); -EXPORT_SYMBOL(class_name2dev); -EXPORT_SYMBOL(class_name2obd); -EXPORT_SYMBOL(class_uuid2dev); -EXPORT_SYMBOL(class_uuid2obd); -EXPORT_SYMBOL(class_find_client_obd); -EXPORT_SYMBOL(class_find_client_notype); -EXPORT_SYMBOL(class_devices_in_group); -EXPORT_SYMBOL(class_conn2export); -EXPORT_SYMBOL(class_exp2obd); -EXPORT_SYMBOL(class_conn2obd); -EXPORT_SYMBOL(class_exp2cliimp); -EXPORT_SYMBOL(class_conn2cliimp); -EXPORT_SYMBOL(class_disconnect); -EXPORT_SYMBOL(class_num2obd); - -/* uuid.c */ -EXPORT_SYMBOL(class_uuid_unparse); -EXPORT_SYMBOL(lustre_uuid_to_peer); - -EXPORT_SYMBOL(class_handle_hash); -EXPORT_SYMBOL(class_handle_unhash); -EXPORT_SYMBOL(class_handle_hash_back); -EXPORT_SYMBOL(class_handle2object); -EXPORT_SYMBOL(class_handle_free_cb); - -/* obd_config.c */ -EXPORT_SYMBOL(class_incref); -EXPORT_SYMBOL(class_decref); -EXPORT_SYMBOL(class_get_profile); -EXPORT_SYMBOL(class_del_profile); -EXPORT_SYMBOL(class_del_profiles); -EXPORT_SYMBOL(class_process_config); -EXPORT_SYMBOL(class_process_proc_param); -EXPORT_SYMBOL(class_config_parse_llog); -EXPORT_SYMBOL(class_config_dump_llog); -EXPORT_SYMBOL(class_attach); -EXPORT_SYMBOL(class_setup); -EXPORT_SYMBOL(class_cleanup); -EXPORT_SYMBOL(class_detach); -EXPORT_SYMBOL(class_manual_cleanup); - -/* mea.c */ -EXPORT_SYMBOL(mea_name2idx); -EXPORT_SYMBOL(raw_name2idx); - #define OBD_INIT_CHECK #ifdef OBD_INIT_CHECK int obd_init_checks(void) @@ -456,8 +468,7 @@ int obd_init_checks(void) char buf[64]; int len, ret = 0; - CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s, LPSZ=%s, LPSSZ=%s\n", - LPU64, LPD64, LPX64, LPSZ, LPSSZ); + CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", LPU64, LPD64, LPX64); CDEBUG(D_INFO, "OBD_OBJECT_EOF = "LPX64"\n", (__u64)OBD_OBJECT_EOF); @@ -510,9 +521,9 @@ int obd_init_checks(void) CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len); ret = -EINVAL; } - if ((u64val & ~CFS_PAGE_MASK) >= CFS_PAGE_SIZE) { + if ((u64val & ~CFS_PAGE_MASK) >= PAGE_CACHE_SIZE) { CWARN("mask failed: u64val "LPU64" >= "LPU64"\n", u64val, - (__u64)CFS_PAGE_SIZE); + (__u64)PAGE_CACHE_SIZE); ret = -EINVAL; } @@ -540,16 +551,14 @@ int init_obdclass(void) CFS_INIT_LIST_HEAD(&capa_list[i]); #endif - LCONSOLE_INFO("Lustre: OBD class driver, http://www.lustre.org\n"); - LCONSOLE_INFO(" Lustre Version: "LUSTRE_VERSION_STRING"\n"); - LCONSOLE_INFO(" Build Version: "BUILD_VERSION"\n"); + LCONSOLE_INFO("Lustre: Build Version: "BUILD_VERSION"\n"); - spin_lock_init(&obd_types_lock); - cfs_waitq_init(&obd_race_waitq); + spin_lock_init(&obd_types_lock); obd_zombie_impexp_init(); #ifdef LPROCFS obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM, - LPROCFS_STATS_FLAG_PERCPU); + LPROCFS_STATS_FLAG_NONE | + LPROCFS_STATS_FLAG_IRQ_SAFE); if (obd_memory == NULL) { CERROR("kmalloc of 'obd_memory' failed\n"); RETURN(-ENOMEM); @@ -571,43 +580,110 @@ int init_obdclass(void) if (err) return err; - spin_lock_init(&obd_dev_lock); CFS_INIT_LIST_HEAD(&obd_types); - err = cfs_psdev_register(&obd_psdev); - if (err) { - CERROR("cannot register %d err %d\n", OBD_MINOR, err); - return err; - } + err = misc_register(&obd_psdev); + if (err) { + CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err); + return err; + } - /* This struct is already zerod for us (static global) */ + /* This struct is already zeroed for us (static global) */ for (i = 0; i < class_devno_max(); i++) obd_devs[i] = NULL; /* Default the dirty page cache cap to 1/2 of system memory. * For clients with less memory, a larger fraction is needed * for other purposes (mostly for BGL). */ - if (num_physpages <= 512 << (20 - CFS_PAGE_SHIFT)) - obd_max_dirty_pages = num_physpages / 4; - else - obd_max_dirty_pages = num_physpages / 2; + if (totalram_pages <= 512 << (20 - PAGE_CACHE_SHIFT)) + obd_max_dirty_pages = totalram_pages / 4; + else + obd_max_dirty_pages = totalram_pages / 2; err = obd_init_caches(); if (err) return err; #ifdef __KERNEL__ - err = lu_global_init(); - if (err) - return err; err = class_procfs_init(); if (err) return err; +#endif + + err = lu_global_init(); + if (err) + return err; + + err = lu_capainfo_init(); + if (err) + return err; + + err = cl_global_init(); + if (err != 0) + return err; + +#if defined(__KERNEL__) && defined(HAVE_SERVER_SUPPORT) + err = dt_global_init(); + if (err != 0) + return err; + + err = lu_ucred_global_init(); + if (err != 0) + return err; +#endif + + err = llog_info_init(); + if (err) + return err; + +#ifdef __KERNEL__ err = lustre_register_fs(); #endif return err; } +void obd_update_maxusage(void) +{ + __u64 max1, max2; + + max1 = obd_pages_sum(); + max2 = obd_memory_sum(); + + spin_lock(&obd_updatemax_lock); + if (max1 > obd_max_pages) + obd_max_pages = max1; + if (max2 > obd_max_alloc) + obd_max_alloc = max2; + spin_unlock(&obd_updatemax_lock); +} +EXPORT_SYMBOL(obd_update_maxusage); + +#ifdef LPROCFS +__u64 obd_memory_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_alloc; + spin_unlock(&obd_updatemax_lock); + + return ret; +} +EXPORT_SYMBOL(obd_memory_max); + +__u64 obd_pages_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_pages; + spin_unlock(&obd_updatemax_lock); + + return ret; +} +EXPORT_SYMBOL(obd_pages_max); +#endif + /* liblustre doesn't call cleanup_obdclass, apparently. we carry on in this * ifdef to the end of the file to cover module and versioning goo.*/ #ifdef __KERNEL__ @@ -621,17 +697,24 @@ static void cleanup_obdclass(void) lustre_unregister_fs(); - cfs_psdev_deregister(&obd_psdev); - for (i = 0; i < class_devno_max(); i++) { - struct obd_device *obd = class_num2obd(i); - if (obd && obd->obd_set_up && - OBT(obd) && OBP(obd, detach)) { - /* XXX should this call generic detach otherwise? */ - LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - OBP(obd, detach)(obd); - } - } - lu_global_fini(); + misc_deregister(&obd_psdev); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd && obd->obd_set_up && + OBT(obd) && OBP(obd, detach)) { + /* XXX should this call generic detach otherwise? */ + LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); + OBP(obd, detach)(obd); + } + } + llog_info_fini(); +#ifdef HAVE_SERVER_SUPPORT + lu_ucred_global_fini(); + dt_global_fini(); +#endif + cl_global_fini(); + lu_capainfo_fini(); + lu_global_fini(); obd_cleanup_caches(); obd_sysctl_clean(); @@ -649,14 +732,12 @@ static void cleanup_obdclass(void) pages_max = obd_pages_max(); lprocfs_free_stats(&obd_memory); - if (memory_leaked > 0) { - CWARN("Memory leaks detected (max "LPU64", leaked "LPD64")\n", - memory_max, memory_leaked); - } - if (pages_leaked > 0) { - CWARN("Page leaks detected (max "LPU64", leaked "LPU64")\n", - pages_max, pages_leaked); - } + CDEBUG((memory_leaked) ? D_ERROR : D_INFO, + "obd_memory max: "LPU64", leaked: "LPU64"\n", + memory_max, memory_leaked); + CDEBUG((pages_leaked) ? D_ERROR : D_INFO, + "obd_memory_pages max: "LPU64", leaked: "LPU64"\n", + pages_max, pages_leaked); EXIT; }