X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fclass_obd.c;h=2747e10eff2681fa154a1649c1b548f55b1caf0a;hb=29e98f581ab68ecdfc1b923925eda2ec5b251f3a;hp=5cfbbcb1fc3ac33c3ef2c8a4fc764392b2436fb3;hpb=33b82aa398e5ecd3c29d0a9eb5f5eba73611c6f6;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 5cfbbcb..2747e10 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -35,9 +35,6 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #ifndef __KERNEL__ # include #else @@ -46,70 +43,153 @@ #include #include +#include #include #include #include #include +#include +#ifdef HAVE_SERVER_SUPPORT +# include +# include +#endif /* HAVE_SERVER_SUPPORT */ #include "llog_internal.h" #ifndef __KERNEL__ /* liblustre workaround */ -atomic_t libcfs_kmemory = {0}; +cfs_atomic_t libcfs_kmemory = {0}; #endif struct obd_device *obd_devs[MAX_OBD_DEVICES]; -struct list_head obd_types; -spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED; +EXPORT_SYMBOL(obd_devs); +cfs_list_t obd_types; +DEFINE_RWLOCK(obd_dev_lock); -#ifndef __KERNEL__ __u64 obd_max_pages = 0; __u64 obd_max_alloc = 0; +#ifndef __KERNEL__ __u64 obd_alloc; __u64 obd_pages; #endif +DEFINE_SPINLOCK(obd_updatemax_lock); /* The following are visible and mutable through /proc/sys/lustre/. */ +unsigned int obd_alloc_fail_rate = 0; +EXPORT_SYMBOL(obd_alloc_fail_rate); unsigned int obd_debug_peer_on_timeout; +EXPORT_SYMBOL(obd_debug_peer_on_timeout); unsigned int obd_dump_on_timeout; +EXPORT_SYMBOL(obd_dump_on_timeout); unsigned int obd_dump_on_eviction; +EXPORT_SYMBOL(obd_dump_on_eviction); +unsigned int obd_max_dirty_pages = 256; +EXPORT_SYMBOL(obd_max_dirty_pages); +cfs_atomic_t obd_dirty_pages; +EXPORT_SYMBOL(obd_dirty_pages); unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ +EXPORT_SYMBOL(obd_timeout); unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ -unsigned int obd_max_dirty_pages = 256; -atomic_t obd_dirty_pages; - -cfs_waitq_t obd_race_waitq; -int obd_race_state; - -#ifdef __KERNEL__ -unsigned long obd_print_fail_loc(void) -{ - CWARN("obd_fail_loc = %lx\n", obd_fail_loc); - return obd_fail_loc; -} - -void obd_set_fail_loc(unsigned int fl) -{ - obd_fail_loc = fl; -} - -/* opening /dev/obd */ -static int obd_class_open(unsigned long flags, void *args) +EXPORT_SYMBOL(ldlm_timeout); +unsigned int obd_timeout_set; +EXPORT_SYMBOL(obd_timeout_set); +unsigned int ldlm_timeout_set; +EXPORT_SYMBOL(ldlm_timeout_set); +/* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */ +unsigned int at_min = 0; +EXPORT_SYMBOL(at_min); +unsigned int at_max = 600; +EXPORT_SYMBOL(at_max); +unsigned int at_history = 600; +EXPORT_SYMBOL(at_history); +int at_early_margin = 5; +EXPORT_SYMBOL(at_early_margin); +int at_extra = 30; +EXPORT_SYMBOL(at_extra); + +cfs_atomic_t obd_dirty_transit_pages; +EXPORT_SYMBOL(obd_dirty_transit_pages); + +char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE; +EXPORT_SYMBOL(obd_jobid_var); + +/* Get jobid of current process by reading the environment variable + * stored in between the "env_start" & "env_end" of task struct. + * + * TODO: + * It's better to cache the jobid for later use if there is any + * efficient way, the cl_env code probably could be reused for this + * purpose. + * + * If some job scheduler doesn't store jobid in the "env_start/end", + * then an upcall could be issued here to get the jobid by utilizing + * the userspace tools/api. Then, the jobid must be cached. + */ +int lustre_get_jobid(char *jobid) { - ENTRY; - - PORTAL_MODULE_USE; - RETURN(0); + int jobid_len = JOBSTATS_JOBID_SIZE; + int rc = 0; + ENTRY; + + memset(jobid, 0, JOBSTATS_JOBID_SIZE); + /* Jobstats isn't enabled */ + if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) + RETURN(0); + + /* Use process name + fsuid as jobid */ + if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { + snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u", + cfs_curproc_comm(), cfs_curproc_fsuid()); + RETURN(0); + } + + rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len); + if (rc) { + if (rc == -EOVERFLOW) { + /* For the PBS_JOBID and LOADL_STEP_ID keys (which are + * variable length strings instead of just numbers), it + * might make sense to keep the unique parts for JobID, + * instead of just returning an error. That means a + * larger temp buffer for cfs_get_environ(), then + * truncating the string at some separator to fit into + * the specified jobid_len. Fix later if needed. */ + static bool printed; + if (unlikely(!printed)) { + LCONSOLE_ERROR_MSG(0x16b, "%s value too large " + "for JobID buffer (%d)\n", + obd_jobid_var, jobid_len); + printed = true; + } + } else { + CDEBUG((rc == -ENOENT || rc == -EINVAL || + rc == -EDEADLK) ? D_INFO : D_ERROR, + "Get jobid for (%s) failed: rc = %d\n", + obd_jobid_var, rc); + } + } + RETURN(rc); } +EXPORT_SYMBOL(lustre_get_jobid); -/* closing /dev/obd */ -static int obd_class_release(unsigned long flags, void *args) +int obd_alloc_fail(const void *ptr, const char *name, const char *type, + size_t size, const char *file, int line) { - ENTRY; - - PORTAL_MODULE_UNUSE; - RETURN(0); + if (ptr == NULL || + (cfs_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) { + CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n", + ptr ? "force " :"", type, name, (__u64)size, file, + line); + CERROR(LPU64" total bytes and "LPU64" total pages " + "("LPU64" bytes) allocated by Lustre, " + "%d total bytes by LNET\n", + obd_memory_sum(), + obd_pages_sum() << CFS_PAGE_SHIFT, + obd_pages_sum(), + cfs_atomic_read(&libcfs_kmemory)); + return 1; + } + return 0; } -#endif +EXPORT_SYMBOL(obd_alloc_fail); static inline void obd_data2conn(struct lustre_handle *conn, struct obd_ioctl_data *data) @@ -173,7 +253,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) CDEBUG(D_IOCTL, "cmd = %x\n", cmd); if (obd_ioctl_getdata(&buf, &len, (void *)arg)) { CERROR("OBD ioctl: data error\n"); - GOTO(out, err = -EINVAL); + RETURN(-EINVAL); } data = (struct obd_ioctl_data *)buf; @@ -188,7 +268,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) OBD_ALLOC(lcfg, data->ioc_plen1); if (lcfg == NULL) GOTO(out, err = -ENOMEM); - err = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1); + err = cfs_copy_from_user(lcfg, data->ioc_pbuf1, + data->ioc_plen1); if (!err) err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1); if (!err) @@ -304,7 +385,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d", (int)index, status, obd->obd_type->typ_name, obd->obd_name, obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); + cfs_atomic_read(&obd->obd_refcount)); err = obd_ioctl_popdata((void *)arg, data, len); GOTO(out, err = 0); @@ -312,12 +393,19 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) } - if (data->ioc_dev >= class_devno_max()) { + if (data->ioc_dev == OBD_DEV_BY_DEVNAME) { + if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL) + GOTO(out, err = -EINVAL); + if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME) + GOTO(out, err = -EINVAL); + obd = class_name2obd(data->ioc_inlbuf4); + } else if (data->ioc_dev < class_devno_max()) { + obd = class_num2obd(data->ioc_dev); + } else { CERROR("OBD ioctl: No device\n"); GOTO(out, err = -EINVAL); } - obd = class_num2obd(data->ioc_dev); if (obd == NULL) { CERROR("OBD ioctl : No Device %d\n", data->ioc_dev); GOTO(out, err = -EINVAL); @@ -359,95 +447,12 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) RETURN(err); } /* class_handle_ioctl */ - - -#define OBD_MINOR 241 #ifdef __KERNEL__ -/* to control /dev/obd */ -static int obd_class_ioctl (struct cfs_psdev_file *pfile, unsigned long cmd, - void *arg) -{ - return class_handle_ioctl(cmd, (unsigned long)arg); -} - -/* declare character device */ -struct cfs_psdev_ops obd_psdev_ops = { - /* .p_open = */ obd_class_open, /* open */ - /* .p_close = */ obd_class_release, /* release */ - /* .p_read = */ NULL, - /* .p_write = */ NULL, - /* .p_ioctl = */ obd_class_ioctl /* ioctl */ -}; - extern cfs_psdev_t obd_psdev; #else void *obd_psdev = NULL; #endif -EXPORT_SYMBOL(obd_devs); -EXPORT_SYMBOL(obd_print_fail_loc); -EXPORT_SYMBOL(obd_race_waitq); -EXPORT_SYMBOL(obd_race_state); -EXPORT_SYMBOL(obd_debug_peer_on_timeout); -EXPORT_SYMBOL(obd_dump_on_timeout); -EXPORT_SYMBOL(obd_dump_on_eviction); -EXPORT_SYMBOL(obd_timeout); -EXPORT_SYMBOL(ldlm_timeout); -EXPORT_SYMBOL(obd_max_dirty_pages); -EXPORT_SYMBOL(obd_dirty_pages); -EXPORT_SYMBOL(ptlrpc_put_connection_superhack); - -EXPORT_SYMBOL(proc_lustre_root); - -EXPORT_SYMBOL(class_register_type); -EXPORT_SYMBOL(class_unregister_type); -EXPORT_SYMBOL(class_get_type); -EXPORT_SYMBOL(class_put_type); -EXPORT_SYMBOL(class_name2dev); -EXPORT_SYMBOL(class_name2obd); -EXPORT_SYMBOL(class_uuid2dev); -EXPORT_SYMBOL(class_uuid2obd); -EXPORT_SYMBOL(class_find_client_obd); -EXPORT_SYMBOL(class_find_client_notype); -EXPORT_SYMBOL(class_devices_in_group); -EXPORT_SYMBOL(class_conn2export); -EXPORT_SYMBOL(class_exp2obd); -EXPORT_SYMBOL(class_conn2obd); -EXPORT_SYMBOL(class_exp2cliimp); -EXPORT_SYMBOL(class_conn2cliimp); -EXPORT_SYMBOL(class_disconnect); -EXPORT_SYMBOL(class_num2obd); - -/* uuid.c */ -EXPORT_SYMBOL(class_uuid_unparse); -EXPORT_SYMBOL(lustre_uuid_to_peer); - -EXPORT_SYMBOL(class_handle_hash); -EXPORT_SYMBOL(class_handle_unhash); -EXPORT_SYMBOL(class_handle_hash_back); -EXPORT_SYMBOL(class_handle2object); -EXPORT_SYMBOL(class_handle_free_cb); - -/* obd_config.c */ -EXPORT_SYMBOL(class_incref); -EXPORT_SYMBOL(class_decref); -EXPORT_SYMBOL(class_get_profile); -EXPORT_SYMBOL(class_del_profile); -EXPORT_SYMBOL(class_del_profiles); -EXPORT_SYMBOL(class_process_config); -EXPORT_SYMBOL(class_process_proc_param); -EXPORT_SYMBOL(class_config_parse_llog); -EXPORT_SYMBOL(class_config_dump_llog); -EXPORT_SYMBOL(class_attach); -EXPORT_SYMBOL(class_setup); -EXPORT_SYMBOL(class_cleanup); -EXPORT_SYMBOL(class_detach); -EXPORT_SYMBOL(class_manual_cleanup); - -/* mea.c */ -EXPORT_SYMBOL(mea_name2idx); -EXPORT_SYMBOL(raw_name2idx); - #define OBD_INIT_CHECK #ifdef OBD_INIT_CHECK int obd_init_checks(void) @@ -456,8 +461,7 @@ int obd_init_checks(void) char buf[64]; int len, ret = 0; - CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s, LPSZ=%s, LPSSZ=%s\n", - LPU64, LPD64, LPX64, LPSZ, LPSSZ); + CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", LPU64, LPD64, LPX64); CDEBUG(D_INFO, "OBD_OBJECT_EOF = "LPX64"\n", (__u64)OBD_OBJECT_EOF); @@ -540,16 +544,14 @@ int init_obdclass(void) CFS_INIT_LIST_HEAD(&capa_list[i]); #endif - LCONSOLE_INFO("OBD class driver, http://www.lustre.org/\n"); - LCONSOLE_INFO(" Lustre Version: "LUSTRE_VERSION_STRING"\n"); - LCONSOLE_INFO(" Build Version: "BUILD_VERSION"\n"); + LCONSOLE_INFO("Lustre: Build Version: "BUILD_VERSION"\n"); - spin_lock_init(&obd_types_lock); - cfs_waitq_init(&obd_race_waitq); + spin_lock_init(&obd_types_lock); obd_zombie_impexp_init(); #ifdef LPROCFS obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM, - LPROCFS_STATS_FLAG_PERCPU); + LPROCFS_STATS_FLAG_NONE | + LPROCFS_STATS_FLAG_IRQ_SAFE); if (obd_memory == NULL) { CERROR("kmalloc of 'obd_memory' failed\n"); RETURN(-ENOMEM); @@ -571,12 +573,11 @@ int init_obdclass(void) if (err) return err; - spin_lock_init(&obd_dev_lock); CFS_INIT_LIST_HEAD(&obd_types); err = cfs_psdev_register(&obd_psdev); if (err) { - CERROR("cannot register %d err %d\n", OBD_MINOR, err); + CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err); return err; } @@ -587,27 +588,91 @@ int init_obdclass(void) /* Default the dirty page cache cap to 1/2 of system memory. * For clients with less memory, a larger fraction is needed * for other purposes (mostly for BGL). */ - if (num_physpages <= 512 << (20 - CFS_PAGE_SHIFT)) - obd_max_dirty_pages = num_physpages / 4; + if (cfs_num_physpages <= 512 << (20 - CFS_PAGE_SHIFT)) + obd_max_dirty_pages = cfs_num_physpages / 4; else - obd_max_dirty_pages = num_physpages / 2; + obd_max_dirty_pages = cfs_num_physpages / 2; err = obd_init_caches(); if (err) return err; #ifdef __KERNEL__ - err = lu_global_init(); + err = class_procfs_init(); if (err) return err; - err = class_procfs_init(); +#endif + + err = lu_global_init(); if (err) return err; + + err = cl_global_init(); + if (err != 0) + return err; + +#if defined(__KERNEL__) && defined(HAVE_SERVER_SUPPORT) + err = dt_global_init(); + if (err != 0) + return err; + + err = lu_ucred_global_init(); + if (err != 0) + return err; +#endif + + err = llog_info_init(); + if (err) + return err; + +#ifdef __KERNEL__ err = lustre_register_fs(); #endif return err; } +void obd_update_maxusage(void) +{ + __u64 max1, max2; + + max1 = obd_pages_sum(); + max2 = obd_memory_sum(); + + spin_lock(&obd_updatemax_lock); + if (max1 > obd_max_pages) + obd_max_pages = max1; + if (max2 > obd_max_alloc) + obd_max_alloc = max2; + spin_unlock(&obd_updatemax_lock); +} +EXPORT_SYMBOL(obd_update_maxusage); + +#ifdef LPROCFS +__u64 obd_memory_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_alloc; + spin_unlock(&obd_updatemax_lock); + + return ret; +} +EXPORT_SYMBOL(obd_memory_max); + +__u64 obd_pages_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_pages; + spin_unlock(&obd_updatemax_lock); + + return ret; +} +EXPORT_SYMBOL(obd_pages_max); +#endif + /* liblustre doesn't call cleanup_obdclass, apparently. we carry on in this * ifdef to the end of the file to cover module and versioning goo.*/ #ifdef __KERNEL__ @@ -631,6 +696,12 @@ static void cleanup_obdclass(void) OBP(obd, detach)(obd); } } + llog_info_fini(); +#ifdef HAVE_SERVER_SUPPORT + lu_ucred_global_fini(); + dt_global_fini(); +#endif + cl_global_fini(); lu_global_fini(); obd_cleanup_caches();