X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Fgenops.c;h=a74ee1844b542b608693d5e956f12b60d9dad1f2;hp=f31a97ad8339fa661d448f41b1375a510544ac6e;hb=41ab260642f4923106da3567d7c75e1ca3c61ea4;hpb=4d477d1468cf4be4c37681610b3d726fd27f229f diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index f31a97a..a74ee18 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1,39 +1,67 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (c) 2001, 2002 Cluster File Systems, Inc. + * GPL HEADER START * - * This file is part of Lustre, http://www.lustre.org. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/obdclass/genops.c * * These are the only exported functions, they provide some generic * infrastructure for managing object devices */ #define DEBUG_SUBSYSTEM S_CLASS -#include /* for request_module() */ -#include -#include -#include -#include -#include +#ifndef __KERNEL__ +#include +#endif +#include +#include +#include +#include extern struct list_head obd_types; -kmem_cache_t *obdo_cachep = NULL; -kmem_cache_t *import_cachep = NULL; -kmem_cache_t *export_cachep = NULL; +spinlock_t obd_types_lock; + +cfs_mem_cache_t *obd_device_cachep; +cfs_mem_cache_t *obdo_cachep; +EXPORT_SYMBOL(obdo_cachep); +cfs_mem_cache_t *import_cachep; + +struct list_head obd_zombie_imports; +struct list_head obd_zombie_exports; +spinlock_t obd_zombie_impexp_lock; +static void obd_zombie_impexp_notify(void); +static void obd_zombie_export_add(struct obd_export *exp); +static void obd_zombie_import_add(struct obd_import *imp); int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); @@ -41,75 +69,158 @@ int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); * support functions: we could use inter-module communication, but this * is more portable to other OS's */ -static struct obd_type *class_search_type(char *nm) +static struct obd_device *obd_device_alloc(void) +{ + struct obd_device *obd; + + OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, 
CFS_ALLOC_IO); + if (obd != NULL) { + obd->obd_magic = OBD_DEVICE_MAGIC; + } + return obd; +} +EXPORT_SYMBOL(obd_device_alloc); + +static void obd_device_free(struct obd_device *obd) +{ + LASSERT(obd != NULL); + LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n", + obd, obd->obd_magic, OBD_DEVICE_MAGIC); + if (obd->obd_namespace != NULL) { + CERROR("obd %p: namespace %p was not properly cleaned up (obd_force=%d)!\n", + obd, obd->obd_namespace, obd->obd_force); + LBUG(); + } + lu_ref_fini(&obd->obd_reference); + OBD_SLAB_FREE_PTR(obd, obd_device_cachep); +} + +struct obd_type *class_search_type(const char *name) { struct list_head *tmp; struct obd_type *type; - CDEBUG(D_INFO, "SEARCH %s\n", nm); - tmp = &obd_types; + spin_lock(&obd_types_lock); list_for_each(tmp, &obd_types) { type = list_entry(tmp, struct obd_type, typ_chain); - CDEBUG(D_INFO, "TYP %s\n", type->typ_name); - if (strlen(type->typ_name) == strlen(nm) && - strcmp(type->typ_name, nm) == 0 ) { + if (strcmp(type->typ_name, name) == 0) { + spin_unlock(&obd_types_lock); return type; } } + spin_unlock(&obd_types_lock); return NULL; } -struct obd_type *class_nm_to_type(char *nm) +struct obd_type *class_get_type(const char *name) { - struct obd_type *type = class_search_type(nm); + struct obd_type *type = class_search_type(name); #ifdef CONFIG_KMOD - if ( !type ) { - if ( !request_module(nm) ) { - CDEBUG(D_INFO, "Loaded module '%s'\n", nm); - type = class_search_type(nm); + if (!type) { + const char *modname = name; + if (!request_module(modname)) { + CDEBUG(D_INFO, "Loaded module '%s'\n", modname); + type = class_search_type(name); } else { - CDEBUG(D_INFO, "Can't load module '%s'\n", nm); + LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n", + modname); } } #endif + if (type) { + spin_lock(&type->obd_type_lock); + type->typ_refcnt++; + try_module_get(type->typ_dt_ops->o_owner); + spin_unlock(&type->obd_type_lock); + } return type; } -int class_register_type(struct obd_ops *ops, 
struct lprocfs_vars *vars, - char *nm) +void class_put_type(struct obd_type *type) { - struct obd_type *type; - int rc; + LASSERT(type); + spin_lock(&type->obd_type_lock); + type->typ_refcnt--; + module_put(type->typ_dt_ops->o_owner); + spin_unlock(&type->obd_type_lock); +} + +#define CLASS_MAX_NAME 1024 +int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, + struct lprocfs_vars *vars, const char *name, + struct lu_device_type *ldt) +{ + struct obd_type *type; + int rc = 0; ENTRY; - if (class_search_type(nm)) { - CDEBUG(D_IOCTL, "Type %s already registered\n", nm); + /* sanity check */ + LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME); + + if (class_search_type(name)) { + CDEBUG(D_IOCTL, "Type %s already registered\n", name); RETURN(-EEXIST); } + rc = -ENOMEM; OBD_ALLOC(type, sizeof(*type)); - OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops)); - OBD_ALLOC(type->typ_name, strlen(nm) + 1); - if (!type) - RETURN(-ENOMEM); - INIT_LIST_HEAD(&type->typ_chain); - CDEBUG(D_INFO, "MOD_INC_USE for register_type: count = %d\n", - atomic_read(&(THIS_MODULE)->uc.usecount)); - MOD_INC_USE_COUNT; + if (type == NULL) + RETURN(rc); + + OBD_ALLOC_PTR(type->typ_dt_ops); + OBD_ALLOC_PTR(type->typ_md_ops); + OBD_ALLOC(type->typ_name, strlen(name) + 1); + + if (type->typ_dt_ops == NULL || + type->typ_md_ops == NULL || + type->typ_name == NULL) + GOTO (failed, rc); + + *(type->typ_dt_ops) = *dt_ops; + /* md_ops is optional */ + if (md_ops) + *(type->typ_md_ops) = *md_ops; + strcpy(type->typ_name, name); + spin_lock_init(&type->obd_type_lock); + +#ifdef LPROCFS + type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root, + vars, type); + if (IS_ERR(type->typ_procroot)) { + rc = PTR_ERR(type->typ_procroot); + type->typ_procroot = NULL; + GOTO (failed, rc); + } +#endif + if (ldt != NULL) { + type->typ_lu = ldt; + rc = lu_device_type_init(ldt); + if (rc != 0) + GOTO (failed, rc); + } + + spin_lock(&obd_types_lock); list_add(&type->typ_chain, &obd_types); 
- memcpy(type->typ_ops, ops, sizeof(*type->typ_ops)); - strcpy(type->typ_name, nm); - rc = lprocfs_reg_class(type, vars, type); + spin_unlock(&obd_types_lock); + + RETURN (0); + failed: + if (type->typ_name != NULL) + OBD_FREE(type->typ_name, strlen(name) + 1); + if (type->typ_md_ops != NULL) + OBD_FREE_PTR(type->typ_md_ops); + if (type->typ_dt_ops != NULL) + OBD_FREE_PTR(type->typ_dt_ops); + OBD_FREE(type, sizeof(*type)); RETURN(rc); } -int class_unregister_type(char *nm) +int class_unregister_type(const char *name) { - struct obd_type *type = class_nm_to_type(nm); - + struct obd_type *type = class_search_type(name); ENTRY; if (!type) { @@ -118,96 +229,385 @@ int class_unregister_type(char *nm) } if (type->typ_refcnt) { - CERROR("type %s has refcount (%d)\n", nm, type->typ_refcnt); + CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt); /* This is a bad situation, let's make the best of it */ /* Remove ops, but leave the name for debugging */ - OBD_FREE(type->typ_ops, sizeof(*type->typ_ops)); + OBD_FREE_PTR(type->typ_dt_ops); + OBD_FREE_PTR(type->typ_md_ops); RETURN(-EBUSY); } - if(type->typ_procroot) - lprocfs_dereg_class(type); + if (type->typ_procroot) { + lprocfs_remove(&type->typ_procroot); + } + + if (type->typ_lu) + lu_device_type_fini(type->typ_lu); + + spin_lock(&obd_types_lock); list_del(&type->typ_chain); - OBD_FREE(type->typ_name, strlen(nm) + 1); - if (type->typ_ops != NULL) - OBD_FREE(type->typ_ops, sizeof(*type->typ_ops)); + spin_unlock(&obd_types_lock); + OBD_FREE(type->typ_name, strlen(name) + 1); + if (type->typ_dt_ops != NULL) + OBD_FREE_PTR(type->typ_dt_ops); + if (type->typ_md_ops != NULL) + OBD_FREE_PTR(type->typ_md_ops); OBD_FREE(type, sizeof(*type)); - CDEBUG(D_INFO, "MOD_DEC_USE for register_type: count = %d\n", - atomic_read(&(THIS_MODULE)->uc.usecount) - 1); - MOD_DEC_USE_COUNT; RETURN(0); } /* class_unregister_type */ -int class_name2dev(char *name) +/** + * Create a new obd device. 
+ * + * Find an empty slot in ::obd_devs[], create a new obd device in it. + * + * \param typename [in] obd device type string. + * \param name [in] obd device name. + * + * \retval NULL if create fails, otherwise return the obd device + * pointer created. + */ +struct obd_device *class_newdev(const char *type_name, const char *name) +{ + struct obd_device *result = NULL; + struct obd_device *newdev; + struct obd_type *type = NULL; + int i; + int new_obd_minor = 0; + + if (strlen(name) >= MAX_OBD_NAME) { + CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME); + RETURN(ERR_PTR(-EINVAL)); + } + + type = class_get_type(type_name); + if (type == NULL){ + CERROR("OBD: unknown type: %s\n", type_name); + RETURN(ERR_PTR(-ENODEV)); + } + + newdev = obd_device_alloc(); + if (newdev == NULL) { + class_put_type(type); + RETURN(ERR_PTR(-ENOMEM)); + } + LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC); + + spin_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd && obd->obd_name && + (strcmp(name, obd->obd_name) == 0)) { + CERROR("Device %s already exists, won't add\n", name); + if (result) { + LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC, + "%p obd_magic %08x != %08x\n", result, + result->obd_magic, OBD_DEVICE_MAGIC); + LASSERTF(result->obd_minor == new_obd_minor, + "%p obd_minor %d != %d\n", result, + result->obd_minor, new_obd_minor); + + obd_devs[result->obd_minor] = NULL; + result->obd_name[0]='\0'; + } + result = ERR_PTR(-EEXIST); + break; + } + if (!result && !obd) { + result = newdev; + result->obd_minor = i; + new_obd_minor = i; + result->obd_type = type; + strncpy(result->obd_name, name, + sizeof(result->obd_name) - 1); + obd_devs[i] = result; + } + } + spin_unlock(&obd_dev_lock); + + if (result == NULL && i >= class_devno_max()) { + CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n", + class_devno_max()); + result = ERR_PTR(-EOVERFLOW); + } + + if (IS_ERR(result)) { + 
obd_device_free(newdev); + class_put_type(type); + } else { + CDEBUG(D_IOCTL, "Adding new device %s (%p)\n", + result->obd_name, result); + } + return result; +} + +void class_release_dev(struct obd_device *obd) +{ + struct obd_type *obd_type = obd->obd_type; + + LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n", + obd, obd->obd_magic, OBD_DEVICE_MAGIC); + LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n", + obd, obd->obd_minor, obd_devs[obd->obd_minor]); + LASSERT(obd_type != NULL); + + CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n", + obd->obd_name,obd->obd_type->typ_name); + + spin_lock(&obd_dev_lock); + obd_devs[obd->obd_minor] = NULL; + spin_unlock(&obd_dev_lock); + obd_device_free(obd); + + class_put_type(obd_type); +} + +int class_name2dev(const char *name) { - int res = -1; int i; if (!name) return -1; - for (i=0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (obd->obd_name && strcmp(name, obd->obd_name) == 0) { - res = i; - return res; + spin_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) { + /* Make sure we finished attaching before we give + out any references */ + LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); + if (obd->obd_attached) { + spin_unlock(&obd_dev_lock); + return i; + } + break; } } + spin_unlock(&obd_dev_lock); - return res; + return -1; } -int class_uuid2dev(char *uuid) +struct obd_device *class_name2obd(const char *name) +{ + int dev = class_name2dev(name); + + if (dev < 0 || dev > class_devno_max()) + return NULL; + return class_num2obd(dev); +} + +int class_uuid2dev(struct obd_uuid *uuid) { - int res = -1; int i; - for (i=0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0) { - res = i; - return res; + spin_lock(&obd_dev_lock); + for (i = 0; i < 
class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) { + LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); + spin_unlock(&obd_dev_lock); + return i; } } + spin_unlock(&obd_dev_lock); - return res; + return -1; } +struct obd_device *class_uuid2obd(struct obd_uuid *uuid) +{ + int dev = class_uuid2dev(uuid); + if (dev < 0) + return NULL; + return class_num2obd(dev); +} + +/** + * Get obd device from ::obd_devs[] + * + * \param num [in] array index + * + * \retval NULL if ::obd_devs[\a num] does not contains an obd device + * otherwise return the obd device there. + */ +struct obd_device *class_num2obd(int num) +{ + struct obd_device *obd = NULL; + + if (num < class_devno_max()) { + obd = obd_devs[num]; + if (obd == NULL) + return NULL; + + LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, + "%p obd_magic %08x != %08x\n", + obd, obd->obd_magic, OBD_DEVICE_MAGIC); + LASSERTF(obd->obd_minor == num, + "%p obd_minor %0d != %0d\n", + obd, obd->obd_minor, num); + } + + return obd; +} -struct obd_device *class_uuid2obd(char *uuid) +void class_obd_list(void) { + char *status; int i; - for (i=0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0) + spin_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd == NULL) + continue; + if (obd->obd_stopping) + status = "ST"; + else if (obd->obd_set_up) + status = "UP"; + else if (obd->obd_attached) + status = "AT"; + else + status = "--"; + LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n", + i, status, obd->obd_type->typ_name, + obd->obd_name, obd->obd_uuid.uuid, + atomic_read(&obd->obd_refcount)); + } + spin_unlock(&obd_dev_lock); + return; +} + +/* Search for a client OBD connected to tgt_uuid. If grp_uuid is + specified, then only the client with that uuid is returned, + otherwise any client connected to the tgt is returned. 
*/ +struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, + const char * typ_name, + struct obd_uuid *grp_uuid) +{ + int i; + + spin_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd == NULL) + continue; + if ((strncmp(obd->obd_type->typ_name, typ_name, + strlen(typ_name)) == 0)) { + if (obd_uuid_equals(tgt_uuid, + &obd->u.cli.cl_target_uuid) && + ((grp_uuid)? obd_uuid_equals(grp_uuid, + &obd->obd_uuid) : 1)) { + spin_unlock(&obd_dev_lock); + return obd; + } + } + } + spin_unlock(&obd_dev_lock); + + return NULL; +} + +/* Iterate the obd_device list looking devices have grp_uuid. Start + searching at *next, and if a device is found, the next index to look + at is saved in *next. If next is NULL, then the first matching device + will always be returned. */ +struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) +{ + int i; + + if (next == NULL) + i = 0; + else if (*next >= 0 && *next < class_devno_max()) + i = *next; + else + return NULL; + + spin_lock(&obd_dev_lock); + for (; i < class_devno_max(); i++) { + struct obd_device *obd = class_num2obd(i); + if (obd == NULL) + continue; + if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) { + if (next != NULL) + *next = i+1; + spin_unlock(&obd_dev_lock); return obd; + } } + spin_unlock(&obd_dev_lock); return NULL; } +/** + * to notify sptlrpc log for @fsname has changed, let every relevant OBD + * adjust sptlrpc settings accordingly. 
+ */ +int class_notify_sptlrpc_conf(const char *fsname, int namelen) +{ + struct obd_device *obd; + const char *type; + int i, rc = 0, rc2; + + LASSERT(namelen > 0); + + spin_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + obd = class_num2obd(i); + + if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping) + continue; + + /* only notify mdc, osc, mdt, ost */ + type = obd->obd_type->typ_name; + if (strcmp(type, LUSTRE_MDC_NAME) != 0 && + strcmp(type, LUSTRE_OSC_NAME) != 0 && + strcmp(type, LUSTRE_MDT_NAME) != 0 && + strcmp(type, LUSTRE_OST_NAME) != 0) + continue; + + if (strncmp(obd->obd_name, fsname, namelen)) + continue; + + class_incref(obd, __FUNCTION__, obd); + spin_unlock(&obd_dev_lock); + rc2 = obd_set_info_async(obd->obd_self_export, + sizeof(KEY_SPTLRPC_CONF), + KEY_SPTLRPC_CONF, 0, NULL, NULL); + rc = rc ? rc : rc2; + class_decref(obd, __FUNCTION__, obd); + spin_lock(&obd_dev_lock); + } + spin_unlock(&obd_dev_lock); + return rc; +} +EXPORT_SYMBOL(class_notify_sptlrpc_conf); + void obd_cleanup_caches(void) { int rc; + ENTRY; + if (obd_device_cachep) { + rc = cfs_mem_cache_destroy(obd_device_cachep); + LASSERTF(rc == 0, "Cannot destropy ll_obd_device_cache: rc %d\n", rc); + obd_device_cachep = NULL; + } if (obdo_cachep) { - rc = kmem_cache_destroy(obdo_cachep); - if (rc) - CERROR("Cannot destory ll_obdo_cache\n"); + rc = cfs_mem_cache_destroy(obdo_cachep); + LASSERTF(rc == 0, "Cannot destory ll_obdo_cache\n"); obdo_cachep = NULL; } if (import_cachep) { - rc = kmem_cache_destroy(import_cachep); - if (rc) - CERROR("Cannot destory ll_import_cache\n"); + rc = cfs_mem_cache_destroy(import_cachep); + LASSERTF(rc == 0, "Cannot destory ll_import_cache\n"); import_cachep = NULL; } - if (export_cachep) { - rc = kmem_cache_destroy(export_cachep); - if (rc) - CERROR("Cannot destory ll_export_cache\n"); - export_cachep = NULL; + if (capa_cachep) { + rc = cfs_mem_cache_destroy(capa_cachep); + LASSERTF(rc == 0, "Cannot destory capa_cache\n"); + 
capa_cachep = NULL; } EXIT; } @@ -215,26 +615,33 @@ void obd_cleanup_caches(void) int obd_init_caches(void) { ENTRY; - LASSERT(obdo_cachep == NULL); - obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo), - 0, 0, NULL, NULL); - if (!obdo_cachep) + + LASSERT(obd_device_cachep == NULL); + obd_device_cachep = cfs_mem_cache_create("ll_obd_dev_cache", + sizeof(struct obd_device), + 0, 0); + if (!obd_device_cachep) GOTO(out, -ENOMEM); - LASSERT(export_cachep == NULL); - export_cachep = kmem_cache_create("ll_export_cache", - sizeof(struct obd_export), - 0, 0, NULL, NULL); - if (!export_cachep) + LASSERT(obdo_cachep == NULL); + obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", sizeof(struct obdo), + 0, 0); + if (!obdo_cachep) GOTO(out, -ENOMEM); LASSERT(import_cachep == NULL); - import_cachep = kmem_cache_create("ll_import_cache", - sizeof(struct obd_import), - 0, 0, NULL, NULL); + import_cachep = cfs_mem_cache_create("ll_import_cache", + sizeof(struct obd_import), + 0, 0); if (!import_cachep) GOTO(out, -ENOMEM); + LASSERT(capa_cachep == NULL); + capa_cachep = cfs_mem_cache_create("capa_cache", + sizeof(struct obd_capa), 0, 0); + if (!capa_cachep) + GOTO(out, -ENOMEM); + RETURN(0); out: obd_cleanup_caches(); @@ -246,257 +653,877 @@ int obd_init_caches(void) struct obd_export *class_conn2export(struct lustre_handle *conn) { struct obd_export *export; + ENTRY; if (!conn) { CDEBUG(D_CACHE, "looking for null handle\n"); RETURN(NULL); } - if (conn->addr == -1) { /* this means assign a new connection */ + if (conn->cookie == -1) { /* this means assign a new connection */ CDEBUG(D_CACHE, "want a new connection\n"); RETURN(NULL); } - if (!conn->addr) { - CDEBUG(D_CACHE, "looking for null addr\n"); - fixme(); - RETURN(NULL); - } - - CDEBUG(D_IOCTL, "looking for export addr "LPX64" cookie "LPX64"\n", - conn->addr, conn->cookie); - export = (struct obd_export *) (unsigned long)conn->addr; - if (!kmem_cache_validate(export_cachep, (void *)export)) - RETURN(NULL); - - 
if (export->exp_cookie != conn->cookie) - RETURN(NULL); + CDEBUG(D_INFO, "looking for export cookie "LPX64"\n", conn->cookie); + export = class_handle2object(conn->cookie); RETURN(export); -} /* class_conn2export */ +} + +struct obd_device *class_exp2obd(struct obd_export *exp) +{ + if (exp) + return exp->exp_obd; + return NULL; +} struct obd_device *class_conn2obd(struct lustre_handle *conn) { struct obd_export *export; export = class_conn2export(conn); - if (export) - return export->exp_obd; - fixme(); + if (export) { + struct obd_device *obd = export->exp_obd; + class_export_put(export); + return obd; + } return NULL; } +struct obd_import *class_exp2cliimp(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + if (obd == NULL) + return NULL; + return obd->u.cli.cl_import; +} + struct obd_import *class_conn2cliimp(struct lustre_handle *conn) { - return &class_conn2obd(conn)->u.cli.cl_import; + struct obd_device *obd = class_conn2obd(conn); + if (obd == NULL) + return NULL; + return obd->u.cli.cl_import; } -struct obd_import *class_conn2ldlmimp(struct lustre_handle *conn) +/* Export management functions */ +static void class_export_destroy(struct obd_export *exp) { - return &class_conn2export(conn)->exp_ldlm_data.led_import; + struct obd_device *obd = exp->exp_obd; + ENTRY; + + LASSERT (atomic_read(&exp->exp_refcount) == 0); + + CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp, + exp->exp_client_uuid.uuid, obd->obd_name); + + LASSERT(obd != NULL); + + /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. 
*/ + if (exp->exp_connection) + ptlrpc_put_connection_superhack(exp->exp_connection); + + LASSERT(list_empty(&exp->exp_outstanding_replies)); + LASSERT(list_empty(&exp->exp_uncommitted_replies)); + LASSERT(list_empty(&exp->exp_req_replay_queue)); + LASSERT(list_empty(&exp->exp_queued_rpc)); + obd_destroy_export(exp); + class_decref(obd, "export", exp); + + OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle); + EXIT; } -struct obd_export *class_new_export(struct obd_device *obddev) +static void export_handle_addref(void *export) { - struct obd_export * export; + class_export_get(export); +} - export = kmem_cache_alloc(export_cachep, GFP_KERNEL); - if (!export) { - CERROR("no memory! (minor %d)\n", obddev->obd_minor); - return NULL; +struct obd_export *class_export_get(struct obd_export *exp) +{ + atomic_inc(&exp->exp_refcount); + CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp, + atomic_read(&exp->exp_refcount)); + return exp; +} +EXPORT_SYMBOL(class_export_get); + +void class_export_put(struct obd_export *exp) +{ + LASSERT(exp != NULL); + CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp, + atomic_read(&exp->exp_refcount) - 1); + LASSERT(atomic_read(&exp->exp_refcount) > 0); + LASSERT(atomic_read(&exp->exp_refcount) < 0x5a5a5a); + + if (atomic_dec_and_test(&exp->exp_refcount)) { + CDEBUG(D_IOCTL, "final put %p/%s\n", + exp, exp->exp_client_uuid.uuid); + obd_zombie_export_add(exp); + } +} +EXPORT_SYMBOL(class_export_put); + +/* Creates a new export, adds it to the hash table, and returns a + * pointer to it. The refcount is 2: one for the hash reference, and + * one for the pointer returned by this function. 
*/ +struct obd_export *class_new_export(struct obd_device *obd, + struct obd_uuid *cluuid) +{ + struct obd_export *export; + int rc = 0; + + OBD_ALLOC_PTR(export); + if (!export) + return ERR_PTR(-ENOMEM); + + export->exp_conn_cnt = 0; + export->exp_lock_hash = NULL; + atomic_set(&export->exp_refcount, 2); + atomic_set(&export->exp_rpc_count, 0); + export->exp_obd = obd; + CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); + spin_lock_init(&export->exp_uncommitted_replies_lock); + CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies); + CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue); + CFS_INIT_LIST_HEAD(&export->exp_handle.h_link); + CFS_INIT_LIST_HEAD(&export->exp_queued_rpc); + class_handle_hash(&export->exp_handle, export_handle_addref); + export->exp_last_request_time = cfs_time_current_sec(); + spin_lock_init(&export->exp_lock); + INIT_HLIST_NODE(&export->exp_uuid_hash); + INIT_HLIST_NODE(&export->exp_nid_hash); + + export->exp_sp_peer = LUSTRE_SP_ANY; + export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; + export->exp_client_uuid = *cluuid; + obd_init_export(export); + + spin_lock(&obd->obd_dev_lock); + if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) { + rc = lustre_hash_add_unique(obd->obd_uuid_hash, cluuid, + &export->exp_uuid_hash); + if (rc != 0) { + LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n", + obd->obd_name, cluuid->uuid, rc); + spin_unlock(&obd->obd_dev_lock); + class_handle_unhash(&export->exp_handle); + OBD_FREE_PTR(export); + return ERR_PTR(-EALREADY); + } } - memset(export, 0, sizeof(*export)); - get_random_bytes(&export->exp_cookie, sizeof(export->exp_cookie)); - export->exp_obd = obddev; - /* XXX this should be in LDLM init */ - INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks); - INIT_LIST_HEAD(&export->exp_conn_chain); - spin_lock(&obddev->obd_dev_lock); + LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */ + class_incref(obd, "export", export); list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); 
- spin_unlock(&obddev->obd_dev_lock); + list_add_tail(&export->exp_obd_chain_timed, + &export->exp_obd->obd_exports_timed); + export->exp_obd->obd_num_exports++; + spin_unlock(&obd->obd_dev_lock); + return export; } +EXPORT_SYMBOL(class_new_export); -void class_destroy_export(struct obd_export *exp) +void class_unlink_export(struct obd_export *exp) { - ENTRY; - - LASSERT(exp->exp_cookie != DEAD_HANDLE_MAGIC); + class_handle_unhash(&exp->exp_handle); spin_lock(&exp->exp_obd->obd_dev_lock); - list_del(&exp->exp_obd_chain); + /* delete an uuid-export hashitem from hashtables */ + if (!hlist_unhashed(&exp->exp_uuid_hash)) + lustre_hash_del(exp->exp_obd->obd_uuid_hash, + &exp->exp_client_uuid, + &exp->exp_uuid_hash); + + list_del_init(&exp->exp_obd_chain); + list_del_init(&exp->exp_obd_chain_timed); + exp->exp_obd->obd_num_exports--; spin_unlock(&exp->exp_obd->obd_dev_lock); - /* XXXshaver no connection here... */ - if (exp->exp_connection) - spin_lock(&exp->exp_connection->c_lock); - list_del(&exp->exp_conn_chain); - if (exp->exp_connection) { - spin_unlock(&exp->exp_connection->c_lock); - ptlrpc_put_connection_superhack(exp->exp_connection); - } + /* Keep these counter valid always */ + spin_lock_bh(&exp->exp_obd->obd_processing_task_lock); + if (exp->exp_delayed) + exp->exp_obd->obd_delayed_clients--; + else if (exp->exp_in_recovery) + exp->exp_obd->obd_recoverable_clients--; + else if (exp->exp_obd->obd_recovering) + exp->exp_obd->obd_max_recoverable_clients--; + spin_unlock_bh(&exp->exp_obd->obd_processing_task_lock); + class_export_put(exp); +} +EXPORT_SYMBOL(class_unlink_export); + +/* Import management functions */ +void class_import_destroy(struct obd_import *imp) +{ + ENTRY; + + CDEBUG(D_IOCTL, "destroying import %p for %s\n", imp, + imp->imp_obd->obd_name); + + LASSERT(atomic_read(&imp->imp_refcount) == 0); - exp->exp_cookie = DEAD_HANDLE_MAGIC; - kmem_cache_free(export_cachep, exp); + ptlrpc_put_connection_superhack(imp->imp_connection); + while 
(!list_empty(&imp->imp_conn_list)) { + struct obd_import_conn *imp_conn; + + imp_conn = list_entry(imp->imp_conn_list.next, + struct obd_import_conn, oic_item); + list_del_init(&imp_conn->oic_item); + ptlrpc_put_connection_superhack(imp_conn->oic_conn); + OBD_FREE(imp_conn, sizeof(*imp_conn)); + } + + LASSERT(imp->imp_sec == NULL); + class_decref(imp->imp_obd, "import", imp); + OBD_FREE_RCU(imp, sizeof(*imp), &imp->imp_handle); EXIT; } -/* a connection defines an export context in which preallocation can - be managed. */ -int class_connect(struct lustre_handle *conn, struct obd_device *obd, - obd_uuid_t cluuid) +static void import_handle_addref(void *import) { - struct obd_export * export; - if (conn == NULL) { - LBUG(); - return -EINVAL; + class_import_get(import); +} + +struct obd_import *class_import_get(struct obd_import *import) +{ + LASSERT(atomic_read(&import->imp_refcount) >= 0); + LASSERT(atomic_read(&import->imp_refcount) < 0x5a5a5a); + atomic_inc(&import->imp_refcount); + CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import, + atomic_read(&import->imp_refcount), + import->imp_obd->obd_name); + return import; +} +EXPORT_SYMBOL(class_import_get); + +void class_import_put(struct obd_import *imp) +{ + ENTRY; + + LASSERT(atomic_read(&imp->imp_refcount) > 0); + LASSERT(atomic_read(&imp->imp_refcount) < 0x5a5a5a); + LASSERT(list_empty(&imp->imp_zombie_chain)); + + CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp, + atomic_read(&imp->imp_refcount) - 1, + imp->imp_obd->obd_name); + + if (atomic_dec_and_test(&imp->imp_refcount)) { + CDEBUG(D_INFO, "final put import %p\n", imp); + obd_zombie_import_add(imp); } - if (obd == NULL) { - LBUG(); - return -EINVAL; + EXIT; +} +EXPORT_SYMBOL(class_import_put); + +static void init_imp_at(struct imp_at *at) { + int i; + at_init(&at->iat_net_latency, 0, 0); + for (i = 0; i < IMP_AT_MAX_PORTALS; i++) { + /* max service estimates are tracked on the server side, so + don't use the AT history here, just use the last 
reported + val. (But keep hist for proc histogram, worst_ever) */ + at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT, + AT_FLG_NOHIST); } +} - export = class_new_export(obd); - if (!export) - return -ENOMEM; +struct obd_import *class_new_import(struct obd_device *obd) +{ + struct obd_import *imp; - conn->addr = (__u64) (unsigned long)export; - conn->cookie = export->exp_cookie; + OBD_ALLOC(imp, sizeof(*imp)); + if (imp == NULL) + return NULL; - CDEBUG(D_IOCTL, "connect: addr %Lx cookie %Lx\n", - (long long)conn->addr, (long long)conn->cookie); - return 0; + CFS_INIT_LIST_HEAD(&imp->imp_zombie_chain); + CFS_INIT_LIST_HEAD(&imp->imp_replay_list); + CFS_INIT_LIST_HEAD(&imp->imp_sending_list); + CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); + spin_lock_init(&imp->imp_lock); + imp->imp_last_success_conn = 0; + imp->imp_state = LUSTRE_IMP_NEW; + imp->imp_obd = class_incref(obd, "import", imp); + sema_init(&imp->imp_sec_mutex, 1); + cfs_waitq_init(&imp->imp_recovery_waitq); + + atomic_set(&imp->imp_refcount, 2); + atomic_set(&imp->imp_unregistering, 0); + atomic_set(&imp->imp_inflight, 0); + atomic_set(&imp->imp_replay_inflight, 0); + atomic_set(&imp->imp_inval_count, 0); + CFS_INIT_LIST_HEAD(&imp->imp_conn_list); + CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link); + class_handle_hash(&imp->imp_handle, import_handle_addref); + init_imp_at(&imp->imp_at); + + /* the default magic is V2, will be used in connect RPC, and + * then adjusted according to the flags in request/reply. 
*/ + imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2; + + return imp; } +EXPORT_SYMBOL(class_new_import); -int class_disconnect(struct lustre_handle *conn) +void class_destroy_import(struct obd_import *import) +{ + LASSERT(import != NULL); + LASSERT(import != LP_POISON); + + class_handle_unhash(&import->imp_handle); + + spin_lock(&import->imp_lock); + import->imp_generation++; + spin_unlock(&import->imp_lock); + class_import_put(import); +} +EXPORT_SYMBOL(class_destroy_import); + +/* A connection defines an export context in which preallocation can + be managed. This releases the export pointer reference, and returns + the export handle, so the export refcount is 1 when this function + returns. */ +int class_connect(struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid) { struct obd_export *export; + LASSERT(conn != NULL); + LASSERT(obd != NULL); + LASSERT(cluuid != NULL); ENTRY; - if (!(export = class_conn2export(conn))) { + export = class_new_export(obd, cluuid); + if (IS_ERR(export)) + RETURN(PTR_ERR(export)); + + conn->cookie = export->exp_handle.h_cookie; + class_export_put(export); + + CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n", + cluuid->uuid, conn->cookie); + RETURN(0); +} +EXPORT_SYMBOL(class_connect); + +/* if export is involved in recovery then clean up related things */ +void class_export_recovery_cleanup(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + + spin_lock_bh(&obd->obd_processing_task_lock); + if (obd->obd_recovering && exp->exp_in_recovery) { + spin_lock(&exp->exp_lock); + exp->exp_in_recovery = 0; + spin_unlock(&exp->exp_lock); + obd->obd_connected_clients--; + /* each connected client is counted as recoverable */ + obd->obd_recoverable_clients--; + if (exp->exp_req_replay_needed) { + spin_lock(&exp->exp_lock); + exp->exp_req_replay_needed = 0; + spin_unlock(&exp->exp_lock); + LASSERT(atomic_read(&obd->obd_req_replay_clients)); + atomic_dec(&obd->obd_req_replay_clients); + } + if 
(exp->exp_lock_replay_needed) { + spin_lock(&exp->exp_lock); + exp->exp_lock_replay_needed = 0; + spin_unlock(&exp->exp_lock); + LASSERT(atomic_read(&obd->obd_lock_replay_clients)); + atomic_dec(&obd->obd_lock_replay_clients); + } + } + spin_unlock_bh(&obd->obd_processing_task_lock); +} + +/* This function removes 1-3 references from the export: + * 1 - for export pointer passed + * and if disconnect really need + * 2 - removing from hash + * 3 - in client_unlink_export + * The export pointer passed to this function can destroyed */ +int class_disconnect(struct obd_export *export) +{ + int already_disconnected; + ENTRY; + + if (export == NULL) { fixme(); - CDEBUG(D_IOCTL, "disconnect: attempting to free " - "nonexistent client "LPX64"\n", conn->addr); + CDEBUG(D_IOCTL, "attempting to free NULL export %p\n", export); RETURN(-EINVAL); } - CDEBUG(D_IOCTL, "disconnect: addr %Lx cookie %Lx\n", - (long long)conn->addr, (long long)conn->cookie); + spin_lock(&export->exp_lock); + already_disconnected = export->exp_disconnected; + export->exp_disconnected = 1; + spin_unlock(&export->exp_lock); + + /* class_cleanup(), abort_recovery(), and class_fail_export() + * all end up in here, and if any of them race we shouldn't + * call extra class_export_puts(). 
*/ + if (already_disconnected) { + LASSERT(hlist_unhashed(&export->exp_nid_hash)); + GOTO(no_disconn, already_disconnected); + } - class_destroy_export(export); + CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", + export->exp_handle.h_cookie); + if (!hlist_unhashed(&export->exp_nid_hash)) + lustre_hash_del(export->exp_obd->obd_nid_hash, + &export->exp_connection->c_peer.nid, + &export->exp_nid_hash); + + class_export_recovery_cleanup(export); + class_unlink_export(export); +no_disconn: + class_export_put(export); RETURN(0); } -void class_disconnect_all(struct obd_device *obddev) +static void class_disconnect_export_list(struct list_head *list, + enum obd_option flags) { - int again = 1; - - while (again) { - spin_lock(&obddev->obd_dev_lock); - if (!list_empty(&obddev->obd_exports)) { - struct obd_export *export; - struct lustre_handle conn; - int rc; + int rc; + struct obd_export *exp; + ENTRY; - export = list_entry(obddev->obd_exports.next, - struct obd_export, - exp_obd_chain); - conn.addr = (__u64)(unsigned long)export; - conn.cookie = export->exp_cookie; - spin_unlock(&obddev->obd_dev_lock); - CERROR("force disconnecting %s:%s export %p\n", - export->exp_obd->obd_type->typ_name, - export->exp_connection->c_remote_uuid, export); - rc = obd_disconnect(&conn); - if (rc < 0) { - /* AED: not so sure about this... We can't - * loop here forever, yet we shouldn't leak - * exports on a struct we will soon destroy. - */ - CERROR("destroy export %p with err: rc = %d\n", - export, rc); - class_destroy_export(export); - } - } else { - spin_unlock(&obddev->obd_dev_lock); - again = 0; + /* It's possible that an export may disconnect itself, but + * nothing else will be added to this list. 
*/ + while (!list_empty(list)) { + exp = list_entry(list->next, struct obd_export, exp_obd_chain); + /* need for safe call CDEBUG after obd_disconnect */ + class_export_get(exp); + + spin_lock(&exp->exp_lock); + exp->exp_flags = flags; + spin_unlock(&exp->exp_lock); + + if (obd_uuid_equals(&exp->exp_client_uuid, + &exp->exp_obd->obd_uuid)) { + CDEBUG(D_HA, + "exp %p export uuid == obd uuid, don't discon\n", + exp); + /* Need to delete this now so we don't end up pointing + * to work_list later when this export is cleaned up. */ + list_del_init(&exp->exp_obd_chain); + class_export_put(exp); + continue; } + + class_export_get(exp); + CDEBUG(D_HA, "%s: disconnecting export at %s (%p), " + "last request at "CFS_TIME_T"\n", + exp->exp_obd->obd_name, obd_export_nid2str(exp), + exp, exp->exp_last_request_time); + /* release one export reference anyway */ + rc = obd_disconnect(exp); + + CDEBUG(D_HA, "disconnected export at %s (%p): rc %d\n", + obd_export_nid2str(exp), exp, rc); + class_export_put(exp); } + EXIT; } -#if 0 +void class_disconnect_exports(struct obd_device *obd) +{ + struct list_head work_list; + ENTRY; -/* FIXME: Data is a space- or comma-separated list of device IDs. This will - * have to change. */ -int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data) + /* Move all of the exports from obd_exports to a work list, en masse. */ + CFS_INIT_LIST_HEAD(&work_list); + spin_lock(&obd->obd_dev_lock); + list_splice_init(&obd->obd_exports, &work_list); + list_splice_init(&obd->obd_delayed_exports, &work_list); + spin_unlock(&obd->obd_dev_lock); + + if (!list_empty(&work_list)) { + CDEBUG(D_HA, "OBD device %d (%p) has exports, " + "disconnecting them\n", obd->obd_minor, obd); + class_disconnect_export_list(&work_list, + exp_flags_from_obd(obd)); + } else + CDEBUG(D_HA, "OBD device %d (%p) has no exports\n", + obd->obd_minor, obd); + EXIT; +} +EXPORT_SYMBOL(class_disconnect_exports); + +/* Remove exports that have not completed recovery. 
+ */ +void class_disconnect_stale_exports(struct obd_device *obd, + int (*test_export)(struct obd_export *), + enum obd_option flags) { - int count, rc; - char *p; + struct list_head work_list; + struct list_head *pos, *n; + struct obd_export *exp; ENTRY; - for (p = data, count = 0; p < (char *)data + len; count++) { - char *end; - int tmp = simple_strtoul(p, &end, 0); + CFS_INIT_LIST_HEAD(&work_list); + spin_lock(&obd->obd_dev_lock); + obd->obd_stale_clients = 0; + list_for_each_safe(pos, n, &obd->obd_exports) { + exp = list_entry(pos, struct obd_export, exp_obd_chain); + /* exports accepted by test_export() (non-zero return) stay on + * obd_exports; all others are moved to work_list */ + if (test_export(exp)) + continue; + + list_move(&exp->exp_obd_chain, &work_list); + /* the self-export is moved as well, but is not counted as a + * stale client */ + if (obd_uuid_equals(&exp->exp_client_uuid, + &exp->exp_obd->obd_uuid)) + continue; + + obd->obd_stale_clients++; + CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n", + obd->obd_name, exp->exp_client_uuid.uuid, + exp->exp_connection == NULL ? "" : + libcfs_nid2str(exp->exp_connection->c_peer.nid)); + } + spin_unlock(&obd->obd_dev_lock); + + CDEBUG(D_HA, "%s: disconnecting %d stale clients\n", obd->obd_name, + obd->obd_stale_clients); - if (p == end) { - CERROR("invalid device ID starting at: %s\n", p); - GOTO(err_disconnect, rc = -EINVAL); - } + class_disconnect_export_list(&work_list, flags); + EXIT; +} +EXPORT_SYMBOL(class_disconnect_stale_exports); + +/* + * Mark exp as failed and disconnect it. If exp_failed was already + * set, only a debug message is logged and the function returns. + */ +void class_fail_export(struct obd_export *exp) +{ + int rc, already_failed; + + spin_lock(&exp->exp_lock); + already_failed = exp->exp_failed; + exp->exp_failed = 1; + spin_unlock(&exp->exp_lock); + + if (already_failed) { + CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n", + exp, exp->exp_client_uuid.uuid); + return; + } + + CDEBUG(D_HA, "disconnecting export %p/%s\n", + exp, exp->exp_client_uuid.uuid); + + if (obd_dump_on_timeout) + libcfs_debug_dumplog(); + + /* Most callers into obd_disconnect are removing their own reference + * (request, for example) in addition to the one from the hash table. 
+ * We don't have such a reference here, so make one. */ + class_export_get(exp); + rc = obd_disconnect(exp); + if (rc) + CERROR("disconnecting export %p failed: %d\n", exp, rc); + else + CDEBUG(D_HA, "disconnected export %p/%s\n", + exp, exp->exp_client_uuid.uuid); +} +EXPORT_SYMBOL(class_fail_export); + +/* + * Return the NID string of the export connection, or a fixed + * placeholder string when the export has no connection. + */ +char *obd_export_nid2str(struct obd_export *exp) +{ + if (exp->exp_connection != NULL) + return libcfs_nid2str(exp->exp_connection->c_peer.nid); + + return "(no nid)"; +} +EXPORT_SYMBOL(obd_export_nid2str); - if (tmp < 0 || tmp >= MAX_OBD_DEVICES) { - CERROR("Trying to sub dev %d - dev no too large\n", - tmp); - GOTO(err_disconnect, rc = -EINVAL); +/* + * Evict every export that obd_nid_hash returns for nid: each one is + * failed with class_fail_export() and then released with + * class_export_put(). Returns the number of exports evicted. + */ +int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) +{ + struct obd_export *doomed_exp = NULL; + int exports_evicted = 0; + + lnet_nid_t nid_key = libcfs_str2nid((char *)nid); + + do { + doomed_exp = lustre_hash_lookup(obd->obd_nid_hash, &nid_key); + if (doomed_exp == NULL) + break; + + LASSERTF(doomed_exp->exp_connection->c_peer.nid == nid_key, + "nid %s found, wanted nid %s, requested nid %s\n", + obd_export_nid2str(doomed_exp), + libcfs_nid2str(nid_key), nid); + LASSERTF(doomed_exp != obd->obd_self_export, + "self-export is hashed by NID?\n"); + exports_evicted++; + CWARN("%s: evict NID '%s' (%s) #%d at adminstrative request\n", + obd->obd_name, nid, doomed_exp->exp_client_uuid.uuid, + exports_evicted); + class_fail_export(doomed_exp); + class_export_put(doomed_exp); + } while (1); + + if (!exports_evicted) + CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n", + obd->obd_name, nid); + return exports_evicted; +} +EXPORT_SYMBOL(obd_export_evict_by_nid); + +/* + * Evict the export that obd_uuid_hash returns for uuid. Returns 1 if + * an export was evicted and 0 otherwise; a request naming the uuid of + * obd itself is refused with an error message. + */ +int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) +{ + struct obd_export *doomed_exp = NULL; + struct obd_uuid doomed_uuid; + int exports_evicted = 0; + + obd_str2uuid(&doomed_uuid, uuid); + if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) { + CERROR("%s: can't evict myself\n", obd->obd_name); + return 
exports_evicted; + } + + doomed_exp = lustre_hash_lookup(obd->obd_uuid_hash, &doomed_uuid); + + if (doomed_exp == NULL) { + CERROR("%s: can't disconnect %s: no exports found\n", + obd->obd_name, uuid); + } else { + CWARN("%s: evicting %s at adminstrative request\n", + obd->obd_name, doomed_exp->exp_client_uuid.uuid); + class_fail_export(doomed_exp); + class_export_put(doomed_exp); + exports_evicted++; + } + + return exports_evicted; +} +EXPORT_SYMBOL(obd_export_evict_by_uuid); + +/** + * Destroy all queued zombie imports and exports. + * + * Each pass unlinks at most one import and one export from the zombie + * lists under obd_zombie_impexp_lock and destroys them after the lock + * has been dropped; the loop ends when a pass finds both lists empty. + */ +void obd_zombie_impexp_cull(void) +{ + struct obd_import *import; + struct obd_export *export; + ENTRY; + + do { + spin_lock(&obd_zombie_impexp_lock); + + import = NULL; + if (!list_empty(&obd_zombie_imports)) { + import = list_entry(obd_zombie_imports.next, + struct obd_import, + imp_zombie_chain); + list_del_init(&import->imp_zombie_chain); } - rc = obd_connect(&obddev->obd_multi_conn[count], &obd_dev[tmp]); - if (rc) { - CERROR("cannot connect to device %d: rc = %d\n", tmp, - rc); - GOTO(err_disconnect, rc); + export = NULL; + if (!list_empty(&obd_zombie_exports)) { + export = list_entry(obd_zombie_exports.next, + struct obd_export, + exp_obd_chain); + list_del_init(&export->exp_obd_chain); } - CDEBUG(D_INFO, "target OBD %d is of type %s\n", count, - obd_dev[tmp].obd_type->typ_name); + spin_unlock(&obd_zombie_impexp_lock); - p = end + 1; - } + if (import != NULL) + class_import_destroy(import); - obddev->obd_multi_count = count; + if (export != NULL) + class_export_destroy(export); - RETURN(0); + } while (import != NULL || export != NULL); + EXIT; +} + +static struct completion obd_zombie_start; +static struct completion obd_zombie_stop; +static unsigned long obd_zombie_flags; +static cfs_waitq_t obd_zombie_waitq; + +enum { + /* passed as the bit-number argument of set_bit()/test_bit() on + * obd_zombie_flags */ + OBD_ZOMBIE_STOP = 1 << 1 +}; - err_disconnect: - for (count--; count >= 0; count--) - obd_disconnect(&obddev->obd_multi_conn[count]); +/** + * check for work for kill zombie import/export thread. 
+ */ +static int obd_zombie_impexp_check(void *arg) +{ + int rc; + + spin_lock(&obd_zombie_impexp_lock); + /* rc is non-zero only when both zombie lists are empty and + * OBD_ZOMBIE_STOP is not set, i.e. when there is nothing to do */ + rc = list_empty(&obd_zombie_imports) && + list_empty(&obd_zombie_exports) && + !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + + spin_unlock(&obd_zombie_impexp_lock); + + RETURN(rc); +} + +/** + * Queue an export for the obd_zombie thread and notify it. + * + * The export is added to obd_zombie_exports through its exp_obd_chain + * link (asserted to be unlinked) under obd_zombie_impexp_lock. + */ +static void obd_zombie_export_add(struct obd_export *exp) { + spin_lock(&obd_zombie_impexp_lock); + LASSERT(list_empty(&exp->exp_obd_chain)); + list_add(&exp->exp_obd_chain, &obd_zombie_exports); + spin_unlock(&obd_zombie_impexp_lock); + + if (obd_zombie_impexp_notify != NULL) + obd_zombie_impexp_notify(); +} + +/** + * Queue an import for the obd_zombie thread and notify it. + * + * The import must have no imp_sec (asserted); it is added to + * obd_zombie_imports through its imp_zombie_chain link (asserted to be + * unlinked) under obd_zombie_impexp_lock. + */ +static void obd_zombie_import_add(struct obd_import *imp) { + LASSERT(imp->imp_sec == NULL); + spin_lock(&obd_zombie_impexp_lock); + LASSERT(list_empty(&imp->imp_zombie_chain)); + list_add(&imp->imp_zombie_chain, &obd_zombie_imports); + spin_unlock(&obd_zombie_impexp_lock); + + if (obd_zombie_impexp_notify != NULL) + obd_zombie_impexp_notify(); +} + +/** + * notify import/export destroy thread about new zombie. 
+ */ +static void obd_zombie_impexp_notify(void) +{ + cfs_waitq_signal(&obd_zombie_waitq); +} + +/** + * check whether obd_zombie is idle, i.e. both zombie lists are empty; + * asserts that OBD_ZOMBIE_STOP has not been set + */ +static int obd_zombie_is_idle(void) +{ + int rc; + + LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); + spin_lock(&obd_zombie_impexp_lock); + rc = list_empty(&obd_zombie_imports) && + list_empty(&obd_zombie_exports); + spin_unlock(&obd_zombie_impexp_lock); return rc; } -/* - * remove all connections to this device - * close all connections to lower devices - * needed for forced unloads of OBD client drivers +/** + * wait until the obd_zombie import/export queues become empty */ -int class_multi_cleanup(struct obd_device *obddev) +void obd_zombie_barrier(void) { - int i; + struct l_wait_info lwi = { 0 }; + l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi); +} +EXPORT_SYMBOL(obd_zombie_barrier); - for (i = 0; i < obddev->obd_multi_count; i++) { - int rc; - struct obd_device *obd = - class_conn2obd(&obddev->obd_multi_conn[i]); +#ifdef __KERNEL__ - if (!obd) { - CERROR("no such device [i %d]\n", i); - RETURN(-EINVAL); - } +/** + * Body of the thread that destroys zombie exports/imports. + * + * Completes obd_zombie_start once daemonizing has been attempted, then + * loops until OBD_ZOMBIE_STOP is set: wait on obd_zombie_waitq until + * obd_zombie_impexp_check() reports work, cull the queues, and signal + * the wait queue again. Completes obd_zombie_stop before returning 0. + */ +static int obd_zombie_impexp_thread(void *unused) +{ + int rc; + + if ((rc = cfs_daemonize_ctxt("obd_zombid"))) { + complete(&obd_zombie_start); + RETURN(rc); + } + + complete(&obd_zombie_start); + + while(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { + struct l_wait_info lwi = { 0 }; - rc = obd_disconnect(&obddev->obd_multi_conn[i]); - if (rc) - CERROR("disconnect failure %d\n", obd->obd_minor); + l_wait_event(obd_zombie_waitq, + !obd_zombie_impexp_check(NULL), &lwi); + obd_zombie_impexp_cull(); + + /* + * Notify obd_zombie_barrier callers that queues + * may be empty. + */ + cfs_waitq_signal(&obd_zombie_waitq); } - return 0; + + complete(&obd_zombie_stop); + + RETURN(0); } + +#else /* ! 
KERNEL */ + +static atomic_t zombie_recur = ATOMIC_INIT(0); +static void *obd_zombie_impexp_work_cb; +static void *obd_zombie_impexp_idle_cb; + +/* + * Cull the zombie queues, but only when this call raised zombie_recur + * from 0 to 1. Returns 1 if a cull ran and 0 otherwise. + */ +int obd_zombie_impexp_kill(void *arg) +{ + int rc = 0; + + if (atomic_inc_return(&zombie_recur) == 1) { + obd_zombie_impexp_cull(); + rc = 1; + } + atomic_dec(&zombie_recur); + return rc; +} + +#endif + +/** + * Initialise the zombie lists, lock, completions and wait queue, then + * start the zombie import/export destroy thread and wait for it to + * signal obd_zombie_start (kernel build), or register the liblustre + * wait/idle callbacks (userspace build). + * + * The kernel build returns whatever cfs_kernel_thread() returned + * (negative on failure); the userspace build returns 0. + */ +int obd_zombie_impexp_init(void) +{ + int rc; + + CFS_INIT_LIST_HEAD(&obd_zombie_imports); + CFS_INIT_LIST_HEAD(&obd_zombie_exports); + spin_lock_init(&obd_zombie_impexp_lock); + init_completion(&obd_zombie_start); + init_completion(&obd_zombie_stop); + cfs_waitq_init(&obd_zombie_waitq); + +#ifdef __KERNEL__ + rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0); + if (rc < 0) + RETURN(rc); + + wait_for_completion(&obd_zombie_start); +#else + + obd_zombie_impexp_work_cb = + liblustre_register_wait_callback("obd_zombi_impexp_kill", + &obd_zombie_impexp_kill, NULL); + + obd_zombie_impexp_idle_cb = + liblustre_register_idle_callback("obd_zombi_impexp_check", + &obd_zombie_impexp_check, NULL); + rc = 0; +#endif + RETURN(rc); +} +/** + * Stop the zombie import/export destroy machinery: set + * OBD_ZOMBIE_STOP and signal the wait queue, then wait for + * obd_zombie_stop to be completed (kernel build) or deregister the + * liblustre callbacks (userspace build). + */ +void obd_zombie_impexp_stop(void) +{ + set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + obd_zombie_impexp_notify(); +#ifdef __KERNEL__ + wait_for_completion(&obd_zombie_stop); +#else + liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb); + liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb); #endif +}