X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Fgenops.c;h=4fb3ecc25c4136ce9426dc1fdce896878ff64692;hp=74d5f5ff8551b611c92818b783244afa131443e4;hb=892078e3b566c04471e7dcf2c28e66f2f3584f93;hpb=65701b4a30efdb695776bcf690a2b3cabc928da1 diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 74d5f5f..4fb3ecc 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -29,7 +27,7 @@ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,31 +40,36 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifndef __KERNEL__ -#include -#endif -#include + +#include #include #include +#include +#include -extern cfs_list_t obd_types; -cfs_spinlock_t obd_types_lock; +spinlock_t obd_types_lock; -cfs_mem_cache_t *obd_device_cachep; -cfs_mem_cache_t *obdo_cachep; +static struct kmem_cache *obd_device_cachep; +struct kmem_cache *obdo_cachep; EXPORT_SYMBOL(obdo_cachep); -cfs_mem_cache_t *import_cachep; +static struct kmem_cache *import_cachep; + +static struct list_head obd_zombie_imports; +static struct list_head obd_zombie_exports; +static spinlock_t obd_zombie_impexp_lock; -cfs_list_t obd_zombie_imports; -cfs_list_t obd_zombie_exports; -cfs_spinlock_t obd_zombie_impexp_lock; static void obd_zombie_impexp_notify(void); static void obd_zombie_export_add(struct obd_export *exp); static void obd_zombie_import_add(struct obd_import *imp); static void print_export_data(struct obd_export *exp, const char *status, int locks); +struct list_head obd_stale_exports; +spinlock_t obd_stale_export_lock; +atomic_t obd_stale_export_num; + int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); +EXPORT_SYMBOL(ptlrpc_put_connection_superhack); /* * support functions: we could use inter-module communication, but this @@ -74,13 +77,13 @@ int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); */ static struct obd_device *obd_device_alloc(void) { - struct obd_device *obd; + struct obd_device *obd; - OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, CFS_ALLOC_IO); - if (obd != NULL) { - obd->obd_magic = OBD_DEVICE_MAGIC; - } - return obd; + OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, GFP_NOFS); + if (obd != NULL) { + obd->obd_magic = OBD_DEVICE_MAGIC; + } + return obd; } static void obd_device_free(struct obd_device *obd) @@ -99,20 +102,21 @@ static void obd_device_free(struct obd_device *obd) struct obd_type *class_search_type(const char *name) { - cfs_list_t *tmp; - struct obd_type *type; + struct list_head *tmp; + struct obd_type *type; - cfs_spin_lock(&obd_types_lock); - cfs_list_for_each(tmp, &obd_types) { - type = cfs_list_entry(tmp, struct obd_type, typ_chain); - if (strcmp(type->typ_name, name) == 0) { - cfs_spin_unlock(&obd_types_lock); - return type; - } - } - cfs_spin_unlock(&obd_types_lock); - return NULL; + spin_lock(&obd_types_lock); + list_for_each(tmp, &obd_types) { + type = list_entry(tmp, struct obd_type, typ_chain); + if (strcmp(type->typ_name, name) == 0) { + spin_unlock(&obd_types_lock); + return type; + } + } + spin_unlock(&obd_types_lock); + return NULL; } +EXPORT_SYMBOL(class_search_type); struct obd_type *class_get_type(const char *name) { @@ -121,9 +125,19 @@ struct obd_type *class_get_type(const char *name) #ifdef HAVE_MODULE_LOADING_SUPPORT if (!type) { const char *modname = name; - if (!cfs_request_module("%s", modname)) { - CDEBUG(D_INFO, "Loaded module '%s'\n", modname); - type = class_search_type(name); + + if (strcmp(modname, "obdfilter") == 0) + modname = "ofd"; + + if (strcmp(modname, LUSTRE_LWP_NAME) == 0) + modname = LUSTRE_OSP_NAME; + + if (!strncmp(modname, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME))) + modname = LUSTRE_MDT_NAME; + + if (!request_module("%s", modname)) { + CDEBUG(D_INFO, "Loaded module '%s'\n", modname); + type = class_search_type(name); } else { LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n", modname); @@ -131,30 +145,28 @@ struct obd_type *class_get_type(const char *name) } #endif if (type) { - cfs_spin_lock(&type->obd_type_lock); - type->typ_refcnt++; - cfs_try_module_get(type->typ_dt_ops->o_owner); - cfs_spin_unlock(&type->obd_type_lock); - } - return type; + spin_lock(&type->obd_type_lock); + type->typ_refcnt++; + try_module_get(type->typ_dt_ops->o_owner); + spin_unlock(&type->obd_type_lock); + } + return type; } -EXPORT_SYMBOL(class_get_type); void class_put_type(struct obd_type *type) { - LASSERT(type); - cfs_spin_lock(&type->obd_type_lock); - type->typ_refcnt--; - cfs_module_put(type->typ_dt_ops->o_owner); - cfs_spin_unlock(&type->obd_type_lock); + LASSERT(type); + spin_lock(&type->obd_type_lock); + type->typ_refcnt--; + module_put(type->typ_dt_ops->o_owner); + spin_unlock(&type->obd_type_lock); } -EXPORT_SYMBOL(class_put_type); #define CLASS_MAX_NAME 1024 int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, - struct lprocfs_vars *vars, const char *name, - struct lu_device_type *ldt) + bool enable_proc, struct lprocfs_vars *vars, + const char *name, struct lu_device_type *ldt) { struct obd_type *type; int rc = 0; @@ -187,16 +199,19 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, if (md_ops) *(type->typ_md_ops) = *md_ops; strcpy(type->typ_name, name); - cfs_spin_lock_init(&type->obd_type_lock); - -#ifdef LPROCFS - type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root, - vars, type); - if (IS_ERR(type->typ_procroot)) { - rc = PTR_ERR(type->typ_procroot); - type->typ_procroot = NULL; - GOTO (failed, rc); - } + spin_lock_init(&type->obd_type_lock); + +#ifdef CONFIG_PROC_FS + if (enable_proc) { + type->typ_procroot = lprocfs_register(type->typ_name, + proc_lustre_root, + vars, type); + if (IS_ERR(type->typ_procroot)) { + rc = PTR_ERR(type->typ_procroot); + type->typ_procroot = NULL; + GOTO(failed, rc); + } + } #endif if (ldt != NULL) { type->typ_lu = ldt; @@ -205,15 +220,20 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, GOTO (failed, rc); } - cfs_spin_lock(&obd_types_lock); - cfs_list_add(&type->typ_chain, &obd_types); - cfs_spin_unlock(&obd_types_lock); + spin_lock(&obd_types_lock); + list_add(&type->typ_chain, &obd_types); + spin_unlock(&obd_types_lock); RETURN (0); - failed: - if (type->typ_name != NULL) +failed: + if (type->typ_name != NULL) { +#ifdef CONFIG_PROC_FS + if (type->typ_procroot != NULL) + remove_proc_subtree(type->typ_name, proc_lustre_root); +#endif OBD_FREE(type->typ_name, strlen(name) + 1); + } if (type->typ_md_ops != NULL) OBD_FREE_PTR(type->typ_md_ops); if (type->typ_dt_ops != NULL) @@ -242,16 +262,22 @@ int class_unregister_type(const char *name) RETURN(-EBUSY); } - if (type->typ_procroot) { - lprocfs_remove(&type->typ_procroot); - } - + /* we do not use type->typ_procroot as for compatibility purposes + * other modules can share names (i.e. lod can use lov entry). so + * we can't reference pointer as it can get invalided when another + * module removes the entry */ +#ifdef CONFIG_PROC_FS + if (type->typ_procroot != NULL) + remove_proc_subtree(type->typ_name, proc_lustre_root); + if (type->typ_procsym != NULL) + lprocfs_remove(&type->typ_procsym); +#endif if (type->typ_lu) lu_device_type_fini(type->typ_lu); - cfs_spin_lock(&obd_types_lock); - cfs_list_del(&type->typ_chain); - cfs_spin_unlock(&obd_types_lock); + spin_lock(&obd_types_lock); + list_del(&type->typ_chain); + spin_unlock(&obd_types_lock); OBD_FREE(type->typ_name, strlen(name) + 1); if (type->typ_dt_ops != NULL) OBD_FREE_PTR(type->typ_dt_ops); @@ -294,18 +320,16 @@ struct obd_device *class_newdev(const char *type_name, const char *name) } newdev = obd_device_alloc(); - if (newdev == NULL) { - class_put_type(type); - RETURN(ERR_PTR(-ENOMEM)); - } + if (newdev == NULL) + GOTO(out_type, result = ERR_PTR(-ENOMEM)); + LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC); - cfs_write_lock(&obd_dev_lock); + write_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); - if (obd && obd->obd_name && - (strcmp(name, obd->obd_name) == 0)) { + if (obd && (strcmp(name, obd->obd_name) == 0)) { CERROR("Device %s already exists at %d, won't add\n", name, i); if (result) { @@ -332,22 +356,26 @@ struct obd_device *class_newdev(const char *type_name, const char *name) obd_devs[i] = result; } } - cfs_write_unlock(&obd_dev_lock); + write_unlock(&obd_dev_lock); if (result == NULL && i >= class_devno_max()) { CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n", class_devno_max()); - RETURN(ERR_PTR(-EOVERFLOW)); + GOTO(out, result = ERR_PTR(-EOVERFLOW)); } - if (IS_ERR(result)) { - obd_device_free(newdev); - class_put_type(type); - } else { - CDEBUG(D_IOCTL, "Adding new device %s (%p)\n", - result->obd_name, result); - } - RETURN(result); + if (IS_ERR(result)) + GOTO(out, result); + + CDEBUG(D_IOCTL, "Adding new device %s (%p)\n", + result->obd_name, result); + + RETURN(result); +out: + obd_device_free(newdev); +out_type: + class_put_type(type); + return result; } void class_release_dev(struct obd_device *obd) @@ -363,9 +391,9 @@ void class_release_dev(struct obd_device *obd) CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n", obd->obd_name, obd->obd_minor, obd->obd_type->typ_name); - cfs_write_lock(&obd_dev_lock); + write_lock(&obd_dev_lock); obd_devs[obd->obd_minor] = NULL; - cfs_write_unlock(&obd_dev_lock); + write_unlock(&obd_dev_lock); obd_device_free(obd); class_put_type(obd_type); @@ -378,26 +406,25 @@ int class_name2dev(const char *name) if (!name) return -1; - cfs_read_lock(&obd_dev_lock); + read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); - if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) { + if (obd && strcmp(name, obd->obd_name) == 0) { /* Make sure we finished attaching before we give out any references */ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); if (obd->obd_attached) { - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return i; } break; } } - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return -1; } -EXPORT_SYMBOL(class_name2dev); struct obd_device *class_name2obd(const char *name) { @@ -413,21 +440,20 @@ int class_uuid2dev(struct obd_uuid *uuid) { int i; - cfs_read_lock(&obd_dev_lock); + read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) { LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return i; } } - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return -1; } -EXPORT_SYMBOL(class_uuid2dev); struct obd_device *class_uuid2obd(struct obd_uuid *uuid) { @@ -465,14 +491,34 @@ struct obd_device *class_num2obd(int num) return obd; } -EXPORT_SYMBOL(class_num2obd); + +/** + * Get obd devices count. Device in any + * state are counted + * \retval obd device count + */ +int get_devices_count(void) +{ + int index, max_index = class_devno_max(), dev_count = 0; + + read_lock(&obd_dev_lock); + for (index = 0; index <= max_index; index++) { + struct obd_device *obd = class_num2obd(index); + if (obd != NULL) + dev_count++; + } + read_unlock(&obd_dev_lock); + + return dev_count; +} +EXPORT_SYMBOL(get_devices_count); void class_obd_list(void) { char *status; int i; - cfs_read_lock(&obd_dev_lock); + read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); @@ -489,9 +535,9 @@ void class_obd_list(void) LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n", i, status, obd->obd_type->typ_name, obd->obd_name, obd->obd_uuid.uuid, - cfs_atomic_read(&obd->obd_refcount)); + atomic_read(&obd->obd_refcount)); } - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return; } @@ -504,7 +550,7 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, { int i; - cfs_read_lock(&obd_dev_lock); + read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); @@ -516,12 +562,12 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, &obd->u.cli.cl_target_uuid) && ((grp_uuid)? obd_uuid_equals(grp_uuid, &obd->obd_uuid) : 1)) { - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return obd; } } } - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return NULL; } @@ -542,7 +588,7 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) else return NULL; - cfs_read_lock(&obd_dev_lock); + read_lock(&obd_dev_lock); for (; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); @@ -551,11 +597,11 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) { if (next != NULL) *next = i+1; - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return obd; } } - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return NULL; } @@ -573,101 +619,89 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen) LASSERT(namelen > 0); - cfs_read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - obd = class_num2obd(i); + read_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + obd = class_num2obd(i); - if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping) - continue; + if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping) + continue; - /* only notify mdc, osc, mdt, ost */ - type = obd->obd_type->typ_name; - if (strcmp(type, LUSTRE_MDC_NAME) != 0 && - strcmp(type, LUSTRE_OSC_NAME) != 0 && - strcmp(type, LUSTRE_MDT_NAME) != 0 && - strcmp(type, LUSTRE_OST_NAME) != 0) - continue; + /* only notify mdc, osc, osp, lwp, mdt, ost + * because only these have a -sptlrpc llog */ + type = obd->obd_type->typ_name; + if (strcmp(type, LUSTRE_MDC_NAME) != 0 && + strcmp(type, LUSTRE_OSC_NAME) != 0 && + strcmp(type, LUSTRE_OSP_NAME) != 0 && + strcmp(type, LUSTRE_LWP_NAME) != 0 && + strcmp(type, LUSTRE_MDT_NAME) != 0 && + strcmp(type, LUSTRE_OST_NAME) != 0) + continue; if (strncmp(obd->obd_name, fsname, namelen)) continue; class_incref(obd, __FUNCTION__, obd); - cfs_read_unlock(&obd_dev_lock); - rc2 = obd_set_info_async(obd->obd_self_export, + read_unlock(&obd_dev_lock); + rc2 = obd_set_info_async(NULL, obd->obd_self_export, sizeof(KEY_SPTLRPC_CONF), KEY_SPTLRPC_CONF, 0, NULL, NULL); rc = rc ? rc : rc2; class_decref(obd, __FUNCTION__, obd); - cfs_read_lock(&obd_dev_lock); + read_lock(&obd_dev_lock); } - cfs_read_unlock(&obd_dev_lock); + read_unlock(&obd_dev_lock); return rc; } EXPORT_SYMBOL(class_notify_sptlrpc_conf); void obd_cleanup_caches(void) { - int rc; - ENTRY; if (obd_device_cachep) { - rc = cfs_mem_cache_destroy(obd_device_cachep); - LASSERTF(rc == 0, "Cannot destropy ll_obd_device_cache: rc %d\n", rc); + kmem_cache_destroy(obd_device_cachep); obd_device_cachep = NULL; } if (obdo_cachep) { - rc = cfs_mem_cache_destroy(obdo_cachep); - LASSERTF(rc == 0, "Cannot destory ll_obdo_cache\n"); + kmem_cache_destroy(obdo_cachep); obdo_cachep = NULL; } if (import_cachep) { - rc = cfs_mem_cache_destroy(import_cachep); - LASSERTF(rc == 0, "Cannot destory ll_import_cache\n"); + kmem_cache_destroy(import_cachep); import_cachep = NULL; } - if (capa_cachep) { - rc = cfs_mem_cache_destroy(capa_cachep); - LASSERTF(rc == 0, "Cannot destory capa_cache\n"); - capa_cachep = NULL; - } + EXIT; } int obd_init_caches(void) { - ENTRY; + int rc; + ENTRY; - LASSERT(obd_device_cachep == NULL); - obd_device_cachep = cfs_mem_cache_create("ll_obd_dev_cache", - sizeof(struct obd_device), - 0, 0); - if (!obd_device_cachep) - GOTO(out, -ENOMEM); - - LASSERT(obdo_cachep == NULL); - obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", sizeof(struct obdo), - 0, 0); - if (!obdo_cachep) - GOTO(out, -ENOMEM); - - LASSERT(import_cachep == NULL); - import_cachep = cfs_mem_cache_create("ll_import_cache", - sizeof(struct obd_import), - 0, 0); - if (!import_cachep) - GOTO(out, -ENOMEM); - - LASSERT(capa_cachep == NULL); - capa_cachep = cfs_mem_cache_create("capa_cache", - sizeof(struct obd_capa), 0, 0); - if (!capa_cachep) - GOTO(out, -ENOMEM); + LASSERT(obd_device_cachep == NULL); + obd_device_cachep = kmem_cache_create("ll_obd_dev_cache", + sizeof(struct obd_device), + 0, 0, NULL); + if (!obd_device_cachep) + GOTO(out, rc = -ENOMEM); - RETURN(0); - out: - obd_cleanup_caches(); - RETURN(-ENOMEM); + LASSERT(obdo_cachep == NULL); + obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo), + 0, 0, NULL); + if (!obdo_cachep) + GOTO(out, rc = -ENOMEM); + LASSERT(import_cachep == NULL); + import_cachep = kmem_cache_create("ll_import_cache", + sizeof(struct obd_import), + 0, 0, NULL); + if (!import_cachep) + GOTO(out, rc = -ENOMEM); + + RETURN(0); +out: + obd_cleanup_caches(); + RETURN(rc); } /* map connection to client */ @@ -687,8 +721,8 @@ struct obd_export *class_conn2export(struct lustre_handle *conn) } CDEBUG(D_INFO, "looking for export cookie "LPX64"\n", conn->cookie); - export = class_handle2object(conn->cookie); - RETURN(export); + export = class_handle2object(conn->cookie, NULL); + RETURN(export); } EXPORT_SYMBOL(class_conn2export); @@ -711,7 +745,6 @@ struct obd_device *class_conn2obd(struct lustre_handle *conn) } return NULL; } -EXPORT_SYMBOL(class_conn2obd); struct obd_import *class_exp2cliimp(struct obd_export *exp) { @@ -729,7 +762,6 @@ struct obd_import *class_conn2cliimp(struct lustre_handle *conn) return NULL; return obd->u.cli.cl_import; } -EXPORT_SYMBOL(class_conn2cliimp); /* Export management functions */ static void class_export_destroy(struct obd_export *exp) @@ -738,20 +770,19 @@ static void class_export_destroy(struct obd_export *exp) ENTRY; LASSERT_ATOMIC_ZERO(&exp->exp_refcount); + LASSERT(obd != NULL); CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp, exp->exp_client_uuid.uuid, obd->obd_name); - LASSERT(obd != NULL); - /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */ if (exp->exp_connection) ptlrpc_put_connection_superhack(exp->exp_connection); - LASSERT(cfs_list_empty(&exp->exp_outstanding_replies)); - LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies)); - LASSERT(cfs_list_empty(&exp->exp_req_replay_queue)); - LASSERT(cfs_list_empty(&exp->exp_hp_rpcs)); + LASSERT(list_empty(&exp->exp_outstanding_replies)); + LASSERT(list_empty(&exp->exp_uncommitted_replies)); + LASSERT(list_empty(&exp->exp_req_replay_queue)); + LASSERT(list_empty(&exp->exp_hp_rpcs)); obd_destroy_export(exp); class_decref(obd, "export", exp); @@ -764,11 +795,16 @@ static void export_handle_addref(void *export) class_export_get(export); } +static struct portals_handle_ops export_handle_ops = { + .hop_addref = export_handle_addref, + .hop_free = NULL, +}; + struct obd_export *class_export_get(struct obd_export *exp) { - cfs_atomic_inc(&exp->exp_refcount); + atomic_inc(&exp->exp_refcount); CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp, - cfs_atomic_read(&exp->exp_refcount)); + atomic_read(&exp->exp_refcount)); return exp; } EXPORT_SYMBOL(class_export_get); @@ -778,18 +814,19 @@ void class_export_put(struct obd_export *exp) LASSERT(exp != NULL); LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON); CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp, - cfs_atomic_read(&exp->exp_refcount) - 1); + atomic_read(&exp->exp_refcount) - 1); - if (cfs_atomic_dec_and_test(&exp->exp_refcount)) { - LASSERT(!cfs_list_empty(&exp->exp_obd_chain)); - CDEBUG(D_IOCTL, "final put %p/%s\n", - exp, exp->exp_client_uuid.uuid); + if (atomic_dec_and_test(&exp->exp_refcount)) { + LASSERT(!list_empty(&exp->exp_obd_chain)); + LASSERT(list_empty(&exp->exp_stale_list)); + CDEBUG(D_IOCTL, "final put %p/%s\n", + exp, exp->exp_client_uuid.uuid); - /* release nid stat refererence */ - lprocfs_exp_cleanup(exp); + /* release nid stat refererence */ + lprocfs_exp_cleanup(exp); - obd_zombie_export_add(exp); - } + obd_zombie_export_add(exp); + } } EXPORT_SYMBOL(class_export_put); @@ -800,7 +837,7 @@ struct obd_export *class_new_export(struct obd_device *obd, struct obd_uuid *cluuid) { struct obd_export *export; - cfs_hash_t *hash = NULL; + struct cfs_hash *hash = NULL; int rc = 0; ENTRY; @@ -810,45 +847,49 @@ struct obd_export *class_new_export(struct obd_device *obd, export->exp_conn_cnt = 0; export->exp_lock_hash = NULL; - cfs_atomic_set(&export->exp_refcount, 2); - cfs_atomic_set(&export->exp_rpc_count, 0); - cfs_atomic_set(&export->exp_cb_count, 0); - cfs_atomic_set(&export->exp_locks_count, 0); + export->exp_flock_hash = NULL; + atomic_set(&export->exp_refcount, 2); + atomic_set(&export->exp_rpc_count, 0); + atomic_set(&export->exp_cb_count, 0); + atomic_set(&export->exp_locks_count, 0); #if LUSTRE_TRACKS_LOCK_EXP_REFS - CFS_INIT_LIST_HEAD(&export->exp_locks_list); - cfs_spin_lock_init(&export->exp_locks_list_guard); + INIT_LIST_HEAD(&export->exp_locks_list); + spin_lock_init(&export->exp_locks_list_guard); #endif - cfs_atomic_set(&export->exp_replay_count, 0); - export->exp_obd = obd; - CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); - cfs_spin_lock_init(&export->exp_uncommitted_replies_lock); - CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies); - CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue); - CFS_INIT_LIST_HEAD(&export->exp_handle.h_link); - CFS_INIT_LIST_HEAD(&export->exp_hp_rpcs); - class_handle_hash(&export->exp_handle, export_handle_addref); - export->exp_last_request_time = cfs_time_current_sec(); - cfs_spin_lock_init(&export->exp_lock); - cfs_spin_lock_init(&export->exp_rpc_lock); - CFS_INIT_HLIST_NODE(&export->exp_uuid_hash); - CFS_INIT_HLIST_NODE(&export->exp_nid_hash); - cfs_spin_lock_init(&export->exp_bl_list_lock); - CFS_INIT_LIST_HEAD(&export->exp_bl_list); - - export->exp_sp_peer = LUSTRE_SP_ANY; - export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; - export->exp_client_uuid = *cluuid; - obd_init_export(export); - - cfs_spin_lock(&obd->obd_dev_lock); - /* shouldn't happen, but might race */ - if (obd->obd_stopping) - GOTO(exit_unlock, rc = -ENODEV); - - hash = cfs_hash_getref(obd->obd_uuid_hash); - if (hash == NULL) - GOTO(exit_unlock, rc = -ENODEV); - cfs_spin_unlock(&obd->obd_dev_lock); + atomic_set(&export->exp_replay_count, 0); + export->exp_obd = obd; + INIT_LIST_HEAD(&export->exp_outstanding_replies); + spin_lock_init(&export->exp_uncommitted_replies_lock); + INIT_LIST_HEAD(&export->exp_uncommitted_replies); + INIT_LIST_HEAD(&export->exp_req_replay_queue); + INIT_LIST_HEAD(&export->exp_handle.h_link); + INIT_LIST_HEAD(&export->exp_hp_rpcs); + INIT_LIST_HEAD(&export->exp_reg_rpcs); + class_handle_hash(&export->exp_handle, &export_handle_ops); + export->exp_last_request_time = cfs_time_current_sec(); + spin_lock_init(&export->exp_lock); + spin_lock_init(&export->exp_rpc_lock); + INIT_HLIST_NODE(&export->exp_uuid_hash); + INIT_HLIST_NODE(&export->exp_nid_hash); + INIT_HLIST_NODE(&export->exp_gen_hash); + spin_lock_init(&export->exp_bl_list_lock); + INIT_LIST_HEAD(&export->exp_bl_list); + INIT_LIST_HEAD(&export->exp_stale_list); + + export->exp_sp_peer = LUSTRE_SP_ANY; + export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; + export->exp_client_uuid = *cluuid; + obd_init_export(export); + + spin_lock(&obd->obd_dev_lock); + /* shouldn't happen, but might race */ + if (obd->obd_stopping) + GOTO(exit_unlock, rc = -ENODEV); + + hash = cfs_hash_getref(obd->obd_uuid_hash); + if (hash == NULL) + GOTO(exit_unlock, rc = -ENODEV); + spin_unlock(&obd->obd_dev_lock); if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) { rc = cfs_hash_add_unique(hash, cluuid, &export->exp_uuid_hash); @@ -859,28 +900,29 @@ struct obd_export *class_new_export(struct obd_device *obd, } } - cfs_spin_lock(&obd->obd_dev_lock); + at_init(&export->exp_bl_lock_at, obd_timeout, 0); + spin_lock(&obd->obd_dev_lock); if (obd->obd_stopping) { cfs_hash_del(hash, cluuid, &export->exp_uuid_hash); GOTO(exit_unlock, rc = -ENODEV); } class_incref(obd, "export", export); - cfs_list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); - cfs_list_add_tail(&export->exp_obd_chain_timed, - &export->exp_obd->obd_exports_timed); + list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); + list_add_tail(&export->exp_obd_chain_timed, + &export->exp_obd->obd_exports_timed); export->exp_obd->obd_num_exports++; - cfs_spin_unlock(&obd->obd_dev_lock); - cfs_hash_putref(hash); - RETURN(export); + spin_unlock(&obd->obd_dev_lock); + cfs_hash_putref(hash); + RETURN(export); exit_unlock: - cfs_spin_unlock(&obd->obd_dev_lock); + spin_unlock(&obd->obd_dev_lock); exit_err: if (hash) cfs_hash_putref(hash); class_handle_unhash(&export->exp_handle); - LASSERT(cfs_hlist_unhashed(&export->exp_uuid_hash)); + LASSERT(hlist_unhashed(&export->exp_uuid_hash)); obd_destroy_export(export); OBD_FREE_PTR(export); return ERR_PTR(rc); @@ -889,25 +931,38 @@ EXPORT_SYMBOL(class_new_export); void class_unlink_export(struct obd_export *exp) { - class_handle_unhash(&exp->exp_handle); + class_handle_unhash(&exp->exp_handle); + + spin_lock(&exp->exp_obd->obd_dev_lock); + /* delete an uuid-export hashitem from hashtables */ + if (!hlist_unhashed(&exp->exp_uuid_hash)) + cfs_hash_del(exp->exp_obd->obd_uuid_hash, + &exp->exp_client_uuid, + &exp->exp_uuid_hash); + + if (!hlist_unhashed(&exp->exp_gen_hash)) { + struct tg_export_data *ted = &exp->exp_target_data; + struct cfs_hash *hash; - cfs_spin_lock(&exp->exp_obd->obd_dev_lock); - /* delete an uuid-export hashitem from hashtables */ - if (!cfs_hlist_unhashed(&exp->exp_uuid_hash)) - cfs_hash_del(exp->exp_obd->obd_uuid_hash, - &exp->exp_client_uuid, - &exp->exp_uuid_hash); + hash = cfs_hash_getref(exp->exp_obd->obd_gen_hash); + cfs_hash_del(hash, &ted->ted_lcd->lcd_generation, + &exp->exp_gen_hash); + cfs_hash_putref(hash); + } - cfs_list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports); - cfs_list_del_init(&exp->exp_obd_chain_timed); - exp->exp_obd->obd_num_exports--; - cfs_spin_unlock(&exp->exp_obd->obd_dev_lock); - class_export_put(exp); + list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports); + list_del_init(&exp->exp_obd_chain_timed); + exp->exp_obd->obd_num_exports--; + spin_unlock(&exp->exp_obd->obd_dev_lock); + atomic_inc(&obd_stale_export_num); + + /* A reference is kept by obd_stale_exports list */ + obd_stale_export_put(exp); } EXPORT_SYMBOL(class_unlink_export); /* Import management functions */ -void class_import_destroy(struct obd_import *imp) +static void class_import_destroy(struct obd_import *imp) { ENTRY; @@ -918,12 +973,12 @@ void class_import_destroy(struct obd_import *imp) ptlrpc_put_connection_superhack(imp->imp_connection); - while (!cfs_list_empty(&imp->imp_conn_list)) { - struct obd_import_conn *imp_conn; + while (!list_empty(&imp->imp_conn_list)) { + struct obd_import_conn *imp_conn; - imp_conn = cfs_list_entry(imp->imp_conn_list.next, - struct obd_import_conn, oic_item); - cfs_list_del_init(&imp_conn->oic_item); + imp_conn = list_entry(imp->imp_conn_list.next, + struct obd_import_conn, oic_item); + list_del_init(&imp_conn->oic_item); ptlrpc_put_connection_superhack(imp_conn->oic_conn); OBD_FREE(imp_conn, sizeof(*imp_conn)); } @@ -939,11 +994,16 @@ static void import_handle_addref(void *import) class_import_get(import); } +static struct portals_handle_ops import_handle_ops = { + .hop_addref = import_handle_addref, + .hop_free = NULL, +}; + struct obd_import *class_import_get(struct obd_import *import) { - cfs_atomic_inc(&import->imp_refcount); + atomic_inc(&import->imp_refcount); CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import, - cfs_atomic_read(&import->imp_refcount), + atomic_read(&import->imp_refcount), import->imp_obd->obd_name); return import; } @@ -951,21 +1011,23 @@ EXPORT_SYMBOL(class_import_get); void class_import_put(struct obd_import *imp) { - ENTRY; + ENTRY; - LASSERT(cfs_list_empty(&imp->imp_zombie_chain)); + LASSERT(list_empty(&imp->imp_zombie_chain)); LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON); CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp, - cfs_atomic_read(&imp->imp_refcount) - 1, + atomic_read(&imp->imp_refcount) - 1, imp->imp_obd->obd_name); - if (cfs_atomic_dec_and_test(&imp->imp_refcount)) { + if (atomic_dec_and_test(&imp->imp_refcount)) { CDEBUG(D_INFO, "final put import %p\n", imp); obd_zombie_import_add(imp); } - EXIT; + /* catch possible import put race */ + LASSERT_ATOMIC_GE_LT(&imp->imp_refcount, 0, LI_POISON); + EXIT; } EXPORT_SYMBOL(class_import_put); @@ -983,52 +1045,55 @@ static void init_imp_at(struct imp_at *at) { struct obd_import *class_new_import(struct obd_device *obd) { - struct obd_import *imp; - - OBD_ALLOC(imp, sizeof(*imp)); - if (imp == NULL) - return NULL; - - CFS_INIT_LIST_HEAD(&imp->imp_zombie_chain); - CFS_INIT_LIST_HEAD(&imp->imp_replay_list); - CFS_INIT_LIST_HEAD(&imp->imp_sending_list); - CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); - cfs_spin_lock_init(&imp->imp_lock); - imp->imp_last_success_conn = 0; - imp->imp_state = LUSTRE_IMP_NEW; - imp->imp_obd = class_incref(obd, "import", imp); - cfs_sema_init(&imp->imp_sec_mutex, 1); - cfs_waitq_init(&imp->imp_recovery_waitq); - - cfs_atomic_set(&imp->imp_refcount, 2); - cfs_atomic_set(&imp->imp_unregistering, 0); - cfs_atomic_set(&imp->imp_inflight, 0); - cfs_atomic_set(&imp->imp_replay_inflight, 0); - cfs_atomic_set(&imp->imp_inval_count, 0); - CFS_INIT_LIST_HEAD(&imp->imp_conn_list); - CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link); - class_handle_hash(&imp->imp_handle, import_handle_addref); - init_imp_at(&imp->imp_at); - - /* the default magic is V2, will be used in connect RPC, and - * then adjusted according to the flags in request/reply. */ - imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2; - - return imp; + struct obd_import *imp; + + OBD_ALLOC(imp, sizeof(*imp)); + if (imp == NULL) + return NULL; + + INIT_LIST_HEAD(&imp->imp_pinger_chain); + INIT_LIST_HEAD(&imp->imp_zombie_chain); + INIT_LIST_HEAD(&imp->imp_replay_list); + INIT_LIST_HEAD(&imp->imp_sending_list); + INIT_LIST_HEAD(&imp->imp_delayed_list); + INIT_LIST_HEAD(&imp->imp_committed_list); + imp->imp_replay_cursor = &imp->imp_committed_list; + spin_lock_init(&imp->imp_lock); + imp->imp_last_success_conn = 0; + imp->imp_state = LUSTRE_IMP_NEW; + imp->imp_obd = class_incref(obd, "import", imp); + mutex_init(&imp->imp_sec_mutex); + init_waitqueue_head(&imp->imp_recovery_waitq); + + atomic_set(&imp->imp_refcount, 2); + atomic_set(&imp->imp_unregistering, 0); + atomic_set(&imp->imp_inflight, 0); + atomic_set(&imp->imp_replay_inflight, 0); + atomic_set(&imp->imp_inval_count, 0); + INIT_LIST_HEAD(&imp->imp_conn_list); + INIT_LIST_HEAD(&imp->imp_handle.h_link); + class_handle_hash(&imp->imp_handle, &import_handle_ops); + init_imp_at(&imp->imp_at); + + /* the default magic is V2, will be used in connect RPC, and + * then adjusted according to the flags in request/reply. */ + imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2; + + return imp; } EXPORT_SYMBOL(class_new_import); void class_destroy_import(struct obd_import *import) { - LASSERT(import != NULL); - LASSERT(import != LP_POISON); + LASSERT(import != NULL); + LASSERT(import != LP_POISON); - class_handle_unhash(&import->imp_handle); + class_handle_unhash(&import->imp_handle); - cfs_spin_lock(&import->imp_lock); - import->imp_generation++; - cfs_spin_unlock(&import->imp_lock); - class_import_put(import); + spin_lock(&import->imp_lock); + import->imp_generation++; + spin_unlock(&import->imp_lock); + class_import_put(import); } EXPORT_SYMBOL(class_destroy_import); @@ -1036,7 +1101,7 @@ EXPORT_SYMBOL(class_destroy_import); void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) { - cfs_spin_lock(&exp->exp_locks_list_guard); + spin_lock(&exp->exp_locks_list_guard); LASSERT(lock->l_exp_refs_nr >= 0); @@ -1046,18 +1111,17 @@ void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) exp, lock, lock->l_exp_refs_target); } if ((lock->l_exp_refs_nr ++) == 0) { - cfs_list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); + list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); lock->l_exp_refs_target = exp; } CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", lock, exp, lock->l_exp_refs_nr); - cfs_spin_unlock(&exp->exp_locks_list_guard); + spin_unlock(&exp->exp_locks_list_guard); } -EXPORT_SYMBOL(__class_export_add_lock_ref); void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) { - cfs_spin_lock(&exp->exp_locks_list_guard); + spin_lock(&exp->exp_locks_list_guard); LASSERT(lock->l_exp_refs_nr > 0); if (lock->l_exp_refs_target != exp) { LCONSOLE_WARN("lock %p, " @@ -1065,14 +1129,13 @@ void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) lock, lock->l_exp_refs_target, exp); } if (-- lock->l_exp_refs_nr == 0) { - cfs_list_del_init(&lock->l_exp_refs_link); + list_del_init(&lock->l_exp_refs_link); lock->l_exp_refs_target = NULL; } CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", lock, exp, lock->l_exp_refs_nr); - cfs_spin_unlock(&exp->exp_locks_list_guard); + spin_unlock(&exp->exp_locks_list_guard); } -EXPORT_SYMBOL(__class_export_del_lock_ref); #endif /* A connection defines an export context in which preallocation can @@ -1102,37 +1165,45 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd, EXPORT_SYMBOL(class_connect); /* if export is involved in recovery then clean up related things */ -void class_export_recovery_cleanup(struct obd_export *exp) -{ - struct obd_device *obd = exp->exp_obd; - - cfs_spin_lock(&obd->obd_recovery_task_lock); - if (exp->exp_delayed) - obd->obd_delayed_clients--; - if (obd->obd_recovering && exp->exp_in_recovery) { - cfs_spin_lock(&exp->exp_lock); - exp->exp_in_recovery = 0; - cfs_spin_unlock(&exp->exp_lock); - LASSERT_ATOMIC_POS(&obd->obd_connected_clients); - cfs_atomic_dec(&obd->obd_connected_clients); - } - cfs_spin_unlock(&obd->obd_recovery_task_lock); - /** Cleanup req replay fields */ - if (exp->exp_req_replay_needed) { - cfs_spin_lock(&exp->exp_lock); - exp->exp_req_replay_needed = 0; - cfs_spin_unlock(&exp->exp_lock); - LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients)); - cfs_atomic_dec(&obd->obd_req_replay_clients); - } - /** Cleanup lock replay data */ - if (exp->exp_lock_replay_needed) { - cfs_spin_lock(&exp->exp_lock); - exp->exp_lock_replay_needed = 0; - cfs_spin_unlock(&exp->exp_lock); - LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients)); - cfs_atomic_dec(&obd->obd_lock_replay_clients); - } +static void class_export_recovery_cleanup(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + + spin_lock(&obd->obd_recovery_task_lock); + if (obd->obd_recovering) { + if (exp->exp_in_recovery) { + spin_lock(&exp->exp_lock); + exp->exp_in_recovery = 0; + spin_unlock(&exp->exp_lock); + LASSERT_ATOMIC_POS(&obd->obd_connected_clients); + atomic_dec(&obd->obd_connected_clients); + } + + /* if called during recovery then should update + * obd_stale_clients counter, + * lightweight exports are not counted */ + if ((exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) == 0) + exp->exp_obd->obd_stale_clients++; + } + spin_unlock(&obd->obd_recovery_task_lock); + + spin_lock(&exp->exp_lock); + /** Cleanup req replay fields */ + if (exp->exp_req_replay_needed) { + exp->exp_req_replay_needed = 0; + + LASSERT(atomic_read(&obd->obd_req_replay_clients)); + atomic_dec(&obd->obd_req_replay_clients); + } + + /** Cleanup lock replay data */ + if (exp->exp_lock_replay_needed) { + exp->exp_lock_replay_needed = 0; + + LASSERT(atomic_read(&obd->obd_lock_replay_clients)); + atomic_dec(&obd->obd_lock_replay_clients); + } + spin_unlock(&exp->exp_lock); } /* This function removes 1-3 references from the export: @@ -1151,23 +1222,23 @@ int class_disconnect(struct obd_export *export) RETURN(-EINVAL); } - cfs_spin_lock(&export->exp_lock); - already_disconnected = export->exp_disconnected; - export->exp_disconnected = 1; - cfs_spin_unlock(&export->exp_lock); + spin_lock(&export->exp_lock); + already_disconnected = export->exp_disconnected; + export->exp_disconnected = 1; + spin_unlock(&export->exp_lock); /* class_cleanup(), abort_recovery(), and class_fail_export() * all end up in here, and if any of them race we shouldn't * call extra class_export_puts(). */ if (already_disconnected) { - LASSERT(cfs_hlist_unhashed(&export->exp_nid_hash)); + LASSERT(hlist_unhashed(&export->exp_nid_hash)); GOTO(no_disconn, already_disconnected); } CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", export->exp_handle.h_cookie); - if (!cfs_hlist_unhashed(&export->exp_nid_hash)) + if (!hlist_unhashed(&export->exp_nid_hash)) cfs_hash_del(export->exp_obd->obd_nid_hash, &export->exp_connection->c_peer.nid, &export->exp_nid_hash); @@ -1183,18 +1254,18 @@ EXPORT_SYMBOL(class_disconnect); /* Return non-zero for a fully connected export */ int class_connected_export(struct obd_export *exp) { - if (exp) { - int connected; - cfs_spin_lock(&exp->exp_lock); - connected = (exp->exp_conn_cnt > 0); - cfs_spin_unlock(&exp->exp_lock); - return connected; - } - return 0; + int connected = 0; + + if (exp) { + spin_lock(&exp->exp_lock); + connected = (exp->exp_conn_cnt > 0) && !exp->exp_failed; + spin_unlock(&exp->exp_lock); + } + return connected; } EXPORT_SYMBOL(class_connected_export); -static void class_disconnect_export_list(cfs_list_t *list, +static void class_disconnect_export_list(struct list_head *list, enum obd_option flags) { int rc; @@ -1203,15 +1274,15 @@ static void class_disconnect_export_list(cfs_list_t *list, /* It's possible that an export may disconnect itself, but * nothing else will be added to this list. */ - while (!cfs_list_empty(list)) { - exp = cfs_list_entry(list->next, struct obd_export, - exp_obd_chain); - /* need for safe call CDEBUG after obd_disconnect */ - class_export_get(exp); + while (!list_empty(list)) { + exp = list_entry(list->next, struct obd_export, + exp_obd_chain); + /* need for safe call CDEBUG after obd_disconnect */ + class_export_get(exp); - cfs_spin_lock(&exp->exp_lock); - exp->exp_flags = flags; - cfs_spin_unlock(&exp->exp_lock); + spin_lock(&exp->exp_lock); + exp->exp_flags = flags; + spin_unlock(&exp->exp_lock); if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) { @@ -1220,7 +1291,7 @@ static void class_disconnect_export_list(cfs_list_t *list, exp); /* Need to delete this now so we don't end up pointing * to work_list later when this export is cleaned up. */ - cfs_list_del_init(&exp->exp_obd_chain); + list_del_init(&exp->exp_obd_chain); class_export_put(exp); continue; } @@ -1242,17 +1313,17 @@ static void class_disconnect_export_list(cfs_list_t *list, void class_disconnect_exports(struct obd_device *obd) { - cfs_list_t work_list; - ENTRY; + struct list_head work_list; + ENTRY; - /* Move all of the exports from obd_exports to a work list, en masse. */ - CFS_INIT_LIST_HEAD(&work_list); - cfs_spin_lock(&obd->obd_dev_lock); - cfs_list_splice_init(&obd->obd_exports, &work_list); - cfs_list_splice_init(&obd->obd_delayed_exports, &work_list); - cfs_spin_unlock(&obd->obd_dev_lock); + /* Move all of the exports from obd_exports to a work list, en masse. */ + INIT_LIST_HEAD(&work_list); + spin_lock(&obd->obd_dev_lock); + list_splice_init(&obd->obd_exports, &work_list); + list_splice_init(&obd->obd_delayed_exports, &work_list); + spin_unlock(&obd->obd_dev_lock); - if (!cfs_list_empty(&work_list)) { + if (!list_empty(&work_list)) { CDEBUG(D_HA, "OBD device %d (%p) has exports, " "disconnecting them\n", obd->obd_minor, obd); class_disconnect_export_list(&work_list, @@ -1269,53 +1340,61 @@ EXPORT_SYMBOL(class_disconnect_exports); void class_disconnect_stale_exports(struct obd_device *obd, int (*test_export)(struct obd_export *)) { - cfs_list_t work_list; - cfs_list_t *pos, *n; - struct obd_export *exp; + struct list_head work_list; + struct obd_export *exp, *n; int evicted = 0; ENTRY; - CFS_INIT_LIST_HEAD(&work_list); - cfs_spin_lock(&obd->obd_dev_lock); - cfs_list_for_each_safe(pos, n, &obd->obd_exports) { - exp = cfs_list_entry(pos, struct obd_export, exp_obd_chain); - if (test_export(exp)) - continue; - + INIT_LIST_HEAD(&work_list); + spin_lock(&obd->obd_dev_lock); + list_for_each_entry_safe(exp, n, &obd->obd_exports, + exp_obd_chain) { /* don't count self-export as client */ if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) continue; - cfs_list_move(&exp->exp_obd_chain, &work_list); + /* don't evict clients which have no slot in last_rcvd + * (e.g. lightweight connection) */ + if (exp->exp_target_data.ted_lr_idx == -1) + continue; + + spin_lock(&exp->exp_lock); + if (exp->exp_failed || test_export(exp)) { + spin_unlock(&exp->exp_lock); + continue; + } + exp->exp_failed = 1; + spin_unlock(&exp->exp_lock); + + list_move(&exp->exp_obd_chain, &work_list); evicted++; - CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n", + CDEBUG(D_HA, "%s: disconnect stale client %s@%s\n", obd->obd_name, exp->exp_client_uuid.uuid, exp->exp_connection == NULL ? "" : libcfs_nid2str(exp->exp_connection->c_peer.nid)); print_export_data(exp, "EVICTING", 0); } - cfs_spin_unlock(&obd->obd_dev_lock); + spin_unlock(&obd->obd_dev_lock); - if (evicted) { - CDEBUG(D_HA, "%s: disconnecting %d stale clients\n", - obd->obd_name, evicted); - obd->obd_stale_clients += evicted; - } - class_disconnect_export_list(&work_list, exp_flags_from_obd(obd) | - OBD_OPT_ABORT_RECOV); - EXIT; + if (evicted) + LCONSOLE_WARN("%s: disconnecting %d stale clients\n", + obd->obd_name, evicted); + + class_disconnect_export_list(&work_list, exp_flags_from_obd(obd) | + OBD_OPT_ABORT_RECOV); + EXIT; } EXPORT_SYMBOL(class_disconnect_stale_exports); void class_fail_export(struct obd_export *exp) { - int rc, already_failed; + int rc, already_failed; - cfs_spin_lock(&exp->exp_lock); - already_failed = exp->exp_failed; - exp->exp_failed = 1; - cfs_spin_unlock(&exp->exp_lock); + spin_lock(&exp->exp_lock); + already_failed = exp->exp_failed; + exp->exp_failed = 1; + spin_unlock(&exp->exp_lock); if (already_failed) { CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n", @@ -1329,6 +1408,9 @@ void class_fail_export(struct obd_export *exp) if (obd_dump_on_timeout) libcfs_debug_dumplog(); + /* need for safe call CDEBUG after obd_disconnect */ + class_export_get(exp); + /* Most callers into obd_disconnect are removing their own reference * (request, for example) in addition to the one from the hash table. * We don't have such a reference here, so make one. */ @@ -1339,6 +1421,7 @@ void class_fail_export(struct obd_export *exp) else CDEBUG(D_HA, "disconnected export %p/%s\n", exp, exp->exp_client_uuid.uuid); + class_export_put(exp); } EXPORT_SYMBOL(class_fail_export); @@ -1353,13 +1436,25 @@ EXPORT_SYMBOL(obd_export_nid2str); int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) { - struct obd_export *doomed_exp = NULL; - int exports_evicted = 0; - - lnet_nid_t nid_key = libcfs_str2nid((char *)nid); - - do { - doomed_exp = cfs_hash_lookup(obd->obd_nid_hash, &nid_key); + struct cfs_hash *nid_hash; + struct obd_export *doomed_exp = NULL; + int exports_evicted = 0; + + lnet_nid_t nid_key = libcfs_str2nid((char *)nid); + + spin_lock(&obd->obd_dev_lock); + /* umount has run already, so evict thread should leave + * its task to umount thread now */ + if (obd->obd_stopping) { + spin_unlock(&obd->obd_dev_lock); + return exports_evicted; + } + nid_hash = obd->obd_nid_hash; + cfs_hash_getref(nid_hash); + spin_unlock(&obd->obd_dev_lock); + + do { + doomed_exp = cfs_hash_lookup(nid_hash, &nid_key); if (doomed_exp == NULL) break; @@ -1370,13 +1465,16 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) LASSERTF(doomed_exp != obd->obd_self_export, "self-export is hashed by NID?\n"); exports_evicted++; - CWARN("%s: evict NID '%s' (%s) #%d at adminstrative request\n", - obd->obd_name, nid, doomed_exp->exp_client_uuid.uuid, - exports_evicted); + LCONSOLE_WARN("%s: evicting %s (at %s) by administrative " + "request\n", obd->obd_name, + obd_uuid2str(&doomed_exp->exp_client_uuid), + obd_export_nid2str(doomed_exp)); class_fail_export(doomed_exp); class_export_put(doomed_exp); } while (1); + cfs_hash_putref(nid_hash); + if (!exports_evicted) CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n", obd->obd_name, nid); @@ -1386,17 +1484,28 @@ EXPORT_SYMBOL(obd_export_evict_by_nid); int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) { - struct obd_export *doomed_exp = NULL; - struct obd_uuid doomed_uuid; - int exports_evicted = 0; + struct cfs_hash *uuid_hash; + struct obd_export *doomed_exp = NULL; + struct obd_uuid doomed_uuid; + int exports_evicted = 0; + + spin_lock(&obd->obd_dev_lock); + if (obd->obd_stopping) { + spin_unlock(&obd->obd_dev_lock); + return exports_evicted; + } + uuid_hash = obd->obd_uuid_hash; + cfs_hash_getref(uuid_hash); + spin_unlock(&obd->obd_dev_lock); obd_str2uuid(&doomed_uuid, uuid); if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) { CERROR("%s: can't evict myself\n", obd->obd_name); + cfs_hash_putref(uuid_hash); return exports_evicted; } - doomed_exp = cfs_hash_lookup(obd->obd_uuid_hash, &doomed_uuid); + doomed_exp = cfs_hash_lookup(uuid_hash, &doomed_uuid); if (doomed_exp == NULL) { CERROR("%s: can't disconnect %s: no exports found\n", @@ -1408,38 +1517,37 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) class_export_put(doomed_exp); exports_evicted++; } + cfs_hash_putref(uuid_hash); return exports_evicted; } -EXPORT_SYMBOL(obd_export_evict_by_uuid); #if LUSTRE_TRACKS_LOCK_EXP_REFS void (*class_export_dump_hook)(struct obd_export*) = NULL; -EXPORT_SYMBOL(class_export_dump_hook); #endif static void print_export_data(struct obd_export *exp, const char *status, - int locks) -{ - struct ptlrpc_reply_state *rs; - struct ptlrpc_reply_state *first_reply = NULL; - int nreplies = 0; - - cfs_spin_lock(&exp->exp_lock); - cfs_list_for_each_entry(rs, &exp->exp_outstanding_replies, - rs_exp_list) { - if (nreplies == 0) - first_reply = rs; - nreplies++; - } - cfs_spin_unlock(&exp->exp_lock); + int locks) +{ + struct ptlrpc_reply_state *rs; + struct ptlrpc_reply_state *first_reply = NULL; + int nreplies = 0; + + spin_lock(&exp->exp_lock); + list_for_each_entry(rs, &exp->exp_outstanding_replies, + rs_exp_list) { + if (nreplies == 0) + first_reply = rs; + nreplies++; + } + spin_unlock(&exp->exp_lock); CDEBUG(D_HA, "%s: %s %p %s %s %d (%d %d %d) %d %d %d %d: %p %s "LPU64"\n", exp->exp_obd->obd_name, status, exp, exp->exp_client_uuid.uuid, - obd_export_nid2str(exp), cfs_atomic_read(&exp->exp_refcount), - cfs_atomic_read(&exp->exp_rpc_count), - cfs_atomic_read(&exp->exp_cb_count), - cfs_atomic_read(&exp->exp_locks_count), + obd_export_nid2str(exp), atomic_read(&exp->exp_refcount), + atomic_read(&exp->exp_rpc_count), + atomic_read(&exp->exp_cb_count), + atomic_read(&exp->exp_locks_count), exp->exp_disconnected, exp->exp_delayed, exp->exp_failed, nreplies, first_reply, nreplies > 3 ? "..." : "", exp->exp_last_committed); @@ -1453,42 +1561,41 @@ void dump_exports(struct obd_device *obd, int locks) { struct obd_export *exp; - cfs_spin_lock(&obd->obd_dev_lock); - cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) - print_export_data(exp, "ACTIVE", locks); - cfs_list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) - print_export_data(exp, "UNLINKED", locks); - cfs_list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) - print_export_data(exp, "DELAYED", locks); - cfs_spin_unlock(&obd->obd_dev_lock); - cfs_spin_lock(&obd_zombie_impexp_lock); - cfs_list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain) - print_export_data(exp, "ZOMBIE", locks); - cfs_spin_unlock(&obd_zombie_impexp_lock); -} -EXPORT_SYMBOL(dump_exports); + spin_lock(&obd->obd_dev_lock); + list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) + print_export_data(exp, "ACTIVE", locks); + list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) + print_export_data(exp, "UNLINKED", locks); + list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) + print_export_data(exp, "DELAYED", locks); + spin_unlock(&obd->obd_dev_lock); + spin_lock(&obd_zombie_impexp_lock); + list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain) + print_export_data(exp, "ZOMBIE", locks); + spin_unlock(&obd_zombie_impexp_lock); +} void obd_exports_barrier(struct obd_device *obd) { - int waited = 2; - LASSERT(cfs_list_empty(&obd->obd_exports)); - cfs_spin_lock(&obd->obd_dev_lock); - while (!cfs_list_empty(&obd->obd_unlinked_exports)) { - cfs_spin_unlock(&obd->obd_dev_lock); - cfs_schedule_timeout_and_set_state(CFS_TASK_UNINT, - cfs_time_seconds(waited)); - if (waited > 5 && IS_PO2(waited)) { - LCONSOLE_WARN("%s is waiting for obd_unlinked_exports " - "more than %d seconds. " - "The obd refcount = %d. Is it stuck?\n", - obd->obd_name, waited, - cfs_atomic_read(&obd->obd_refcount)); - dump_exports(obd, 1); - } - waited *= 2; - cfs_spin_lock(&obd->obd_dev_lock); - } - cfs_spin_unlock(&obd->obd_dev_lock); + int waited = 2; + LASSERT(list_empty(&obd->obd_exports)); + spin_lock(&obd->obd_dev_lock); + while (!list_empty(&obd->obd_unlinked_exports)) { + spin_unlock(&obd->obd_dev_lock); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(waited)); + if (waited > 5 && IS_PO2(waited)) { + LCONSOLE_WARN("%s is waiting for obd_unlinked_exports " + "more than %d seconds. " + "The obd refcount = %d. Is it stuck?\n", + obd->obd_name, waited, + atomic_read(&obd->obd_refcount)); + dump_exports(obd, 1); + } + waited *= 2; + spin_lock(&obd->obd_dev_lock); + } + spin_unlock(&obd->obd_dev_lock); } EXPORT_SYMBOL(obd_exports_barrier); @@ -1500,58 +1607,58 @@ static int zombies_count = 0; */ void obd_zombie_impexp_cull(void) { - struct obd_import *import; - struct obd_export *export; - ENTRY; - - do { - cfs_spin_lock(&obd_zombie_impexp_lock); - - import = NULL; - if (!cfs_list_empty(&obd_zombie_imports)) { - import = cfs_list_entry(obd_zombie_imports.next, - struct obd_import, - imp_zombie_chain); - cfs_list_del_init(&import->imp_zombie_chain); - } - - export = NULL; - if (!cfs_list_empty(&obd_zombie_exports)) { - export = cfs_list_entry(obd_zombie_exports.next, - struct obd_export, - exp_obd_chain); - cfs_list_del_init(&export->exp_obd_chain); - } - - cfs_spin_unlock(&obd_zombie_impexp_lock); - - if (import != NULL) { - class_import_destroy(import); - cfs_spin_lock(&obd_zombie_impexp_lock); - zombies_count--; - cfs_spin_unlock(&obd_zombie_impexp_lock); - } - - if (export != NULL) { - class_export_destroy(export); - cfs_spin_lock(&obd_zombie_impexp_lock); - zombies_count--; - cfs_spin_unlock(&obd_zombie_impexp_lock); - } - - cfs_cond_resched(); - } while (import != NULL || export != NULL); - EXIT; -} - -static cfs_completion_t obd_zombie_start; -static cfs_completion_t obd_zombie_stop; -static unsigned long obd_zombie_flags; -static cfs_waitq_t obd_zombie_waitq; -static pid_t obd_zombie_pid; + struct obd_import *import; + struct obd_export *export; + ENTRY; + + do { + spin_lock(&obd_zombie_impexp_lock); + + import = NULL; + if (!list_empty(&obd_zombie_imports)) { + import = list_entry(obd_zombie_imports.next, + struct obd_import, + imp_zombie_chain); + list_del_init(&import->imp_zombie_chain); + } + + export = NULL; + if (!list_empty(&obd_zombie_exports)) { + export = list_entry(obd_zombie_exports.next, + struct obd_export, + exp_obd_chain); + list_del_init(&export->exp_obd_chain); + } + + spin_unlock(&obd_zombie_impexp_lock); + + if (import != NULL) { + class_import_destroy(import); + spin_lock(&obd_zombie_impexp_lock); + zombies_count--; + spin_unlock(&obd_zombie_impexp_lock); + } + + if (export != NULL) { + class_export_destroy(export); + spin_lock(&obd_zombie_impexp_lock); + zombies_count--; + spin_unlock(&obd_zombie_impexp_lock); + } + + cond_resched(); + } while (import != NULL || export != NULL); + EXIT; +} + +static struct completion obd_zombie_start; +static struct completion obd_zombie_stop; +static unsigned long obd_zombie_flags; +static wait_queue_head_t obd_zombie_waitq; +static pid_t obd_zombie_pid; enum { - OBD_ZOMBIE_STOP = 1 << 1 + OBD_ZOMBIE_STOP = 0x0001, }; /** @@ -1559,44 +1666,45 @@ enum { */ static int obd_zombie_impexp_check(void *arg) { - int rc; + int rc; - cfs_spin_lock(&obd_zombie_impexp_lock); - rc = (zombies_count == 0) && - !cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); - cfs_spin_unlock(&obd_zombie_impexp_lock); + spin_lock(&obd_zombie_impexp_lock); + rc = (zombies_count == 0) && + !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + spin_unlock(&obd_zombie_impexp_lock); - RETURN(rc); + RETURN(rc); } /** * Add export to the obd_zombe thread and notify it. */ static void obd_zombie_export_add(struct obd_export *exp) { - cfs_spin_lock(&exp->exp_obd->obd_dev_lock); - LASSERT(!cfs_list_empty(&exp->exp_obd_chain)); - cfs_list_del_init(&exp->exp_obd_chain); - cfs_spin_unlock(&exp->exp_obd->obd_dev_lock); - cfs_spin_lock(&obd_zombie_impexp_lock); - zombies_count++; - cfs_list_add(&exp->exp_obd_chain, &obd_zombie_exports); - cfs_spin_unlock(&obd_zombie_impexp_lock); + atomic_dec(&obd_stale_export_num); + spin_lock(&exp->exp_obd->obd_dev_lock); + LASSERT(!list_empty(&exp->exp_obd_chain)); + list_del_init(&exp->exp_obd_chain); + spin_unlock(&exp->exp_obd->obd_dev_lock); + spin_lock(&obd_zombie_impexp_lock); + zombies_count++; + list_add(&exp->exp_obd_chain, &obd_zombie_exports); + spin_unlock(&obd_zombie_impexp_lock); - obd_zombie_impexp_notify(); + obd_zombie_impexp_notify(); } /** * Add import to the obd_zombe thread and notify it. */ static void obd_zombie_import_add(struct obd_import *imp) { - LASSERT(imp->imp_sec == NULL); - cfs_spin_lock(&obd_zombie_impexp_lock); - LASSERT(cfs_list_empty(&imp->imp_zombie_chain)); - zombies_count++; - cfs_list_add(&imp->imp_zombie_chain, &obd_zombie_imports); - cfs_spin_unlock(&obd_zombie_impexp_lock); + LASSERT(imp->imp_sec == NULL); + spin_lock(&obd_zombie_impexp_lock); + LASSERT(list_empty(&imp->imp_zombie_chain)); + zombies_count++; + list_add(&imp->imp_zombie_chain, &obd_zombie_imports); + spin_unlock(&obd_zombie_impexp_lock); - obd_zombie_impexp_notify(); + obd_zombie_impexp_notify(); } /** @@ -1604,12 +1712,12 @@ static void obd_zombie_import_add(struct obd_import *imp) { */ static void obd_zombie_impexp_notify(void) { - /* - * Make sure obd_zomebie_impexp_thread get this notification. - * It is possible this signal only get by obd_zombie_barrier, and - * barrier gulps this notification and sleeps away and hangs ensues - */ - cfs_waitq_broadcast(&obd_zombie_waitq); + /* + * Make sure obd_zomebie_impexp_thread get this notification. + * It is possible this signal only get by obd_zombie_barrier, and + * barrier gulps this notification and sleeps away and hangs ensues + */ + wake_up_all(&obd_zombie_waitq); } /** @@ -1617,13 +1725,13 @@ static void obd_zombie_impexp_notify(void) */ static int obd_zombie_is_idle(void) { - int rc; + int rc; - LASSERT(!cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); - cfs_spin_lock(&obd_zombie_impexp_lock); - rc = (zombies_count == 0); - cfs_spin_unlock(&obd_zombie_impexp_lock); - return rc; + LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); + spin_lock(&obd_zombie_impexp_lock); + rc = (zombies_count == 0); + spin_unlock(&obd_zombie_impexp_lock); + return rc; } /** @@ -1631,119 +1739,147 @@ static int obd_zombie_is_idle(void) */ void obd_zombie_barrier(void) { - struct l_wait_info lwi = { 0 }; + struct l_wait_info lwi = { 0 }; - if (obd_zombie_pid == cfs_curproc_pid()) - /* don't wait for myself */ - return; - l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi); + if (obd_zombie_pid == current_pid()) + /* don't wait for myself */ + return; + l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi); } EXPORT_SYMBOL(obd_zombie_barrier); -#ifdef __KERNEL__ -/** - * destroy zombie export/import thread. - */ -static int obd_zombie_impexp_thread(void *unused) +struct obd_export *obd_stale_export_get(void) { - int rc; + struct obd_export *exp = NULL; + ENTRY; - if ((rc = cfs_daemonize_ctxt("obd_zombid"))) { - cfs_complete(&obd_zombie_start); - RETURN(rc); - } + spin_lock(&obd_stale_export_lock); + if (!list_empty(&obd_stale_exports)) { + exp = list_entry(obd_stale_exports.next, + struct obd_export, exp_stale_list); + list_del_init(&exp->exp_stale_list); + } + spin_unlock(&obd_stale_export_lock); - cfs_complete(&obd_zombie_start); + if (exp) { + CDEBUG(D_DLMTRACE, "Get export %p: total %d\n", exp, + atomic_read(&obd_stale_export_num)); + } + RETURN(exp); +} +EXPORT_SYMBOL(obd_stale_export_get); - obd_zombie_pid = cfs_curproc_pid(); +void obd_stale_export_put(struct obd_export *exp) +{ + ENTRY; - while(!cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { - struct l_wait_info lwi = { 0 }; + LASSERT(list_empty(&exp->exp_stale_list)); + if (exp->exp_lock_hash && + atomic_read(&exp->exp_lock_hash->hs_count)) { + CDEBUG(D_DLMTRACE, "Put export %p: total %d\n", exp, + atomic_read(&obd_stale_export_num)); - l_wait_event(obd_zombie_waitq, - !obd_zombie_impexp_check(NULL), &lwi); - obd_zombie_impexp_cull(); + spin_lock_bh(&exp->exp_bl_list_lock); + spin_lock(&obd_stale_export_lock); + /* Add to the tail if there is no blocked locks, + * to the head otherwise. */ + if (list_empty(&exp->exp_bl_list)) + list_add_tail(&exp->exp_stale_list, + &obd_stale_exports); + else + list_add(&exp->exp_stale_list, + &obd_stale_exports); - /* - * Notify obd_zombie_barrier callers that queues - * may be empty. - */ - cfs_waitq_signal(&obd_zombie_waitq); - } + spin_unlock(&obd_stale_export_lock); + spin_unlock_bh(&exp->exp_bl_list_lock); + } else { + class_export_put(exp); + } + EXIT; +} +EXPORT_SYMBOL(obd_stale_export_put); - cfs_complete(&obd_zombie_stop); +/** + * Adjust the position of the export in the stale list, + * i.e. move to the head of the list if is needed. + **/ +void obd_stale_export_adjust(struct obd_export *exp) +{ + LASSERT(exp != NULL); + spin_lock_bh(&exp->exp_bl_list_lock); + spin_lock(&obd_stale_export_lock); - RETURN(0); + if (!list_empty(&exp->exp_stale_list) && + !list_empty(&exp->exp_bl_list)) + list_move(&exp->exp_stale_list, &obd_stale_exports); + + spin_unlock(&obd_stale_export_lock); + spin_unlock_bh(&exp->exp_bl_list_lock); } +EXPORT_SYMBOL(obd_stale_export_adjust); + +/** + * destroy zombie export/import thread. + */ +static int obd_zombie_impexp_thread(void *unused) +{ + unshare_fs_struct(); + complete(&obd_zombie_start); -#else /* ! KERNEL */ + obd_zombie_pid = current_pid(); -static cfs_atomic_t zombie_recur = CFS_ATOMIC_INIT(0); -static void *obd_zombie_impexp_work_cb; -static void *obd_zombie_impexp_idle_cb; + while (!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { + struct l_wait_info lwi = { 0 }; -int obd_zombie_impexp_kill(void *arg) -{ - int rc = 0; + l_wait_event(obd_zombie_waitq, + !obd_zombie_impexp_check(NULL), &lwi); + obd_zombie_impexp_cull(); - if (cfs_atomic_inc_return(&zombie_recur) == 1) { - obd_zombie_impexp_cull(); - rc = 1; - } - cfs_atomic_dec(&zombie_recur); - return rc; + /* + * Notify obd_zombie_barrier callers that queues + * may be empty. + */ + wake_up(&obd_zombie_waitq); + } + + complete(&obd_zombie_stop); + + RETURN(0); } -#endif /** * start destroy zombie import/export thread */ int obd_zombie_impexp_init(void) { - int rc; + struct task_struct *task; - CFS_INIT_LIST_HEAD(&obd_zombie_imports); - CFS_INIT_LIST_HEAD(&obd_zombie_exports); - cfs_spin_lock_init(&obd_zombie_impexp_lock); - cfs_init_completion(&obd_zombie_start); - cfs_init_completion(&obd_zombie_stop); - cfs_waitq_init(&obd_zombie_waitq); - obd_zombie_pid = 0; - -#ifdef __KERNEL__ - rc = cfs_create_thread(obd_zombie_impexp_thread, NULL, 0); - if (rc < 0) - RETURN(rc); + INIT_LIST_HEAD(&obd_zombie_imports); - cfs_wait_for_completion(&obd_zombie_start); -#else + INIT_LIST_HEAD(&obd_zombie_exports); + spin_lock_init(&obd_zombie_impexp_lock); + init_completion(&obd_zombie_start); + init_completion(&obd_zombie_stop); + init_waitqueue_head(&obd_zombie_waitq); + obd_zombie_pid = 0; - obd_zombie_impexp_work_cb = - liblustre_register_wait_callback("obd_zombi_impexp_kill", - &obd_zombie_impexp_kill, NULL); + task = kthread_run(obd_zombie_impexp_thread, NULL, "obd_zombid"); + if (IS_ERR(task)) + RETURN(PTR_ERR(task)); - obd_zombie_impexp_idle_cb = - liblustre_register_idle_callback("obd_zombi_impexp_check", - &obd_zombie_impexp_check, NULL); - rc = 0; -#endif - RETURN(rc); + wait_for_completion(&obd_zombie_start); + RETURN(0); } /** * stop destroy zombie import/export thread */ void obd_zombie_impexp_stop(void) { - cfs_set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); obd_zombie_impexp_notify(); -#ifdef __KERNEL__ - cfs_wait_for_completion(&obd_zombie_stop); -#else - liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb); - liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb); -#endif + wait_for_completion(&obd_zombie_stop); } /***** Kernel-userspace comm helpers *******/ @@ -1811,5 +1947,368 @@ inline void kuc_free(void *p, int payload_len) } EXPORT_SYMBOL(kuc_free); +struct obd_request_slot_waiter { + struct list_head orsw_entry; + wait_queue_head_t orsw_waitq; + bool orsw_signaled; +}; + +static bool obd_request_slot_avail(struct client_obd *cli, + struct obd_request_slot_waiter *orsw) +{ + bool avail; + + spin_lock(&cli->cl_loi_list_lock); + avail = !!list_empty(&orsw->orsw_entry); + spin_unlock(&cli->cl_loi_list_lock); + + return avail; +}; + +/* + * For network flow control, the RPC sponsor needs to acquire a credit + * before sending the RPC. The credits count for a connection is defined + * by the "cl_max_rpcs_in_flight". If all the credits are occpuied, then + * the subsequent RPC sponsors need to wait until others released their + * credits, or the administrator increased the "cl_max_rpcs_in_flight". + */ +int obd_get_request_slot(struct client_obd *cli) +{ + struct obd_request_slot_waiter orsw; + struct l_wait_info lwi; + int rc; + + spin_lock(&cli->cl_loi_list_lock); + if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) { + cli->cl_r_in_flight++; + spin_unlock(&cli->cl_loi_list_lock); + return 0; + } + + init_waitqueue_head(&orsw.orsw_waitq); + list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list); + orsw.orsw_signaled = false; + spin_unlock(&cli->cl_loi_list_lock); + + lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + rc = l_wait_event(orsw.orsw_waitq, + obd_request_slot_avail(cli, &orsw) || + orsw.orsw_signaled, + &lwi); + + /* Here, we must take the lock to avoid the on-stack 'orsw' to be + * freed but other (such as obd_put_request_slot) is using it. */ + spin_lock(&cli->cl_loi_list_lock); + if (rc != 0) { + if (!orsw.orsw_signaled) { + if (list_empty(&orsw.orsw_entry)) + cli->cl_r_in_flight--; + else + list_del(&orsw.orsw_entry); + } + } + + if (orsw.orsw_signaled) { + LASSERT(list_empty(&orsw.orsw_entry)); + + rc = -EINTR; + } + spin_unlock(&cli->cl_loi_list_lock); + + return rc; +} +EXPORT_SYMBOL(obd_get_request_slot); + +void obd_put_request_slot(struct client_obd *cli) +{ + struct obd_request_slot_waiter *orsw; + + spin_lock(&cli->cl_loi_list_lock); + cli->cl_r_in_flight--; + + /* If there is free slot, wakeup the first waiter. */ + if (!list_empty(&cli->cl_loi_read_list) && + likely(cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight)) { + orsw = list_entry(cli->cl_loi_read_list.next, + struct obd_request_slot_waiter, orsw_entry); + list_del_init(&orsw->orsw_entry); + cli->cl_r_in_flight++; + wake_up(&orsw->orsw_waitq); + } + spin_unlock(&cli->cl_loi_list_lock); +} +EXPORT_SYMBOL(obd_put_request_slot); + +__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli) +{ + return cli->cl_max_rpcs_in_flight; +} +EXPORT_SYMBOL(obd_get_max_rpcs_in_flight); + +int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max) +{ + struct obd_request_slot_waiter *orsw; + __u32 old; + int diff; + int i; + char *typ_name; + int rc; + + if (max > OBD_MAX_RIF_MAX || max < 1) + return -ERANGE; + + typ_name = cli->cl_import->imp_obd->obd_type->typ_name; + if (strcmp(typ_name, LUSTRE_MDC_NAME) == 0) { + /* adjust max_mod_rpcs_in_flight to ensure it is always + * strictly lower that max_rpcs_in_flight */ + if (max < 2) { + CERROR("%s: cannot set max_rpcs_in_flight to 1 " + "because it must be higher than " + "max_mod_rpcs_in_flight value", + cli->cl_import->imp_obd->obd_name); + return -ERANGE; + } + if (max <= cli->cl_max_mod_rpcs_in_flight) { + rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1); + if (rc != 0) + return rc; + } + } + + spin_lock(&cli->cl_loi_list_lock); + old = cli->cl_max_rpcs_in_flight; + cli->cl_max_rpcs_in_flight = max; + diff = max - old; + + /* We increase the max_rpcs_in_flight, then wakeup some waiters. */ + for (i = 0; i < diff; i++) { + if (list_empty(&cli->cl_loi_read_list)) + break; + + orsw = list_entry(cli->cl_loi_read_list.next, + struct obd_request_slot_waiter, orsw_entry); + list_del_init(&orsw->orsw_entry); + cli->cl_r_in_flight++; + wake_up(&orsw->orsw_waitq); + } + spin_unlock(&cli->cl_loi_list_lock); + + return 0; +} +EXPORT_SYMBOL(obd_set_max_rpcs_in_flight); + +__u16 obd_get_max_mod_rpcs_in_flight(struct client_obd *cli) +{ + return cli->cl_max_mod_rpcs_in_flight; +} +EXPORT_SYMBOL(obd_get_max_mod_rpcs_in_flight); + +int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max) +{ + struct obd_connect_data *ocd; + __u16 maxmodrpcs; + __u16 prev; + + if (max > OBD_MAX_RIF_MAX || max < 1) + return -ERANGE; + + /* cannot exceed or equal max_rpcs_in_flight */ + if (max >= cli->cl_max_rpcs_in_flight) { + CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) " + "higher or equal to max_rpcs_in_flight value (%u)\n", + cli->cl_import->imp_obd->obd_name, + max, cli->cl_max_rpcs_in_flight); + return -ERANGE; + } + + /* cannot exceed max modify RPCs in flight supported by the server */ + ocd = &cli->cl_import->imp_connect_data; + if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) + maxmodrpcs = ocd->ocd_maxmodrpcs; + else + maxmodrpcs = 1; + if (max > maxmodrpcs) { + CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) " + "higher than max_mod_rpcs_per_client value (%hu) " + "returned by the server at connection\n", + cli->cl_import->imp_obd->obd_name, + max, maxmodrpcs); + return -ERANGE; + } + + spin_lock(&cli->cl_mod_rpcs_lock); + + prev = cli->cl_max_mod_rpcs_in_flight; + cli->cl_max_mod_rpcs_in_flight = max; + + /* wakeup waiters if limit has been increased */ + if (cli->cl_max_mod_rpcs_in_flight > prev) + wake_up(&cli->cl_mod_rpcs_waitq); + + spin_unlock(&cli->cl_mod_rpcs_lock); + + return 0; +} +EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight); + + +#define pct(a, b) (b ? a * 100 / b : 0) +int obd_mod_rpc_stats_seq_show(struct client_obd *cli, + struct seq_file *seq) +{ + struct timeval now; + unsigned long mod_tot = 0, mod_cum; + int i; + + do_gettimeofday(&now); + + spin_lock(&cli->cl_mod_rpcs_lock); + + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); + seq_printf(seq, "modify_RPCs_in_flight: %hu\n", + cli->cl_mod_rpcs_in_flight); + + seq_printf(seq, "\n\t\t\tmodify\n"); + seq_printf(seq, "rpcs in flight rpcs %% cum %%\n"); + mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist); + + mod_cum = 0; + for (i = 0; i < OBD_HIST_MAX; i++) { + unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i]; + mod_cum += mod; + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n", + i, mod, pct(mod, mod_tot), + pct(mod_cum, mod_tot)); + if (mod_cum == mod_tot) + break; + } + + spin_unlock(&cli->cl_mod_rpcs_lock); + + return 0; +} +EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show); +#undef pct + + +/* The number of modify RPCs sent in parallel is limited + * because the server has a finite number of slots per client to + * store request result and ensure reply reconstruction when needed. + * On the client, this limit is stored in cl_max_mod_rpcs_in_flight + * that takes into account server limit and cl_max_rpcs_in_flight + * value. + * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462), + * one close request is allowed above the maximum. + */ +static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli, + bool close_req) +{ + bool avail; + + /* A slot is available if + * - number of modify RPCs in flight is less than the max + * - it's a close RPC and no other close request is in flight + */ + avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight || + (close_req && cli->cl_close_rpcs_in_flight == 0); + + return avail; +} + +static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli, + bool close_req) +{ + bool avail; + + spin_lock(&cli->cl_mod_rpcs_lock); + avail = obd_mod_rpc_slot_avail_locked(cli, close_req); + spin_unlock(&cli->cl_mod_rpcs_lock); + return avail; +} + +/* Get a modify RPC slot from the obd client @cli according + * to the kind of operation @opc that is going to be sent + * and the intent @it of the operation if it applies. + * If the maximum number of modify RPCs in flight is reached + * the thread is put to sleep. + * Returns the tag to be set in the request message. Tag 0 + * is reserved for non-modifying requests. + */ +__u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc, + struct lookup_intent *it) +{ + struct l_wait_info lwi = LWI_INTR(NULL, NULL); + bool close_req = false; + __u16 i, max; + + /* read-only metadata RPCs don't consume a slot on MDT + * for reply reconstruction + */ + if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || + it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) + return 0; + + if (opc == MDS_CLOSE) + close_req = true; + + do { + spin_lock(&cli->cl_mod_rpcs_lock); + max = cli->cl_max_mod_rpcs_in_flight; + if (obd_mod_rpc_slot_avail_locked(cli, close_req)) { + /* there is a slot available */ + cli->cl_mod_rpcs_in_flight++; + if (close_req) + cli->cl_close_rpcs_in_flight++; + lprocfs_oh_tally(&cli->cl_mod_rpcs_hist, + cli->cl_mod_rpcs_in_flight); + /* find a free tag */ + i = find_first_zero_bit(cli->cl_mod_tag_bitmap, + max + 1); + LASSERT(i < OBD_MAX_RIF_MAX); + LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap)); + spin_unlock(&cli->cl_mod_rpcs_lock); + /* tag 0 is reserved for non-modify RPCs */ + return i + 1; + } + spin_unlock(&cli->cl_mod_rpcs_lock); + + CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot " + "opc %u, max %hu\n", + cli->cl_import->imp_obd->obd_name, opc, max); + + l_wait_event(cli->cl_mod_rpcs_waitq, + obd_mod_rpc_slot_avail(cli, close_req), &lwi); + } while (true); +} +EXPORT_SYMBOL(obd_get_mod_rpc_slot); + +/* Put a modify RPC slot from the obd client @cli according + * to the kind of operation @opc that has been sent and the + * intent @it of the operation if it applies. + */ +void obd_put_mod_rpc_slot(struct client_obd *cli, __u32 opc, + struct lookup_intent *it, __u16 tag) +{ + bool close_req = false; + + if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || + it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) + return; + + if (opc == MDS_CLOSE) + close_req = true; + + spin_lock(&cli->cl_mod_rpcs_lock); + cli->cl_mod_rpcs_in_flight--; + if (close_req) + cli->cl_close_rpcs_in_flight--; + /* release the tag in the bitmap */ + LASSERT(tag - 1 < OBD_MAX_RIF_MAX); + LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0); + spin_unlock(&cli->cl_mod_rpcs_lock); + wake_up(&cli->cl_mod_rpcs_waitq); +} +EXPORT_SYMBOL(obd_put_mod_rpc_slot);