X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fgenops.c;h=316c9f93dfd6f0c63c8619b366efd8e302a5b9a9;hb=3b41ac8f3feee5252a55553ad795545cfc8636f6;hp=edd6dfb00c65bf711c4ebacfcc3be81fe304d7f2;hpb=b2f366a2212bb5f87fb1ada932c4082fdfd77931;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index edd6dfb..316c9f9 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Whamcloud, Inc. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -46,22 +46,26 @@ #include #include #include -#include -extern struct list_head obd_types; -spinlock_t obd_types_lock; +extern cfs_list_t obd_types; +cfs_spinlock_t obd_types_lock; cfs_mem_cache_t *obd_device_cachep; cfs_mem_cache_t *obdo_cachep; EXPORT_SYMBOL(obdo_cachep); cfs_mem_cache_t *import_cachep; -struct list_head obd_zombie_imports; -struct list_head obd_zombie_exports; -spinlock_t obd_zombie_impexp_lock; +cfs_list_t obd_zombie_imports; +cfs_list_t obd_zombie_exports; +cfs_spinlock_t obd_zombie_impexp_lock; static void obd_zombie_impexp_notify(void); +static void obd_zombie_export_add(struct obd_export *exp); +static void obd_zombie_import_add(struct obd_import *imp); +static void print_export_data(struct obd_export *exp, + const char *status, int locks); int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); +EXPORT_SYMBOL(ptlrpc_put_connection_superhack); /* * support functions: we could use inter-module communication, but this @@ -71,13 +75,12 @@ static struct obd_device *obd_device_alloc(void) { struct obd_device *obd; - OBD_SLAB_ALLOC_PTR(obd, obd_device_cachep); + OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, CFS_ALLOC_IO); if (obd != NULL) { obd->obd_magic = OBD_DEVICE_MAGIC; } return obd; } -EXPORT_SYMBOL(obd_device_alloc); static void obd_device_free(struct obd_device *obd) { @@ -95,29 +98,34 @@ static void obd_device_free(struct obd_device *obd) struct obd_type *class_search_type(const char *name) { - struct list_head *tmp; + cfs_list_t *tmp; struct obd_type *type; - spin_lock(&obd_types_lock); - list_for_each(tmp, &obd_types) { - type = list_entry(tmp, struct obd_type, typ_chain); + cfs_spin_lock(&obd_types_lock); + cfs_list_for_each(tmp, &obd_types) { + type = cfs_list_entry(tmp, struct obd_type, typ_chain); if (strcmp(type->typ_name, name) == 0) { - spin_unlock(&obd_types_lock); + cfs_spin_unlock(&obd_types_lock); return type; } } - spin_unlock(&obd_types_lock); + cfs_spin_unlock(&obd_types_lock); return NULL; } +EXPORT_SYMBOL(class_search_type); struct obd_type *class_get_type(const char *name) { struct obd_type *type = class_search_type(name); -#ifdef CONFIG_KMOD +#ifdef HAVE_MODULE_LOADING_SUPPORT if (!type) { const char *modname = name; - if (!request_module(modname)) { + + if (strcmp(modname, "obdfilter") == 0) + modname = "ofd"; + + if (!cfs_request_module("%s", modname)) { CDEBUG(D_INFO, "Loaded module '%s'\n", modname); type = class_search_type(name); } else { @@ -127,22 +135,24 @@ struct obd_type *class_get_type(const char *name) } #endif if (type) { - spin_lock(&type->obd_type_lock); + cfs_spin_lock(&type->obd_type_lock); type->typ_refcnt++; - try_module_get(type->typ_dt_ops->o_owner); - spin_unlock(&type->obd_type_lock); + cfs_try_module_get(type->typ_dt_ops->o_owner); + cfs_spin_unlock(&type->obd_type_lock); } return type; } +EXPORT_SYMBOL(class_get_type); void class_put_type(struct obd_type *type) { LASSERT(type); - spin_lock(&type->obd_type_lock); + cfs_spin_lock(&type->obd_type_lock); type->typ_refcnt--; - module_put(type->typ_dt_ops->o_owner); - spin_unlock(&type->obd_type_lock); + cfs_module_put(type->typ_dt_ops->o_owner); + cfs_spin_unlock(&type->obd_type_lock); } +EXPORT_SYMBOL(class_put_type); #define CLASS_MAX_NAME 1024 @@ -181,7 +191,7 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, if (md_ops) *(type->typ_md_ops) = *md_ops; strcpy(type->typ_name, name); - spin_lock_init(&type->obd_type_lock); + cfs_spin_lock_init(&type->obd_type_lock); #ifdef LPROCFS type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root, @@ -199,9 +209,9 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, GOTO (failed, rc); } - spin_lock(&obd_types_lock); - list_add(&type->typ_chain, &obd_types); - spin_unlock(&obd_types_lock); + cfs_spin_lock(&obd_types_lock); + cfs_list_add(&type->typ_chain, &obd_types); + cfs_spin_unlock(&obd_types_lock); RETURN (0); @@ -215,6 +225,7 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, OBD_FREE(type, sizeof(*type)); RETURN(rc); } +EXPORT_SYMBOL(class_register_type); int class_unregister_type(const char *name) { @@ -235,16 +246,18 @@ int class_unregister_type(const char *name) RETURN(-EBUSY); } - if (type->typ_procroot) { - lprocfs_remove(&type->typ_procroot); - } + /* we do not use type->typ_procroot as for compatibility purposes + * other modules can share names (i.e. lod can use lov entry). so + * we can't reference pointer as it can get invalided when another + * module removes the entry */ + lprocfs_try_remove_proc_entry(type->typ_name, proc_lustre_root); if (type->typ_lu) lu_device_type_fini(type->typ_lu); - spin_lock(&obd_types_lock); - list_del(&type->typ_chain); - spin_unlock(&obd_types_lock); + cfs_spin_lock(&obd_types_lock); + cfs_list_del(&type->typ_chain); + cfs_spin_unlock(&obd_types_lock); OBD_FREE(type->typ_name, strlen(name) + 1); if (type->typ_dt_ops != NULL) OBD_FREE_PTR(type->typ_dt_ops); @@ -253,14 +266,15 @@ int class_unregister_type(const char *name) OBD_FREE(type, sizeof(*type)); RETURN(0); } /* class_unregister_type */ +EXPORT_SYMBOL(class_unregister_type); /** * Create a new obd device. * * Find an empty slot in ::obd_devs[], create a new obd device in it. * - * \param typename [in] obd device type string. - * \param name [in] obd device name. + * \param[in] type_name obd device type string. + * \param[in] name obd device name. * * \retval NULL if create fails, otherwise return the obd device * pointer created. @@ -272,6 +286,7 @@ struct obd_device *class_newdev(const char *type_name, const char *name) struct obd_type *type = NULL; int i; int new_obd_minor = 0; + ENTRY; if (strlen(name) >= MAX_OBD_NAME) { CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME); @@ -285,18 +300,19 @@ struct obd_device *class_newdev(const char *type_name, const char *name) } newdev = obd_device_alloc(); - if (newdev == NULL) { - class_put_type(type); - RETURN(ERR_PTR(-ENOMEM)); - } + if (newdev == NULL) + GOTO(out_type, result = ERR_PTR(-ENOMEM)); + LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC); - spin_lock(&obd_dev_lock); + cfs_write_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); + if (obd && obd->obd_name && (strcmp(name, obd->obd_name) == 0)) { - CERROR("Device %s already exists, won't add\n", name); + CERROR("Device %s already exists at %d, won't add\n", + name, i); if (result) { LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n", result, @@ -321,22 +337,26 @@ struct obd_device *class_newdev(const char *type_name, const char *name) obd_devs[i] = result; } } - spin_unlock(&obd_dev_lock); + cfs_write_unlock(&obd_dev_lock); if (result == NULL && i >= class_devno_max()) { CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n", class_devno_max()); - result = ERR_PTR(-EOVERFLOW); + GOTO(out, result = ERR_PTR(-EOVERFLOW)); } - if (IS_ERR(result)) { - obd_device_free(newdev); - class_put_type(type); - } else { - CDEBUG(D_IOCTL, "Adding new device %s (%p)\n", - result->obd_name, result); - } - return result; + if (IS_ERR(result)) + GOTO(out, result); + + CDEBUG(D_IOCTL, "Adding new device %s (%p)\n", + result->obd_name, result); + + RETURN(result); +out: + obd_device_free(newdev); +out_type: + class_put_type(type); + return result; } void class_release_dev(struct obd_device *obd) @@ -349,12 +369,12 @@ void class_release_dev(struct obd_device *obd) obd, obd->obd_minor, obd_devs[obd->obd_minor]); LASSERT(obd_type != NULL); - CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n", - obd->obd_name,obd->obd_type->typ_name); + CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n", + obd->obd_name, obd->obd_minor, obd->obd_type->typ_name); - spin_lock(&obd_dev_lock); + cfs_write_lock(&obd_dev_lock); obd_devs[obd->obd_minor] = NULL; - spin_unlock(&obd_dev_lock); + cfs_write_unlock(&obd_dev_lock); obd_device_free(obd); class_put_type(obd_type); @@ -367,24 +387,26 @@ int class_name2dev(const char *name) if (!name) return -1; - spin_lock(&obd_dev_lock); + cfs_read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); + if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) { /* Make sure we finished attaching before we give out any references */ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); if (obd->obd_attached) { - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return i; } break; } } - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return -1; } +EXPORT_SYMBOL(class_name2dev); struct obd_device *class_name2obd(const char *name) { @@ -394,24 +416,27 @@ struct obd_device *class_name2obd(const char *name) return NULL; return class_num2obd(dev); } +EXPORT_SYMBOL(class_name2obd); int class_uuid2dev(struct obd_uuid *uuid) { int i; - spin_lock(&obd_dev_lock); + cfs_read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); + if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) { LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return i; } } - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return -1; } +EXPORT_SYMBOL(class_uuid2dev); struct obd_device *class_uuid2obd(struct obd_uuid *uuid) { @@ -420,6 +445,7 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid) return NULL; return class_num2obd(dev); } +EXPORT_SYMBOL(class_uuid2obd); /** * Get obd device from ::obd_devs[] @@ -448,15 +474,17 @@ struct obd_device *class_num2obd(int num) return obd; } +EXPORT_SYMBOL(class_num2obd); void class_obd_list(void) { char *status; int i; - spin_lock(&obd_dev_lock); + cfs_read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); + if (obd == NULL) continue; if (obd->obd_stopping) @@ -470,9 +498,9 @@ void class_obd_list(void) LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n", i, status, obd->obd_type->typ_name, obd->obd_name, obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); + cfs_atomic_read(&obd->obd_refcount)); } - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return; } @@ -485,9 +513,10 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, { int i; - spin_lock(&obd_dev_lock); + cfs_read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); + if (obd == NULL) continue; if ((strncmp(obd->obd_type->typ_name, typ_name, @@ -496,15 +525,16 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, &obd->u.cli.cl_target_uuid) && ((grp_uuid)? obd_uuid_equals(grp_uuid, &obd->obd_uuid) : 1)) { - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return obd; } } } - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return NULL; } +EXPORT_SYMBOL(class_find_client_obd); /* Iterate the obd_device list looking devices have grp_uuid. Start searching at *next, and if a device is found, the next index to look @@ -521,23 +551,68 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) else return NULL; - spin_lock(&obd_dev_lock); + cfs_read_lock(&obd_dev_lock); for (; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); + if (obd == NULL) continue; if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) { if (next != NULL) *next = i+1; - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return obd; } } - spin_unlock(&obd_dev_lock); + cfs_read_unlock(&obd_dev_lock); return NULL; } +EXPORT_SYMBOL(class_devices_in_group); + +/** + * to notify sptlrpc log for \a fsname has changed, let every relevant OBD + * adjust sptlrpc settings accordingly. + */ +int class_notify_sptlrpc_conf(const char *fsname, int namelen) +{ + struct obd_device *obd; + const char *type; + int i, rc = 0, rc2; + LASSERT(namelen > 0); + + cfs_read_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + obd = class_num2obd(i); + + if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping) + continue; + + /* only notify mdc, osc, mdt, ost */ + type = obd->obd_type->typ_name; + if (strcmp(type, LUSTRE_MDC_NAME) != 0 && + strcmp(type, LUSTRE_OSC_NAME) != 0 && + strcmp(type, LUSTRE_MDT_NAME) != 0 && + strcmp(type, LUSTRE_OST_NAME) != 0) + continue; + + if (strncmp(obd->obd_name, fsname, namelen)) + continue; + + class_incref(obd, __FUNCTION__, obd); + cfs_read_unlock(&obd_dev_lock); + rc2 = obd_set_info_async(NULL, obd->obd_self_export, + sizeof(KEY_SPTLRPC_CONF), + KEY_SPTLRPC_CONF, 0, NULL, NULL); + rc = rc ? rc : rc2; + class_decref(obd, __FUNCTION__, obd); + cfs_read_lock(&obd_dev_lock); + } + cfs_read_unlock(&obd_dev_lock); + return rc; +} +EXPORT_SYMBOL(class_notify_sptlrpc_conf); void obd_cleanup_caches(void) { @@ -624,6 +699,7 @@ struct obd_export *class_conn2export(struct lustre_handle *conn) export = class_handle2object(conn->cookie); RETURN(export); } +EXPORT_SYMBOL(class_conn2export); struct obd_device *class_exp2obd(struct obd_export *exp) { @@ -631,6 +707,7 @@ struct obd_device *class_exp2obd(struct obd_export *exp) return exp->exp_obd; return NULL; } +EXPORT_SYMBOL(class_exp2obd); struct obd_device *class_conn2obd(struct lustre_handle *conn) { @@ -643,6 +720,7 @@ struct obd_device *class_conn2obd(struct lustre_handle *conn) } return NULL; } +EXPORT_SYMBOL(class_conn2obd); struct obd_import *class_exp2cliimp(struct obd_export *exp) { @@ -651,6 +729,7 @@ struct obd_import *class_exp2cliimp(struct obd_export *exp) return NULL; return obd->u.cli.cl_import; } +EXPORT_SYMBOL(class_exp2cliimp); struct obd_import *class_conn2cliimp(struct lustre_handle *conn) { @@ -659,18 +738,50 @@ struct obd_import *class_conn2cliimp(struct lustre_handle *conn) return NULL; return obd->u.cli.cl_import; } +EXPORT_SYMBOL(class_conn2cliimp); /* Export management functions */ +static void class_export_destroy(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + ENTRY; + + LASSERT_ATOMIC_ZERO(&exp->exp_refcount); + LASSERT(obd != NULL); + + CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp, + exp->exp_client_uuid.uuid, obd->obd_name); + + /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */ + if (exp->exp_connection) + ptlrpc_put_connection_superhack(exp->exp_connection); + + LASSERT(cfs_list_empty(&exp->exp_outstanding_replies)); + LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies)); + LASSERT(cfs_list_empty(&exp->exp_req_replay_queue)); + LASSERT(cfs_list_empty(&exp->exp_hp_rpcs)); + obd_destroy_export(exp); + class_decref(obd, "export", exp); + + OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle); + EXIT; +} + static void export_handle_addref(void *export) { class_export_get(export); } +static struct portals_handle_ops export_handle_ops = { + .hop_addref = export_handle_addref, + .hop_free = NULL, +}; + struct obd_export *class_export_get(struct obd_export *exp) { - atomic_inc(&exp->exp_refcount); + cfs_atomic_inc(&exp->exp_refcount); CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp, - atomic_read(&exp->exp_refcount)); + cfs_atomic_read(&exp->exp_refcount)); return exp; } EXPORT_SYMBOL(class_export_get); @@ -678,52 +789,23 @@ EXPORT_SYMBOL(class_export_get); void class_export_put(struct obd_export *exp) { LASSERT(exp != NULL); + LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON); CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp, - atomic_read(&exp->exp_refcount) - 1); - LASSERT(atomic_read(&exp->exp_refcount) > 0); - LASSERT(atomic_read(&exp->exp_refcount) < 0x5a5a5a); - - if (atomic_dec_and_test(&exp->exp_refcount)) { - LASSERT (list_empty(&exp->exp_obd_chain)); + cfs_atomic_read(&exp->exp_refcount) - 1); + if (cfs_atomic_dec_and_test(&exp->exp_refcount)) { + LASSERT(!cfs_list_empty(&exp->exp_obd_chain)); CDEBUG(D_IOCTL, "final put %p/%s\n", exp, exp->exp_client_uuid.uuid); - spin_lock(&obd_zombie_impexp_lock); - list_add(&exp->exp_obd_chain, &obd_zombie_exports); - spin_unlock(&obd_zombie_impexp_lock); + /* release nid stat refererence */ + lprocfs_exp_cleanup(exp); - if (obd_zombie_impexp_notify != NULL) - obd_zombie_impexp_notify(); + obd_zombie_export_add(exp); } } EXPORT_SYMBOL(class_export_put); -static void class_export_destroy(struct obd_export *exp) -{ - struct obd_device *obd = exp->exp_obd; - ENTRY; - - LASSERT (atomic_read(&exp->exp_refcount) == 0); - - CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp, - exp->exp_client_uuid.uuid, obd->obd_name); - - LASSERT(obd != NULL); - - /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */ - if (exp->exp_connection) - ptlrpc_put_connection_superhack(exp->exp_connection); - - LASSERT(list_empty(&exp->exp_outstanding_replies)); - LASSERT(list_empty(&exp->exp_req_replay_queue)); - obd_destroy_export(exp); - class_decref(obd, "export", exp); - - OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle); - EXIT; -} - /* Creates a new export, adds it to the hash table, and returns a * pointer to it. The refcount is 2: one for the hash reference, and * one for the pointer returned by this function. */ @@ -731,7 +813,9 @@ struct obd_export *class_new_export(struct obd_device *obd, struct obd_uuid *cluuid) { struct obd_export *export; + cfs_hash_t *hash = NULL; int rc = 0; + ENTRY; OBD_ALLOC_PTR(export); if (!export) @@ -739,46 +823,81 @@ struct obd_export *class_new_export(struct obd_device *obd, export->exp_conn_cnt = 0; export->exp_lock_hash = NULL; - atomic_set(&export->exp_refcount, 2); - atomic_set(&export->exp_rpc_count, 0); + export->exp_flock_hash = NULL; + cfs_atomic_set(&export->exp_refcount, 2); + cfs_atomic_set(&export->exp_rpc_count, 0); + cfs_atomic_set(&export->exp_cb_count, 0); + cfs_atomic_set(&export->exp_locks_count, 0); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + CFS_INIT_LIST_HEAD(&export->exp_locks_list); + cfs_spin_lock_init(&export->exp_locks_list_guard); +#endif + cfs_atomic_set(&export->exp_replay_count, 0); export->exp_obd = obd; CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); + cfs_spin_lock_init(&export->exp_uncommitted_replies_lock); + CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies); CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue); CFS_INIT_LIST_HEAD(&export->exp_handle.h_link); - class_handle_hash(&export->exp_handle, export_handle_addref); - export->exp_last_request_time = cfs_time_current_sec(); - spin_lock_init(&export->exp_lock); - INIT_HLIST_NODE(&export->exp_uuid_hash); - INIT_HLIST_NODE(&export->exp_nid_hash); + CFS_INIT_LIST_HEAD(&export->exp_hp_rpcs); + class_handle_hash(&export->exp_handle, &export_handle_ops); + export->exp_last_request_time = cfs_time_current_sec(); + cfs_spin_lock_init(&export->exp_lock); + cfs_spin_lock_init(&export->exp_rpc_lock); + CFS_INIT_HLIST_NODE(&export->exp_uuid_hash); + CFS_INIT_HLIST_NODE(&export->exp_nid_hash); + cfs_spin_lock_init(&export->exp_bl_list_lock); + CFS_INIT_LIST_HEAD(&export->exp_bl_list); export->exp_sp_peer = LUSTRE_SP_ANY; export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; export->exp_client_uuid = *cluuid; obd_init_export(export); - spin_lock(&obd->obd_dev_lock); + cfs_spin_lock(&obd->obd_dev_lock); + /* shouldn't happen, but might race */ + if (obd->obd_stopping) + GOTO(exit_unlock, rc = -ENODEV); + + hash = cfs_hash_getref(obd->obd_uuid_hash); + if (hash == NULL) + GOTO(exit_unlock, rc = -ENODEV); + cfs_spin_unlock(&obd->obd_dev_lock); + if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) { - rc = lustre_hash_add_unique(obd->obd_uuid_hash, cluuid, - &export->exp_uuid_hash); + rc = cfs_hash_add_unique(hash, cluuid, &export->exp_uuid_hash); if (rc != 0) { LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n", obd->obd_name, cluuid->uuid, rc); - spin_unlock(&obd->obd_dev_lock); - class_handle_unhash(&export->exp_handle); - OBD_FREE_PTR(export); - return ERR_PTR(-EALREADY); + GOTO(exit_err, rc = -EALREADY); } } - LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */ + cfs_spin_lock(&obd->obd_dev_lock); + if (obd->obd_stopping) { + cfs_hash_del(hash, cluuid, &export->exp_uuid_hash); + GOTO(exit_unlock, rc = -ENODEV); + } + class_incref(obd, "export", export); - list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); - list_add_tail(&export->exp_obd_chain_timed, - &export->exp_obd->obd_exports_timed); + cfs_list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); + cfs_list_add_tail(&export->exp_obd_chain_timed, + &export->exp_obd->obd_exports_timed); export->exp_obd->obd_num_exports++; - spin_unlock(&obd->obd_dev_lock); + cfs_spin_unlock(&obd->obd_dev_lock); + cfs_hash_putref(hash); + RETURN(export); - return export; +exit_unlock: + cfs_spin_unlock(&obd->obd_dev_lock); +exit_err: + if (hash) + cfs_hash_putref(hash); + class_handle_unhash(&export->exp_handle); + LASSERT(cfs_hlist_unhashed(&export->exp_uuid_hash)); + obd_destroy_export(export); + OBD_FREE_PTR(export); + return ERR_PTR(rc); } EXPORT_SYMBOL(class_new_export); @@ -786,93 +905,91 @@ void class_unlink_export(struct obd_export *exp) { class_handle_unhash(&exp->exp_handle); - spin_lock(&exp->exp_obd->obd_dev_lock); + cfs_spin_lock(&exp->exp_obd->obd_dev_lock); /* delete an uuid-export hashitem from hashtables */ - if (!hlist_unhashed(&exp->exp_uuid_hash)) - lustre_hash_del(exp->exp_obd->obd_uuid_hash, - &exp->exp_client_uuid, - &exp->exp_uuid_hash); + if (!cfs_hlist_unhashed(&exp->exp_uuid_hash)) + cfs_hash_del(exp->exp_obd->obd_uuid_hash, + &exp->exp_client_uuid, + &exp->exp_uuid_hash); - list_del_init(&exp->exp_obd_chain); - list_del_init(&exp->exp_obd_chain_timed); + cfs_list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports); + cfs_list_del_init(&exp->exp_obd_chain_timed); exp->exp_obd->obd_num_exports--; - spin_unlock(&exp->exp_obd->obd_dev_lock); - + cfs_spin_unlock(&exp->exp_obd->obd_dev_lock); class_export_put(exp); } EXPORT_SYMBOL(class_unlink_export); /* Import management functions */ +void class_import_destroy(struct obd_import *imp) +{ + ENTRY; + + CDEBUG(D_IOCTL, "destroying import %p for %s\n", imp, + imp->imp_obd->obd_name); + + LASSERT_ATOMIC_ZERO(&imp->imp_refcount); + + ptlrpc_put_connection_superhack(imp->imp_connection); + + while (!cfs_list_empty(&imp->imp_conn_list)) { + struct obd_import_conn *imp_conn; + + imp_conn = cfs_list_entry(imp->imp_conn_list.next, + struct obd_import_conn, oic_item); + cfs_list_del_init(&imp_conn->oic_item); + ptlrpc_put_connection_superhack(imp_conn->oic_conn); + OBD_FREE(imp_conn, sizeof(*imp_conn)); + } + + LASSERT(imp->imp_sec == NULL); + class_decref(imp->imp_obd, "import", imp); + OBD_FREE_RCU(imp, sizeof(*imp), &imp->imp_handle); + EXIT; +} + static void import_handle_addref(void *import) { class_import_get(import); } +static struct portals_handle_ops import_handle_ops = { + .hop_addref = import_handle_addref, + .hop_free = NULL, +}; + struct obd_import *class_import_get(struct obd_import *import) { - LASSERT(atomic_read(&import->imp_refcount) >= 0); - LASSERT(atomic_read(&import->imp_refcount) < 0x5a5a5a); - atomic_inc(&import->imp_refcount); + cfs_atomic_inc(&import->imp_refcount); CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import, - atomic_read(&import->imp_refcount), + cfs_atomic_read(&import->imp_refcount), import->imp_obd->obd_name); return import; } EXPORT_SYMBOL(class_import_get); -void class_import_put(struct obd_import *import) +void class_import_put(struct obd_import *imp) { ENTRY; - LASSERT(atomic_read(&import->imp_refcount) > 0); - LASSERT(atomic_read(&import->imp_refcount) < 0x5a5a5a); - LASSERT(list_empty(&import->imp_zombie_chain)); + LASSERT(cfs_list_empty(&imp->imp_zombie_chain)); + LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON); - CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import, - atomic_read(&import->imp_refcount) - 1, - import->imp_obd->obd_name); + CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp, + cfs_atomic_read(&imp->imp_refcount) - 1, + imp->imp_obd->obd_name); - if (atomic_dec_and_test(&import->imp_refcount)) { - CDEBUG(D_INFO, "final put import %p\n", import); - spin_lock(&obd_zombie_impexp_lock); - list_add(&import->imp_zombie_chain, &obd_zombie_imports); - spin_unlock(&obd_zombie_impexp_lock); - - if (obd_zombie_impexp_notify != NULL) - obd_zombie_impexp_notify(); + if (cfs_atomic_dec_and_test(&imp->imp_refcount)) { + CDEBUG(D_INFO, "final put import %p\n", imp); + obd_zombie_import_add(imp); } - EXIT; + /* catch possible import put race */ + LASSERT_ATOMIC_GE_LT(&imp->imp_refcount, 0, LI_POISON); + EXIT; } EXPORT_SYMBOL(class_import_put); -void class_import_destroy(struct obd_import *import) -{ - ENTRY; - - CDEBUG(D_IOCTL, "destroying import %p for %s\n", import, - import->imp_obd->obd_name); - - LASSERT(atomic_read(&import->imp_refcount) == 0); - - ptlrpc_put_connection_superhack(import->imp_connection); - - while (!list_empty(&import->imp_conn_list)) { - struct obd_import_conn *imp_conn; - - imp_conn = list_entry(import->imp_conn_list.next, - struct obd_import_conn, oic_item); - list_del(&imp_conn->oic_item); - ptlrpc_put_connection_superhack(imp_conn->oic_conn); - OBD_FREE(imp_conn, sizeof(*imp_conn)); - } - - LASSERT(import->imp_sec == NULL); - class_decref(import->imp_obd, "import", import); - OBD_FREE_RCU(import, sizeof(*import), &import->imp_handle); - EXIT; -} - static void init_imp_at(struct imp_at *at) { int i; at_init(&at->iat_net_latency, 0, 0); @@ -897,21 +1014,21 @@ struct obd_import *class_new_import(struct obd_device *obd) CFS_INIT_LIST_HEAD(&imp->imp_replay_list); CFS_INIT_LIST_HEAD(&imp->imp_sending_list); CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); - spin_lock_init(&imp->imp_lock); + cfs_spin_lock_init(&imp->imp_lock); imp->imp_last_success_conn = 0; imp->imp_state = LUSTRE_IMP_NEW; imp->imp_obd = class_incref(obd, "import", imp); - sema_init(&imp->imp_sec_mutex, 1); + cfs_mutex_init(&imp->imp_sec_mutex); cfs_waitq_init(&imp->imp_recovery_waitq); - atomic_set(&imp->imp_refcount, 2); - atomic_set(&imp->imp_unregistering, 0); - atomic_set(&imp->imp_inflight, 0); - atomic_set(&imp->imp_replay_inflight, 0); - atomic_set(&imp->imp_inval_count, 0); + cfs_atomic_set(&imp->imp_refcount, 2); + cfs_atomic_set(&imp->imp_unregistering, 0); + cfs_atomic_set(&imp->imp_inflight, 0); + cfs_atomic_set(&imp->imp_replay_inflight, 0); + cfs_atomic_set(&imp->imp_inval_count, 0); CFS_INIT_LIST_HEAD(&imp->imp_conn_list); CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link); - class_handle_hash(&imp->imp_handle, import_handle_addref); + class_handle_hash(&imp->imp_handle, &import_handle_ops); init_imp_at(&imp->imp_at); /* the default magic is V2, will be used in connect RPC, and @@ -929,13 +1046,56 @@ void class_destroy_import(struct obd_import *import) class_handle_unhash(&import->imp_handle); - spin_lock(&import->imp_lock); + cfs_spin_lock(&import->imp_lock); import->imp_generation++; - spin_unlock(&import->imp_lock); + cfs_spin_unlock(&import->imp_lock); class_import_put(import); } EXPORT_SYMBOL(class_destroy_import); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + +void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) +{ + cfs_spin_lock(&exp->exp_locks_list_guard); + + LASSERT(lock->l_exp_refs_nr >= 0); + + if (lock->l_exp_refs_target != NULL && + lock->l_exp_refs_target != exp) { + LCONSOLE_WARN("setting export %p for lock %p which already has export %p\n", + exp, lock, lock->l_exp_refs_target); + } + if ((lock->l_exp_refs_nr ++) == 0) { + cfs_list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); + lock->l_exp_refs_target = exp; + } + CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", + lock, exp, lock->l_exp_refs_nr); + cfs_spin_unlock(&exp->exp_locks_list_guard); +} +EXPORT_SYMBOL(__class_export_add_lock_ref); + +void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) +{ + cfs_spin_lock(&exp->exp_locks_list_guard); + LASSERT(lock->l_exp_refs_nr > 0); + if (lock->l_exp_refs_target != exp) { + LCONSOLE_WARN("lock %p, " + "mismatching export pointers: %p, %p\n", + lock, lock->l_exp_refs_target, exp); + } + if (-- lock->l_exp_refs_nr == 0) { + cfs_list_del_init(&lock->l_exp_refs_link); + lock->l_exp_refs_target = NULL; + } + CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", + lock, exp, lock->l_exp_refs_nr); + cfs_spin_unlock(&exp->exp_locks_list_guard); +} +EXPORT_SYMBOL(__class_export_del_lock_ref); +#endif + /* A connection defines an export context in which preallocation can be managed. This releases the export pointer reference, and returns the export handle, so the export refcount is 1 when this function @@ -967,89 +1127,112 @@ void class_export_recovery_cleanup(struct obd_export *exp) { struct obd_device *obd = exp->exp_obd; - spin_lock_bh(&obd->obd_processing_task_lock); + cfs_spin_lock(&obd->obd_recovery_task_lock); + if (exp->exp_delayed) + obd->obd_delayed_clients--; if (obd->obd_recovering && exp->exp_in_recovery) { - spin_lock(&exp->exp_lock); + cfs_spin_lock(&exp->exp_lock); exp->exp_in_recovery = 0; - spin_unlock(&exp->exp_lock); - obd->obd_connected_clients--; - /* each connected client is counted as recoverable */ - obd->obd_recoverable_clients--; - if (exp->exp_req_replay_needed) { - spin_lock(&exp->exp_lock); - exp->exp_req_replay_needed = 0; - spin_unlock(&exp->exp_lock); - LASSERT(atomic_read(&obd->obd_req_replay_clients)); - atomic_dec(&obd->obd_req_replay_clients); - } - if (exp->exp_lock_replay_needed) { - spin_lock(&exp->exp_lock); - exp->exp_lock_replay_needed = 0; - spin_unlock(&exp->exp_lock); - LASSERT(atomic_read(&obd->obd_lock_replay_clients)); - atomic_dec(&obd->obd_lock_replay_clients); - } + cfs_spin_unlock(&exp->exp_lock); + LASSERT_ATOMIC_POS(&obd->obd_connected_clients); + cfs_atomic_dec(&obd->obd_connected_clients); + } + cfs_spin_unlock(&obd->obd_recovery_task_lock); + /** Cleanup req replay fields */ + if (exp->exp_req_replay_needed) { + cfs_spin_lock(&exp->exp_lock); + exp->exp_req_replay_needed = 0; + cfs_spin_unlock(&exp->exp_lock); + LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients)); + cfs_atomic_dec(&obd->obd_req_replay_clients); + } + /** Cleanup lock replay data */ + if (exp->exp_lock_replay_needed) { + cfs_spin_lock(&exp->exp_lock); + exp->exp_lock_replay_needed = 0; + cfs_spin_unlock(&exp->exp_lock); + LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients)); + cfs_atomic_dec(&obd->obd_lock_replay_clients); } - spin_unlock_bh(&obd->obd_processing_task_lock); } -/* This function removes two references from the export: one for the - * hash entry and one for the export pointer passed in. The export - * pointer passed to this function is destroyed should not be used - * again. */ +/* This function removes 1-3 references from the export: + * 1 - for export pointer passed + * and if disconnect really need + * 2 - removing from hash + * 3 - in client_unlink_export + * The export pointer passed to this function can destroyed */ int class_disconnect(struct obd_export *export) { int already_disconnected; ENTRY; if (export == NULL) { - fixme(); - CDEBUG(D_IOCTL, "attempting to free NULL export %p\n", export); + CWARN("attempting to free NULL export %p\n", export); RETURN(-EINVAL); } - spin_lock(&export->exp_lock); + cfs_spin_lock(&export->exp_lock); already_disconnected = export->exp_disconnected; export->exp_disconnected = 1; - - if (!hlist_unhashed(&export->exp_nid_hash)) - lustre_hash_del(export->exp_obd->obd_nid_hash, - &export->exp_connection->c_peer.nid, - &export->exp_nid_hash); - - spin_unlock(&export->exp_lock); + cfs_spin_unlock(&export->exp_lock); /* class_cleanup(), abort_recovery(), and class_fail_export() * all end up in here, and if any of them race we shouldn't * call extra class_export_puts(). */ - if (already_disconnected) - RETURN(0); + if (already_disconnected) { + LASSERT(cfs_hlist_unhashed(&export->exp_nid_hash)); + GOTO(no_disconn, already_disconnected); + } CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", export->exp_handle.h_cookie); + if (!cfs_hlist_unhashed(&export->exp_nid_hash)) + cfs_hash_del(export->exp_obd->obd_nid_hash, + &export->exp_connection->c_peer.nid, + &export->exp_nid_hash); + class_export_recovery_cleanup(export); class_unlink_export(export); +no_disconn: class_export_put(export); RETURN(0); } +EXPORT_SYMBOL(class_disconnect); -static void class_disconnect_export_list(struct list_head *list, int flags) +/* Return non-zero for a fully connected export */ +int class_connected_export(struct obd_export *exp) +{ + if (exp) { + int connected; + cfs_spin_lock(&exp->exp_lock); + connected = (exp->exp_conn_cnt > 0); + cfs_spin_unlock(&exp->exp_lock); + return connected; + } + return 0; +} +EXPORT_SYMBOL(class_connected_export); + +static void class_disconnect_export_list(cfs_list_t *list, + enum obd_option flags) { int rc; - struct lustre_handle fake_conn; - struct obd_export *fake_exp, *exp; + struct obd_export *exp; ENTRY; /* It's possible that an export may disconnect itself, but * nothing else will be added to this list. */ - while (!list_empty(list)) { - exp = list_entry(list->next, struct obd_export, exp_obd_chain); + while (!cfs_list_empty(list)) { + exp = cfs_list_entry(list->next, struct obd_export, + exp_obd_chain); + /* need for safe call CDEBUG after obd_disconnect */ class_export_get(exp); - spin_lock(&exp->exp_lock); + cfs_spin_lock(&exp->exp_lock); exp->exp_flags = flags; - spin_unlock(&exp->exp_lock); + cfs_spin_unlock(&exp->exp_lock); if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) { @@ -1058,54 +1241,43 @@ static void class_disconnect_export_list(struct list_head *list, int flags) exp); /* Need to delete this now so we don't end up pointing * to work_list later when this export is cleaned up. */ - list_del_init(&exp->exp_obd_chain); - class_export_put(exp); - continue; - } - - fake_conn.cookie = exp->exp_handle.h_cookie; - fake_exp = class_conn2export(&fake_conn); - if (!fake_exp) { + cfs_list_del_init(&exp->exp_obd_chain); class_export_put(exp); continue; } - spin_lock(&fake_exp->exp_lock); - fake_exp->exp_flags = flags; - spin_unlock(&fake_exp->exp_lock); - + class_export_get(exp); CDEBUG(D_HA, "%s: disconnecting export at %s (%p), " "last request at "CFS_TIME_T"\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), exp, exp->exp_last_request_time); - rc = obd_disconnect(fake_exp); + /* release one export reference anyway */ + rc = obd_disconnect(exp); + + CDEBUG(D_HA, "disconnected export at %s (%p): rc %d\n", + obd_export_nid2str(exp), exp, rc); class_export_put(exp); } EXIT; } -static inline int get_exp_flags_from_obd(struct obd_device *obd) -{ - return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) | - (obd->obd_force ? OBD_OPT_FORCE : 0)); -} - void class_disconnect_exports(struct obd_device *obd) { - struct list_head work_list; + cfs_list_t work_list; ENTRY; /* Move all of the exports from obd_exports to a work list, en masse. */ - spin_lock(&obd->obd_dev_lock); - list_add(&work_list, &obd->obd_exports); - list_del_init(&obd->obd_exports); - spin_unlock(&obd->obd_dev_lock); + CFS_INIT_LIST_HEAD(&work_list); + cfs_spin_lock(&obd->obd_dev_lock); + cfs_list_splice_init(&obd->obd_exports, &work_list); + cfs_list_splice_init(&obd->obd_delayed_exports, &work_list); + cfs_spin_unlock(&obd->obd_dev_lock); - if (!list_empty(&work_list)) { + if (!cfs_list_empty(&work_list)) { CDEBUG(D_HA, "OBD device %d (%p) has exports, " "disconnecting them\n", obd->obd_minor, obd); class_disconnect_export_list(&work_list, - get_exp_flags_from_obd(obd)); + exp_flags_from_obd(obd)); } else CDEBUG(D_HA, "OBD device %d (%p) has no exports\n", obd->obd_minor, obd); @@ -1115,41 +1287,54 @@ EXPORT_SYMBOL(class_disconnect_exports); /* Remove exports that have not completed recovery. */ -int class_disconnect_stale_exports(struct obd_device *obd, - int (*test_export)(struct obd_export *)) +void class_disconnect_stale_exports(struct obd_device *obd, + int (*test_export)(struct obd_export *)) { - struct list_head work_list; - struct list_head *pos, *n; - struct obd_export *exp; - int cnt = 0; + cfs_list_t work_list; + struct obd_export *exp, *n; + int evicted = 0; ENTRY; CFS_INIT_LIST_HEAD(&work_list); - spin_lock(&obd->obd_dev_lock); - list_for_each_safe(pos, n, &obd->obd_exports) { - exp = list_entry(pos, struct obd_export, exp_obd_chain); - if (test_export(exp)) - continue; - - list_del(&exp->exp_obd_chain); - list_add(&exp->exp_obd_chain, &work_list); + cfs_spin_lock(&obd->obd_dev_lock); + cfs_list_for_each_entry_safe(exp, n, &obd->obd_exports, + exp_obd_chain) { /* don't count self-export as client */ if (obd_uuid_equals(&exp->exp_client_uuid, - &exp->exp_obd->obd_uuid)) + &exp->exp_obd->obd_uuid)) continue; - cnt++; - CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n", + /* don't evict clients which have no slot in last_rcvd + * (e.g. lightweight connection) */ + if (exp->exp_target_data.ted_lr_idx == -1) + continue; + + cfs_spin_lock(&exp->exp_lock); + if (test_export(exp)) { + cfs_spin_unlock(&exp->exp_lock); + continue; + } + exp->exp_failed = 1; + cfs_spin_unlock(&exp->exp_lock); + + cfs_list_move(&exp->exp_obd_chain, &work_list); + evicted++; + CDEBUG(D_HA, "%s: disconnect stale client %s@%s\n", obd->obd_name, exp->exp_client_uuid.uuid, exp->exp_connection == NULL ? "" : libcfs_nid2str(exp->exp_connection->c_peer.nid)); + print_export_data(exp, "EVICTING", 0); } - spin_unlock(&obd->obd_dev_lock); + cfs_spin_unlock(&obd->obd_dev_lock); - CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n", - obd->obd_name, cnt); - class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd)); - RETURN(cnt); + if (evicted) { + LCONSOLE_WARN("%s: disconnecting %d stale clients\n", + obd->obd_name, evicted); + obd->obd_stale_clients += evicted; + } + class_disconnect_export_list(&work_list, exp_flags_from_obd(obd) | + OBD_OPT_ABORT_RECOV); + EXIT; } EXPORT_SYMBOL(class_disconnect_stale_exports); @@ -1157,10 +1342,10 @@ void class_fail_export(struct obd_export *exp) { int rc, already_failed; - spin_lock(&exp->exp_lock); + cfs_spin_lock(&exp->exp_lock); already_failed = exp->exp_failed; exp->exp_failed = 1; - spin_unlock(&exp->exp_lock); + cfs_spin_unlock(&exp->exp_lock); if (already_failed) { CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n", @@ -1174,6 +1359,9 @@ void class_fail_export(struct obd_export *exp) if (obd_dump_on_timeout) libcfs_debug_dumplog(); + /* need for safe call CDEBUG after obd_disconnect */ + class_export_get(exp); + /* Most callers into obd_disconnect are removing their own reference * (request, for example) in addition to the one from the hash table. * We don't have such a reference here, so make one. */ @@ -1184,6 +1372,7 @@ void class_fail_export(struct obd_export *exp) else CDEBUG(D_HA, "disconnected export %p/%s\n", exp, exp->exp_client_uuid.uuid); + class_export_put(exp); } EXPORT_SYMBOL(class_fail_export); @@ -1204,7 +1393,7 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) lnet_nid_t nid_key = libcfs_str2nid((char *)nid); do { - doomed_exp = lustre_hash_lookup(obd->obd_nid_hash, &nid_key); + doomed_exp = cfs_hash_lookup(obd->obd_nid_hash, &nid_key); if (doomed_exp == NULL) break; @@ -1241,7 +1430,7 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) return exports_evicted; } - doomed_exp = lustre_hash_lookup(obd->obd_uuid_hash, &doomed_uuid); + doomed_exp = cfs_hash_lookup(obd->obd_uuid_hash, &doomed_uuid); if (doomed_exp == NULL) { CERROR("%s: can't disconnect %s: no exports found\n", @@ -1258,6 +1447,88 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) } EXPORT_SYMBOL(obd_export_evict_by_uuid); +#if LUSTRE_TRACKS_LOCK_EXP_REFS +void (*class_export_dump_hook)(struct obd_export*) = NULL; +EXPORT_SYMBOL(class_export_dump_hook); +#endif + +static void print_export_data(struct obd_export *exp, const char *status, + int locks) +{ + struct ptlrpc_reply_state *rs; + struct ptlrpc_reply_state *first_reply = NULL; + int nreplies = 0; + + cfs_spin_lock(&exp->exp_lock); + cfs_list_for_each_entry(rs, &exp->exp_outstanding_replies, + rs_exp_list) { + if (nreplies == 0) + first_reply = rs; + nreplies++; + } + cfs_spin_unlock(&exp->exp_lock); + + CDEBUG(D_HA, "%s: %s %p %s %s %d (%d %d %d) %d %d %d %d: %p %s "LPU64"\n", + exp->exp_obd->obd_name, status, exp, exp->exp_client_uuid.uuid, + obd_export_nid2str(exp), cfs_atomic_read(&exp->exp_refcount), + cfs_atomic_read(&exp->exp_rpc_count), + cfs_atomic_read(&exp->exp_cb_count), + cfs_atomic_read(&exp->exp_locks_count), + exp->exp_disconnected, exp->exp_delayed, exp->exp_failed, + nreplies, first_reply, nreplies > 3 ? "..." : "", + exp->exp_last_committed); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + if (locks && class_export_dump_hook != NULL) + class_export_dump_hook(exp); +#endif +} + +void dump_exports(struct obd_device *obd, int locks) +{ + struct obd_export *exp; + + cfs_spin_lock(&obd->obd_dev_lock); + cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) + print_export_data(exp, "ACTIVE", locks); + cfs_list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) + print_export_data(exp, "UNLINKED", locks); + cfs_list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) + print_export_data(exp, "DELAYED", locks); + cfs_spin_unlock(&obd->obd_dev_lock); + cfs_spin_lock(&obd_zombie_impexp_lock); + cfs_list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain) + print_export_data(exp, "ZOMBIE", locks); + cfs_spin_unlock(&obd_zombie_impexp_lock); +} +EXPORT_SYMBOL(dump_exports); + +void obd_exports_barrier(struct obd_device *obd) +{ + int waited = 2; + LASSERT(cfs_list_empty(&obd->obd_exports)); + cfs_spin_lock(&obd->obd_dev_lock); + while (!cfs_list_empty(&obd->obd_unlinked_exports)) { + cfs_spin_unlock(&obd->obd_dev_lock); + cfs_schedule_timeout_and_set_state(CFS_TASK_UNINT, + cfs_time_seconds(waited)); + if (waited > 5 && IS_PO2(waited)) { + LCONSOLE_WARN("%s is waiting for obd_unlinked_exports " + "more than %d seconds. " + "The obd refcount = %d. Is it stuck?\n", + obd->obd_name, waited, + cfs_atomic_read(&obd->obd_refcount)); + dump_exports(obd, 1); + } + waited *= 2; + cfs_spin_lock(&obd->obd_dev_lock); + } + cfs_spin_unlock(&obd->obd_dev_lock); +} +EXPORT_SYMBOL(obd_exports_barrier); + +/* Total amount of zombies to be destroyed */ +static int zombies_count = 0; + /** * kill zombie imports and exports */ @@ -1268,43 +1539,53 @@ void obd_zombie_impexp_cull(void) ENTRY; do { - spin_lock (&obd_zombie_impexp_lock); + cfs_spin_lock(&obd_zombie_impexp_lock); import = NULL; - if (!list_empty(&obd_zombie_imports)) { - import = list_entry(obd_zombie_imports.next, - struct obd_import, - imp_zombie_chain); - list_del(&import->imp_zombie_chain); + if (!cfs_list_empty(&obd_zombie_imports)) { + import = cfs_list_entry(obd_zombie_imports.next, + struct obd_import, + imp_zombie_chain); + cfs_list_del_init(&import->imp_zombie_chain); } export = NULL; - if (!list_empty(&obd_zombie_exports)) { - export = list_entry(obd_zombie_exports.next, - struct obd_export, - exp_obd_chain); - list_del_init(&export->exp_obd_chain); + if (!cfs_list_empty(&obd_zombie_exports)) { + export = cfs_list_entry(obd_zombie_exports.next, + struct obd_export, + exp_obd_chain); + cfs_list_del_init(&export->exp_obd_chain); } - spin_unlock(&obd_zombie_impexp_lock); + cfs_spin_unlock(&obd_zombie_impexp_lock); - if (import != NULL) + if (import != NULL) { class_import_destroy(import); + cfs_spin_lock(&obd_zombie_impexp_lock); + zombies_count--; + cfs_spin_unlock(&obd_zombie_impexp_lock); + } - if (export != NULL) + if (export != NULL) { class_export_destroy(export); + cfs_spin_lock(&obd_zombie_impexp_lock); + zombies_count--; + cfs_spin_unlock(&obd_zombie_impexp_lock); + } + cfs_cond_resched(); } while (import != NULL || export != NULL); EXIT; } -static struct completion obd_zombie_start; -static struct completion obd_zombie_stop; +static cfs_completion_t obd_zombie_start; +static cfs_completion_t obd_zombie_stop; static unsigned long obd_zombie_flags; static cfs_waitq_t obd_zombie_waitq; +static pid_t obd_zombie_pid; enum { - OBD_ZOMBIE_STOP = 1 + OBD_ZOMBIE_STOP = 1 << 1 }; /** @@ -1314,22 +1595,56 @@ static int obd_zombie_impexp_check(void *arg) { int rc; - spin_lock(&obd_zombie_impexp_lock); - rc = list_empty(&obd_zombie_imports) && - list_empty(&obd_zombie_exports) && - !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); - - spin_unlock(&obd_zombie_impexp_lock); + cfs_spin_lock(&obd_zombie_impexp_lock); + rc = (zombies_count == 0) && + !cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + cfs_spin_unlock(&obd_zombie_impexp_lock); RETURN(rc); } /** + * Add export to the obd_zombe thread and notify it. + */ +static void obd_zombie_export_add(struct obd_export *exp) { + cfs_spin_lock(&exp->exp_obd->obd_dev_lock); + LASSERT(!cfs_list_empty(&exp->exp_obd_chain)); + cfs_list_del_init(&exp->exp_obd_chain); + cfs_spin_unlock(&exp->exp_obd->obd_dev_lock); + cfs_spin_lock(&obd_zombie_impexp_lock); + zombies_count++; + cfs_list_add(&exp->exp_obd_chain, &obd_zombie_exports); + cfs_spin_unlock(&obd_zombie_impexp_lock); + + obd_zombie_impexp_notify(); +} + +/** + * Add import to the obd_zombe thread and notify it. + */ +static void obd_zombie_import_add(struct obd_import *imp) { + LASSERT(imp->imp_sec == NULL); + LASSERT(imp->imp_rq_pool == NULL); + cfs_spin_lock(&obd_zombie_impexp_lock); + LASSERT(cfs_list_empty(&imp->imp_zombie_chain)); + zombies_count++; + cfs_list_add(&imp->imp_zombie_chain, &obd_zombie_imports); + cfs_spin_unlock(&obd_zombie_impexp_lock); + + obd_zombie_impexp_notify(); +} + +/** * notify import/export destroy thread about new zombie. */ static void obd_zombie_impexp_notify(void) { - cfs_waitq_signal(&obd_zombie_waitq); + /* + * Make sure obd_zomebie_impexp_thread get this notification. + * It is possible this signal only get by obd_zombie_barrier, and + * barrier gulps this notification and sleeps away and hangs ensues + */ + cfs_waitq_broadcast(&obd_zombie_waitq); } /** @@ -1339,11 +1654,10 @@ static int obd_zombie_is_idle(void) { int rc; - LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); - spin_lock(&obd_zombie_impexp_lock); - rc = list_empty(&obd_zombie_imports) && - list_empty(&obd_zombie_exports); - spin_unlock(&obd_zombie_impexp_lock); + LASSERT(!cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); + cfs_spin_lock(&obd_zombie_impexp_lock); + rc = (zombies_count == 0); + cfs_spin_unlock(&obd_zombie_impexp_lock); return rc; } @@ -1354,6 +1668,9 @@ void obd_zombie_barrier(void) { struct l_wait_info lwi = { 0 }; + if (obd_zombie_pid == cfs_curproc_pid()) + /* don't wait for myself */ + return; l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi); } EXPORT_SYMBOL(obd_zombie_barrier); @@ -1368,30 +1685,36 @@ static int obd_zombie_impexp_thread(void *unused) int rc; if ((rc = cfs_daemonize_ctxt("obd_zombid"))) { - complete(&obd_zombie_start); + cfs_complete(&obd_zombie_start); RETURN(rc); } - complete(&obd_zombie_start); + cfs_complete(&obd_zombie_start); - while(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { - struct l_wait_info lwi = { 0 }; + obd_zombie_pid = cfs_curproc_pid(); - l_wait_event(obd_zombie_waitq, !obd_zombie_impexp_check(NULL), &lwi); + while(!cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { + struct l_wait_info lwi = { 0 }; + l_wait_event(obd_zombie_waitq, + !obd_zombie_impexp_check(NULL), &lwi); obd_zombie_impexp_cull(); - /* Notify obd_zombie_barrier callers that queues may be empty */ + + /* + * Notify obd_zombie_barrier callers that queues + * may be empty. + */ cfs_waitq_signal(&obd_zombie_waitq); } - complete(&obd_zombie_stop); + cfs_complete(&obd_zombie_stop); RETURN(0); } #else /* ! KERNEL */ -static atomic_t zombie_recur = ATOMIC_INIT(0); +static cfs_atomic_t zombie_recur = CFS_ATOMIC_INIT(0); static void *obd_zombie_impexp_work_cb; static void *obd_zombie_impexp_idle_cb; @@ -1399,11 +1722,11 @@ int obd_zombie_impexp_kill(void *arg) { int rc = 0; - if (atomic_inc_return(&zombie_recur) == 1) { + if (cfs_atomic_inc_return(&zombie_recur) == 1) { obd_zombie_impexp_cull(); rc = 1; } - atomic_dec(&zombie_recur); + cfs_atomic_dec(&zombie_recur); return rc; } @@ -1418,17 +1741,18 @@ int obd_zombie_impexp_init(void) CFS_INIT_LIST_HEAD(&obd_zombie_imports); CFS_INIT_LIST_HEAD(&obd_zombie_exports); - spin_lock_init(&obd_zombie_impexp_lock); - init_completion(&obd_zombie_start); - init_completion(&obd_zombie_stop); + cfs_spin_lock_init(&obd_zombie_impexp_lock); + cfs_init_completion(&obd_zombie_start); + cfs_init_completion(&obd_zombie_stop); cfs_waitq_init(&obd_zombie_waitq); + obd_zombie_pid = 0; #ifdef __KERNEL__ - rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0); + rc = cfs_create_thread(obd_zombie_impexp_thread, NULL, 0); if (rc < 0) RETURN(rc); - wait_for_completion(&obd_zombie_start); + cfs_wait_for_completion(&obd_zombie_start); #else obd_zombie_impexp_work_cb = @@ -1439,7 +1763,6 @@ int obd_zombie_impexp_init(void) liblustre_register_idle_callback("obd_zombi_impexp_check", &obd_zombie_impexp_check, NULL); rc = 0; - #endif RETURN(rc); } @@ -1448,12 +1771,80 @@ int obd_zombie_impexp_init(void) */ void obd_zombie_impexp_stop(void) { - set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + cfs_set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); obd_zombie_impexp_notify(); #ifdef __KERNEL__ - wait_for_completion(&obd_zombie_stop); + cfs_wait_for_completion(&obd_zombie_stop); #else liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb); liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb); #endif } + +/***** Kernel-userspace comm helpers *******/ + +/* Get length of entire message, including header */ +int kuc_len(int payload_len) +{ + return sizeof(struct kuc_hdr) + payload_len; +} +EXPORT_SYMBOL(kuc_len); + +/* Get a pointer to kuc header, given a ptr to the payload + * @param p Pointer to payload area + * @returns Pointer to kuc header + */ +struct kuc_hdr * kuc_ptr(void *p) +{ + struct kuc_hdr *lh = ((struct kuc_hdr *)p) - 1; + LASSERT(lh->kuc_magic == KUC_MAGIC); + return lh; +} +EXPORT_SYMBOL(kuc_ptr); + +/* Test if payload is part of kuc message + * @param p Pointer to payload area + * @returns boolean + */ +int kuc_ispayload(void *p) +{ + struct kuc_hdr *kh = ((struct kuc_hdr *)p) - 1; + + if (kh->kuc_magic == KUC_MAGIC) + return 1; + else + return 0; +} +EXPORT_SYMBOL(kuc_ispayload); + +/* Alloc space for a message, and fill in header + * @return Pointer to payload area + */ +void *kuc_alloc(int payload_len, int transport, int type) +{ + struct kuc_hdr *lh; + int len = kuc_len(payload_len); + + OBD_ALLOC(lh, len); + if (lh == NULL) + return ERR_PTR(-ENOMEM); + + lh->kuc_magic = KUC_MAGIC; + lh->kuc_transport = transport; + lh->kuc_msgtype = type; + lh->kuc_msglen = len; + + return (void *)(lh + 1); +} +EXPORT_SYMBOL(kuc_alloc); + +/* Takes pointer to payload area */ +inline void kuc_free(void *p, int payload_len) +{ + struct kuc_hdr *lh = kuc_ptr(p); + OBD_FREE(lh, kuc_len(payload_len)); +} +EXPORT_SYMBOL(kuc_free); + + +