X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Fgenops.c;h=4fb3ecc25c4136ce9426dc1fdce896878ff64692;hp=9d577f9e799cb9f1a02a5a2bc31246aa6da20f12;hb=892078e3b566c04471e7dcf2c28e66f2f3584f93;hpb=0bb171673e30fa3271b851f6274ca9cdb8f5e55f diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 9d577f9..4fb3ecc 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -27,7 +27,7 @@ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -40,30 +40,34 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifndef __KERNEL__ -#include -#endif -#include + +#include #include #include +#include +#include -extern cfs_list_t obd_types; spinlock_t obd_types_lock; -struct kmem_cache *obd_device_cachep; +static struct kmem_cache *obd_device_cachep; struct kmem_cache *obdo_cachep; EXPORT_SYMBOL(obdo_cachep); -struct kmem_cache *import_cachep; +static struct kmem_cache *import_cachep; + +static struct list_head obd_zombie_imports; +static struct list_head obd_zombie_exports; +static spinlock_t obd_zombie_impexp_lock; -cfs_list_t obd_zombie_imports; -cfs_list_t obd_zombie_exports; -spinlock_t obd_zombie_impexp_lock; static void obd_zombie_impexp_notify(void); static void obd_zombie_export_add(struct obd_export *exp); static void obd_zombie_import_add(struct obd_import *imp); static void print_export_data(struct obd_export *exp, const char *status, int locks); +struct list_head obd_stale_exports; +spinlock_t obd_stale_export_lock; +atomic_t obd_stale_export_num; + int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); EXPORT_SYMBOL(ptlrpc_put_connection_superhack); @@ -75,7 +79,7 @@ static struct obd_device *obd_device_alloc(void) { struct obd_device *obd; - OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, __GFP_IO); + OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, GFP_NOFS); if (obd != NULL) { obd->obd_magic = OBD_DEVICE_MAGIC; } @@ -98,12 +102,12 @@ static void obd_device_free(struct obd_device *obd) struct obd_type *class_search_type(const char *name) { - cfs_list_t *tmp; + struct list_head *tmp; struct obd_type *type; spin_lock(&obd_types_lock); - cfs_list_for_each(tmp, &obd_types) { - type = cfs_list_entry(tmp, struct obd_type, typ_chain); + list_for_each(tmp, &obd_types) { + type = list_entry(tmp, struct obd_type, typ_chain); if (strcmp(type->typ_name, name) == 0) { spin_unlock(&obd_types_lock); return type; @@ -148,7 +152,6 @@ struct obd_type *class_get_type(const char *name) } return type; } -EXPORT_SYMBOL(class_get_type); void class_put_type(struct obd_type *type) { @@ -158,15 +161,11 @@ void class_put_type(struct obd_type *type) module_put(type->typ_dt_ops->o_owner); spin_unlock(&type->obd_type_lock); } -EXPORT_SYMBOL(class_put_type); #define CLASS_MAX_NAME 1024 int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, - struct lprocfs_seq_vars *module_vars, -#ifndef HAVE_ONLY_PROCFS_SEQ - struct lprocfs_vars *vars, -#endif + bool enable_proc, struct lprocfs_vars *vars, const char *name, struct lu_device_type *ldt) { struct obd_type *type; @@ -202,23 +201,16 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, strcpy(type->typ_name, name); spin_lock_init(&type->obd_type_lock); -#ifdef LPROCFS -#ifndef HAVE_ONLY_PROCFS_SEQ - if (vars) { +#ifdef CONFIG_PROC_FS + if (enable_proc) { type->typ_procroot = lprocfs_register(type->typ_name, - proc_lustre_root, - vars, type); - } else -#endif - { - type->typ_procroot = lprocfs_seq_register(type->typ_name, - proc_lustre_root, - module_vars, type); - } - if (IS_ERR(type->typ_procroot)) { - rc = PTR_ERR(type->typ_procroot); - type->typ_procroot = NULL; - GOTO (failed, rc); + proc_lustre_root, + vars, type); + if (IS_ERR(type->typ_procroot)) { + rc = PTR_ERR(type->typ_procroot); + type->typ_procroot = NULL; + GOTO(failed, rc); + } } #endif if (ldt != NULL) { @@ -229,25 +221,23 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, } spin_lock(&obd_types_lock); - cfs_list_add(&type->typ_chain, &obd_types); + list_add(&type->typ_chain, &obd_types); spin_unlock(&obd_types_lock); RETURN (0); - failed: - if (type->typ_name != NULL) +failed: + if (type->typ_name != NULL) { +#ifdef CONFIG_PROC_FS + if (type->typ_procroot != NULL) + remove_proc_subtree(type->typ_name, proc_lustre_root); +#endif OBD_FREE(type->typ_name, strlen(name) + 1); + } if (type->typ_md_ops != NULL) OBD_FREE_PTR(type->typ_md_ops); if (type->typ_dt_ops != NULL) OBD_FREE_PTR(type->typ_dt_ops); -#ifdef LPROCFS -#ifndef HAVE_ONLY_PROCFS_SEQ - lprocfs_try_remove_proc_entry(type->typ_name, proc_lustre_root); -#else - remove_proc_subtree(type->typ_name, proc_lustre_root); -#endif -#endif OBD_FREE(type, sizeof(*type)); RETURN(rc); } @@ -276,18 +266,17 @@ int class_unregister_type(const char *name) * other modules can share names (i.e. lod can use lov entry). so * we can't reference pointer as it can get invalided when another * module removes the entry */ -#ifdef LPROCFS -#ifndef HAVE_ONLY_PROCFS_SEQ - lprocfs_try_remove_proc_entry(type->typ_name, proc_lustre_root); -#else - remove_proc_subtree(type->typ_name, proc_lustre_root); -#endif +#ifdef CONFIG_PROC_FS + if (type->typ_procroot != NULL) + remove_proc_subtree(type->typ_name, proc_lustre_root); + if (type->typ_procsym != NULL) + lprocfs_remove(&type->typ_procsym); #endif if (type->typ_lu) lu_device_type_fini(type->typ_lu); spin_lock(&obd_types_lock); - cfs_list_del(&type->typ_chain); + list_del(&type->typ_chain); spin_unlock(&obd_types_lock); OBD_FREE(type->typ_name, strlen(name) + 1); if (type->typ_dt_ops != NULL) @@ -436,7 +425,6 @@ int class_name2dev(const char *name) return -1; } -EXPORT_SYMBOL(class_name2dev); struct obd_device *class_name2obd(const char *name) { @@ -466,7 +454,6 @@ int class_uuid2dev(struct obd_uuid *uuid) return -1; } -EXPORT_SYMBOL(class_uuid2dev); struct obd_device *class_uuid2obd(struct obd_uuid *uuid) { @@ -504,7 +491,6 @@ struct obd_device *class_num2obd(int num) return obd; } -EXPORT_SYMBOL(class_num2obd); /** * Get obd devices count. Device in any @@ -549,7 +535,7 @@ void class_obd_list(void) LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n", i, status, obd->obd_type->typ_name, obd->obd_name, obd->obd_uuid.uuid, - cfs_atomic_read(&obd->obd_refcount)); + atomic_read(&obd->obd_refcount)); } read_unlock(&obd_dev_lock); return; @@ -634,19 +620,22 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen) LASSERT(namelen > 0); read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - obd = class_num2obd(i); + for (i = 0; i < class_devno_max(); i++) { + obd = class_num2obd(i); - if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping) - continue; + if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping) + continue; - /* only notify mdc, osc, mdt, ost */ - type = obd->obd_type->typ_name; - if (strcmp(type, LUSTRE_MDC_NAME) != 0 && - strcmp(type, LUSTRE_OSC_NAME) != 0 && - strcmp(type, LUSTRE_MDT_NAME) != 0 && - strcmp(type, LUSTRE_OST_NAME) != 0) - continue; + /* only notify mdc, osc, osp, lwp, mdt, ost + * because only these have a -sptlrpc llog */ + type = obd->obd_type->typ_name; + if (strcmp(type, LUSTRE_MDC_NAME) != 0 && + strcmp(type, LUSTRE_OSC_NAME) != 0 && + strcmp(type, LUSTRE_OSP_NAME) != 0 && + strcmp(type, LUSTRE_LWP_NAME) != 0 && + strcmp(type, LUSTRE_MDT_NAME) != 0 && + strcmp(type, LUSTRE_OST_NAME) != 0) + continue; if (strncmp(obd->obd_name, fsname, namelen)) continue; @@ -680,10 +669,7 @@ void obd_cleanup_caches(void) kmem_cache_destroy(import_cachep); import_cachep = NULL; } - if (capa_cachep) { - kmem_cache_destroy(capa_cachep); - capa_cachep = NULL; - } + EXIT; } @@ -712,12 +698,6 @@ int obd_init_caches(void) if (!import_cachep) GOTO(out, rc = -ENOMEM); - LASSERT(capa_cachep == NULL); - capa_cachep = kmem_cache_create("capa_cache", sizeof(struct obd_capa), - 0, 0, NULL); - if (!capa_cachep) - GOTO(out, rc = -ENOMEM); - RETURN(0); out: obd_cleanup_caches(); @@ -765,7 +745,6 @@ struct obd_device *class_conn2obd(struct lustre_handle *conn) } return NULL; } -EXPORT_SYMBOL(class_conn2obd); struct obd_import *class_exp2cliimp(struct obd_export *exp) { @@ -783,7 +762,6 @@ struct obd_import *class_conn2cliimp(struct lustre_handle *conn) return NULL; return obd->u.cli.cl_import; } -EXPORT_SYMBOL(class_conn2cliimp); /* Export management functions */ static void class_export_destroy(struct obd_export *exp) @@ -801,10 +779,10 @@ static void class_export_destroy(struct obd_export *exp) if (exp->exp_connection) ptlrpc_put_connection_superhack(exp->exp_connection); - LASSERT(cfs_list_empty(&exp->exp_outstanding_replies)); - LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies)); - LASSERT(cfs_list_empty(&exp->exp_req_replay_queue)); - LASSERT(cfs_list_empty(&exp->exp_hp_rpcs)); + LASSERT(list_empty(&exp->exp_outstanding_replies)); + LASSERT(list_empty(&exp->exp_uncommitted_replies)); + LASSERT(list_empty(&exp->exp_req_replay_queue)); + LASSERT(list_empty(&exp->exp_hp_rpcs)); obd_destroy_export(exp); class_decref(obd, "export", exp); @@ -824,9 +802,9 @@ static struct portals_handle_ops export_handle_ops = { struct obd_export *class_export_get(struct obd_export *exp) { - cfs_atomic_inc(&exp->exp_refcount); + atomic_inc(&exp->exp_refcount); CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp, - cfs_atomic_read(&exp->exp_refcount)); + atomic_read(&exp->exp_refcount)); return exp; } EXPORT_SYMBOL(class_export_get); @@ -836,18 +814,19 @@ void class_export_put(struct obd_export *exp) LASSERT(exp != NULL); LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON); CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp, - cfs_atomic_read(&exp->exp_refcount) - 1); + atomic_read(&exp->exp_refcount) - 1); - if (cfs_atomic_dec_and_test(&exp->exp_refcount)) { - LASSERT(!cfs_list_empty(&exp->exp_obd_chain)); - CDEBUG(D_IOCTL, "final put %p/%s\n", - exp, exp->exp_client_uuid.uuid); + if (atomic_dec_and_test(&exp->exp_refcount)) { + LASSERT(!list_empty(&exp->exp_obd_chain)); + LASSERT(list_empty(&exp->exp_stale_list)); + CDEBUG(D_IOCTL, "final put %p/%s\n", + exp, exp->exp_client_uuid.uuid); - /* release nid stat refererence */ - lprocfs_exp_cleanup(exp); + /* release nid stat refererence */ + lprocfs_exp_cleanup(exp); - obd_zombie_export_add(exp); - } + obd_zombie_export_add(exp); + } } EXPORT_SYMBOL(class_export_put); @@ -858,7 +837,7 @@ struct obd_export *class_new_export(struct obd_device *obd, struct obd_uuid *cluuid) { struct obd_export *export; - cfs_hash_t *hash = NULL; + struct cfs_hash *hash = NULL; int rc = 0; ENTRY; @@ -869,31 +848,33 @@ struct obd_export *class_new_export(struct obd_device *obd, export->exp_conn_cnt = 0; export->exp_lock_hash = NULL; export->exp_flock_hash = NULL; - cfs_atomic_set(&export->exp_refcount, 2); - cfs_atomic_set(&export->exp_rpc_count, 0); - cfs_atomic_set(&export->exp_cb_count, 0); - cfs_atomic_set(&export->exp_locks_count, 0); + atomic_set(&export->exp_refcount, 2); + atomic_set(&export->exp_rpc_count, 0); + atomic_set(&export->exp_cb_count, 0); + atomic_set(&export->exp_locks_count, 0); #if LUSTRE_TRACKS_LOCK_EXP_REFS - CFS_INIT_LIST_HEAD(&export->exp_locks_list); + INIT_LIST_HEAD(&export->exp_locks_list); spin_lock_init(&export->exp_locks_list_guard); #endif - cfs_atomic_set(&export->exp_replay_count, 0); + atomic_set(&export->exp_replay_count, 0); export->exp_obd = obd; - CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); + INIT_LIST_HEAD(&export->exp_outstanding_replies); spin_lock_init(&export->exp_uncommitted_replies_lock); - CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies); - CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue); - CFS_INIT_LIST_HEAD(&export->exp_handle.h_link); - CFS_INIT_LIST_HEAD(&export->exp_hp_rpcs); - CFS_INIT_LIST_HEAD(&export->exp_reg_rpcs); + INIT_LIST_HEAD(&export->exp_uncommitted_replies); + INIT_LIST_HEAD(&export->exp_req_replay_queue); + INIT_LIST_HEAD(&export->exp_handle.h_link); + INIT_LIST_HEAD(&export->exp_hp_rpcs); + INIT_LIST_HEAD(&export->exp_reg_rpcs); class_handle_hash(&export->exp_handle, &export_handle_ops); export->exp_last_request_time = cfs_time_current_sec(); spin_lock_init(&export->exp_lock); spin_lock_init(&export->exp_rpc_lock); - CFS_INIT_HLIST_NODE(&export->exp_uuid_hash); - CFS_INIT_HLIST_NODE(&export->exp_nid_hash); + INIT_HLIST_NODE(&export->exp_uuid_hash); + INIT_HLIST_NODE(&export->exp_nid_hash); + INIT_HLIST_NODE(&export->exp_gen_hash); spin_lock_init(&export->exp_bl_list_lock); - CFS_INIT_LIST_HEAD(&export->exp_bl_list); + INIT_LIST_HEAD(&export->exp_bl_list); + INIT_LIST_HEAD(&export->exp_stale_list); export->exp_sp_peer = LUSTRE_SP_ANY; export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; @@ -919,6 +900,7 @@ struct obd_export *class_new_export(struct obd_device *obd, } } + at_init(&export->exp_bl_lock_at, obd_timeout, 0); spin_lock(&obd->obd_dev_lock); if (obd->obd_stopping) { cfs_hash_del(hash, cluuid, &export->exp_uuid_hash); @@ -926,9 +908,9 @@ struct obd_export *class_new_export(struct obd_device *obd, } class_incref(obd, "export", export); - cfs_list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); - cfs_list_add_tail(&export->exp_obd_chain_timed, - &export->exp_obd->obd_exports_timed); + list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); + list_add_tail(&export->exp_obd_chain_timed, + &export->exp_obd->obd_exports_timed); export->exp_obd->obd_num_exports++; spin_unlock(&obd->obd_dev_lock); cfs_hash_putref(hash); @@ -940,7 +922,7 @@ exit_err: if (hash) cfs_hash_putref(hash); class_handle_unhash(&export->exp_handle); - LASSERT(cfs_hlist_unhashed(&export->exp_uuid_hash)); + LASSERT(hlist_unhashed(&export->exp_uuid_hash)); obd_destroy_export(export); OBD_FREE_PTR(export); return ERR_PTR(rc); @@ -953,21 +935,34 @@ void class_unlink_export(struct obd_export *exp) spin_lock(&exp->exp_obd->obd_dev_lock); /* delete an uuid-export hashitem from hashtables */ - if (!cfs_hlist_unhashed(&exp->exp_uuid_hash)) + if (!hlist_unhashed(&exp->exp_uuid_hash)) cfs_hash_del(exp->exp_obd->obd_uuid_hash, &exp->exp_client_uuid, &exp->exp_uuid_hash); - cfs_list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports); - cfs_list_del_init(&exp->exp_obd_chain_timed); + if (!hlist_unhashed(&exp->exp_gen_hash)) { + struct tg_export_data *ted = &exp->exp_target_data; + struct cfs_hash *hash; + + hash = cfs_hash_getref(exp->exp_obd->obd_gen_hash); + cfs_hash_del(hash, &ted->ted_lcd->lcd_generation, + &exp->exp_gen_hash); + cfs_hash_putref(hash); + } + + list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports); + list_del_init(&exp->exp_obd_chain_timed); exp->exp_obd->obd_num_exports--; spin_unlock(&exp->exp_obd->obd_dev_lock); - class_export_put(exp); + atomic_inc(&obd_stale_export_num); + + /* A reference is kept by obd_stale_exports list */ + obd_stale_export_put(exp); } EXPORT_SYMBOL(class_unlink_export); /* Import management functions */ -void class_import_destroy(struct obd_import *imp) +static void class_import_destroy(struct obd_import *imp) { ENTRY; @@ -978,12 +973,12 @@ void class_import_destroy(struct obd_import *imp) ptlrpc_put_connection_superhack(imp->imp_connection); - while (!cfs_list_empty(&imp->imp_conn_list)) { - struct obd_import_conn *imp_conn; + while (!list_empty(&imp->imp_conn_list)) { + struct obd_import_conn *imp_conn; - imp_conn = cfs_list_entry(imp->imp_conn_list.next, - struct obd_import_conn, oic_item); - cfs_list_del_init(&imp_conn->oic_item); + imp_conn = list_entry(imp->imp_conn_list.next, + struct obd_import_conn, oic_item); + list_del_init(&imp_conn->oic_item); ptlrpc_put_connection_superhack(imp_conn->oic_conn); OBD_FREE(imp_conn, sizeof(*imp_conn)); } @@ -1006,9 +1001,9 @@ static struct portals_handle_ops import_handle_ops = { struct obd_import *class_import_get(struct obd_import *import) { - cfs_atomic_inc(&import->imp_refcount); + atomic_inc(&import->imp_refcount); CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import, - cfs_atomic_read(&import->imp_refcount), + atomic_read(&import->imp_refcount), import->imp_obd->obd_name); return import; } @@ -1016,16 +1011,16 @@ EXPORT_SYMBOL(class_import_get); void class_import_put(struct obd_import *imp) { - ENTRY; + ENTRY; - LASSERT(cfs_list_empty(&imp->imp_zombie_chain)); + LASSERT(list_empty(&imp->imp_zombie_chain)); LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON); CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp, - cfs_atomic_read(&imp->imp_refcount) - 1, + atomic_read(&imp->imp_refcount) - 1, imp->imp_obd->obd_name); - if (cfs_atomic_dec_and_test(&imp->imp_refcount)) { + if (atomic_dec_and_test(&imp->imp_refcount)) { CDEBUG(D_INFO, "final put import %p\n", imp); obd_zombie_import_add(imp); } @@ -1056,12 +1051,12 @@ struct obd_import *class_new_import(struct obd_device *obd) if (imp == NULL) return NULL; - CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain); - CFS_INIT_LIST_HEAD(&imp->imp_zombie_chain); - CFS_INIT_LIST_HEAD(&imp->imp_replay_list); - CFS_INIT_LIST_HEAD(&imp->imp_sending_list); - CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); - CFS_INIT_LIST_HEAD(&imp->imp_committed_list); + INIT_LIST_HEAD(&imp->imp_pinger_chain); + INIT_LIST_HEAD(&imp->imp_zombie_chain); + INIT_LIST_HEAD(&imp->imp_replay_list); + INIT_LIST_HEAD(&imp->imp_sending_list); + INIT_LIST_HEAD(&imp->imp_delayed_list); + INIT_LIST_HEAD(&imp->imp_committed_list); imp->imp_replay_cursor = &imp->imp_committed_list; spin_lock_init(&imp->imp_lock); imp->imp_last_success_conn = 0; @@ -1070,13 +1065,13 @@ struct obd_import *class_new_import(struct obd_device *obd) mutex_init(&imp->imp_sec_mutex); init_waitqueue_head(&imp->imp_recovery_waitq); - cfs_atomic_set(&imp->imp_refcount, 2); - cfs_atomic_set(&imp->imp_unregistering, 0); - cfs_atomic_set(&imp->imp_inflight, 0); - cfs_atomic_set(&imp->imp_replay_inflight, 0); - cfs_atomic_set(&imp->imp_inval_count, 0); - CFS_INIT_LIST_HEAD(&imp->imp_conn_list); - CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link); + atomic_set(&imp->imp_refcount, 2); + atomic_set(&imp->imp_unregistering, 0); + atomic_set(&imp->imp_inflight, 0); + atomic_set(&imp->imp_replay_inflight, 0); + atomic_set(&imp->imp_inval_count, 0); + INIT_LIST_HEAD(&imp->imp_conn_list); + INIT_LIST_HEAD(&imp->imp_handle.h_link); class_handle_hash(&imp->imp_handle, &import_handle_ops); init_imp_at(&imp->imp_at); @@ -1116,14 +1111,13 @@ void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) exp, lock, lock->l_exp_refs_target); } if ((lock->l_exp_refs_nr ++) == 0) { - cfs_list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); + list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); lock->l_exp_refs_target = exp; } CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", lock, exp, lock->l_exp_refs_nr); spin_unlock(&exp->exp_locks_list_guard); } -EXPORT_SYMBOL(__class_export_add_lock_ref); void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) { @@ -1135,14 +1129,13 @@ void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) lock, lock->l_exp_refs_target, exp); } if (-- lock->l_exp_refs_nr == 0) { - cfs_list_del_init(&lock->l_exp_refs_link); + list_del_init(&lock->l_exp_refs_link); lock->l_exp_refs_target = NULL; } CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", lock, exp, lock->l_exp_refs_nr); spin_unlock(&exp->exp_locks_list_guard); } -EXPORT_SYMBOL(__class_export_del_lock_ref); #endif /* A connection defines an export context in which preallocation can @@ -1172,46 +1165,45 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd, EXPORT_SYMBOL(class_connect); /* if export is involved in recovery then clean up related things */ -void class_export_recovery_cleanup(struct obd_export *exp) +static void class_export_recovery_cleanup(struct obd_export *exp) { struct obd_device *obd = exp->exp_obd; spin_lock(&obd->obd_recovery_task_lock); - if (exp->exp_delayed) - obd->obd_delayed_clients--; if (obd->obd_recovering) { if (exp->exp_in_recovery) { spin_lock(&exp->exp_lock); exp->exp_in_recovery = 0; spin_unlock(&exp->exp_lock); LASSERT_ATOMIC_POS(&obd->obd_connected_clients); - cfs_atomic_dec(&obd->obd_connected_clients); + atomic_dec(&obd->obd_connected_clients); } /* if called during recovery then should update * obd_stale_clients counter, * lightweight exports are not counted */ - if (exp->exp_failed && - (exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) == 0) + if ((exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) == 0) exp->exp_obd->obd_stale_clients++; } spin_unlock(&obd->obd_recovery_task_lock); + + spin_lock(&exp->exp_lock); /** Cleanup req replay fields */ if (exp->exp_req_replay_needed) { - spin_lock(&exp->exp_lock); exp->exp_req_replay_needed = 0; - spin_unlock(&exp->exp_lock); - LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients)); - cfs_atomic_dec(&obd->obd_req_replay_clients); + + LASSERT(atomic_read(&obd->obd_req_replay_clients)); + atomic_dec(&obd->obd_req_replay_clients); } + /** Cleanup lock replay data */ if (exp->exp_lock_replay_needed) { - spin_lock(&exp->exp_lock); exp->exp_lock_replay_needed = 0; - spin_unlock(&exp->exp_lock); - LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients)); - cfs_atomic_dec(&obd->obd_lock_replay_clients); + + LASSERT(atomic_read(&obd->obd_lock_replay_clients)); + atomic_dec(&obd->obd_lock_replay_clients); } + spin_unlock(&exp->exp_lock); } /* This function removes 1-3 references from the export: @@ -1239,14 +1231,14 @@ int class_disconnect(struct obd_export *export) * all end up in here, and if any of them race we shouldn't * call extra class_export_puts(). */ if (already_disconnected) { - LASSERT(cfs_hlist_unhashed(&export->exp_nid_hash)); + LASSERT(hlist_unhashed(&export->exp_nid_hash)); GOTO(no_disconn, already_disconnected); } CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", export->exp_handle.h_cookie); - if (!cfs_hlist_unhashed(&export->exp_nid_hash)) + if (!hlist_unhashed(&export->exp_nid_hash)) cfs_hash_del(export->exp_obd->obd_nid_hash, &export->exp_connection->c_peer.nid, &export->exp_nid_hash); @@ -1273,7 +1265,7 @@ int class_connected_export(struct obd_export *exp) } EXPORT_SYMBOL(class_connected_export); -static void class_disconnect_export_list(cfs_list_t *list, +static void class_disconnect_export_list(struct list_head *list, enum obd_option flags) { int rc; @@ -1282,11 +1274,11 @@ static void class_disconnect_export_list(cfs_list_t *list, /* It's possible that an export may disconnect itself, but * nothing else will be added to this list. */ - while (!cfs_list_empty(list)) { - exp = cfs_list_entry(list->next, struct obd_export, - exp_obd_chain); - /* need for safe call CDEBUG after obd_disconnect */ - class_export_get(exp); + while (!list_empty(list)) { + exp = list_entry(list->next, struct obd_export, + exp_obd_chain); + /* need for safe call CDEBUG after obd_disconnect */ + class_export_get(exp); spin_lock(&exp->exp_lock); exp->exp_flags = flags; @@ -1299,7 +1291,7 @@ static void class_disconnect_export_list(cfs_list_t *list, exp); /* Need to delete this now so we don't end up pointing * to work_list later when this export is cleaned up. */ - cfs_list_del_init(&exp->exp_obd_chain); + list_del_init(&exp->exp_obd_chain); class_export_put(exp); continue; } @@ -1321,17 +1313,17 @@ static void class_disconnect_export_list(cfs_list_t *list, void class_disconnect_exports(struct obd_device *obd) { - cfs_list_t work_list; + struct list_head work_list; ENTRY; /* Move all of the exports from obd_exports to a work list, en masse. */ - CFS_INIT_LIST_HEAD(&work_list); + INIT_LIST_HEAD(&work_list); spin_lock(&obd->obd_dev_lock); - cfs_list_splice_init(&obd->obd_exports, &work_list); - cfs_list_splice_init(&obd->obd_delayed_exports, &work_list); + list_splice_init(&obd->obd_exports, &work_list); + list_splice_init(&obd->obd_delayed_exports, &work_list); spin_unlock(&obd->obd_dev_lock); - if (!cfs_list_empty(&work_list)) { + if (!list_empty(&work_list)) { CDEBUG(D_HA, "OBD device %d (%p) has exports, " "disconnecting them\n", obd->obd_minor, obd); class_disconnect_export_list(&work_list, @@ -1348,15 +1340,15 @@ EXPORT_SYMBOL(class_disconnect_exports); void class_disconnect_stale_exports(struct obd_device *obd, int (*test_export)(struct obd_export *)) { - cfs_list_t work_list; + struct list_head work_list; struct obd_export *exp, *n; int evicted = 0; ENTRY; - CFS_INIT_LIST_HEAD(&work_list); + INIT_LIST_HEAD(&work_list); spin_lock(&obd->obd_dev_lock); - cfs_list_for_each_entry_safe(exp, n, &obd->obd_exports, - exp_obd_chain) { + list_for_each_entry_safe(exp, n, &obd->obd_exports, + exp_obd_chain) { /* don't count self-export as client */ if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) @@ -1375,7 +1367,7 @@ void class_disconnect_stale_exports(struct obd_device *obd, exp->exp_failed = 1; spin_unlock(&exp->exp_lock); - cfs_list_move(&exp->exp_obd_chain, &work_list); + list_move(&exp->exp_obd_chain, &work_list); evicted++; CDEBUG(D_HA, "%s: disconnect stale client %s@%s\n", obd->obd_name, exp->exp_client_uuid.uuid, @@ -1444,7 +1436,7 @@ EXPORT_SYMBOL(obd_export_nid2str); int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) { - cfs_hash_t *nid_hash; + struct cfs_hash *nid_hash; struct obd_export *doomed_exp = NULL; int exports_evicted = 0; @@ -1492,7 +1484,7 @@ EXPORT_SYMBOL(obd_export_evict_by_nid); int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) { - cfs_hash_t *uuid_hash; + struct cfs_hash *uuid_hash; struct obd_export *doomed_exp = NULL; struct obd_uuid doomed_uuid; int exports_evicted = 0; @@ -1529,11 +1521,9 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) return exports_evicted; } -EXPORT_SYMBOL(obd_export_evict_by_uuid); #if LUSTRE_TRACKS_LOCK_EXP_REFS void (*class_export_dump_hook)(struct obd_export*) = NULL; -EXPORT_SYMBOL(class_export_dump_hook); #endif static void print_export_data(struct obd_export *exp, const char *status, @@ -1544,8 +1534,8 @@ static void print_export_data(struct obd_export *exp, const char *status, int nreplies = 0; spin_lock(&exp->exp_lock); - cfs_list_for_each_entry(rs, &exp->exp_outstanding_replies, - rs_exp_list) { + list_for_each_entry(rs, &exp->exp_outstanding_replies, + rs_exp_list) { if (nreplies == 0) first_reply = rs; nreplies++; @@ -1554,10 +1544,10 @@ static void print_export_data(struct obd_export *exp, const char *status, CDEBUG(D_HA, "%s: %s %p %s %s %d (%d %d %d) %d %d %d %d: %p %s "LPU64"\n", exp->exp_obd->obd_name, status, exp, exp->exp_client_uuid.uuid, - obd_export_nid2str(exp), cfs_atomic_read(&exp->exp_refcount), - cfs_atomic_read(&exp->exp_rpc_count), - cfs_atomic_read(&exp->exp_cb_count), - cfs_atomic_read(&exp->exp_locks_count), + obd_export_nid2str(exp), atomic_read(&exp->exp_refcount), + atomic_read(&exp->exp_rpc_count), + atomic_read(&exp->exp_cb_count), + atomic_read(&exp->exp_locks_count), exp->exp_disconnected, exp->exp_delayed, exp->exp_failed, nreplies, first_reply, nreplies > 3 ? "..." : "", exp->exp_last_committed); @@ -1572,35 +1562,34 @@ void dump_exports(struct obd_device *obd, int locks) struct obd_export *exp; spin_lock(&obd->obd_dev_lock); - cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) + list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) print_export_data(exp, "ACTIVE", locks); - cfs_list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) + list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) print_export_data(exp, "UNLINKED", locks); - cfs_list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) + list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) print_export_data(exp, "DELAYED", locks); spin_unlock(&obd->obd_dev_lock); spin_lock(&obd_zombie_impexp_lock); - cfs_list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain) + list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain) print_export_data(exp, "ZOMBIE", locks); spin_unlock(&obd_zombie_impexp_lock); } -EXPORT_SYMBOL(dump_exports); void obd_exports_barrier(struct obd_device *obd) { int waited = 2; - LASSERT(cfs_list_empty(&obd->obd_exports)); + LASSERT(list_empty(&obd->obd_exports)); spin_lock(&obd->obd_dev_lock); - while (!cfs_list_empty(&obd->obd_unlinked_exports)) { + while (!list_empty(&obd->obd_unlinked_exports)) { spin_unlock(&obd->obd_dev_lock); - schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE, - cfs_time_seconds(waited)); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(waited)); if (waited > 5 && IS_PO2(waited)) { LCONSOLE_WARN("%s is waiting for obd_unlinked_exports " "more than %d seconds. " "The obd refcount = %d. Is it stuck?\n", obd->obd_name, waited, - cfs_atomic_read(&obd->obd_refcount)); + atomic_read(&obd->obd_refcount)); dump_exports(obd, 1); } waited *= 2; @@ -1625,21 +1614,21 @@ void obd_zombie_impexp_cull(void) do { spin_lock(&obd_zombie_impexp_lock); - import = NULL; - if (!cfs_list_empty(&obd_zombie_imports)) { - import = cfs_list_entry(obd_zombie_imports.next, - struct obd_import, - imp_zombie_chain); - cfs_list_del_init(&import->imp_zombie_chain); - } + import = NULL; + if (!list_empty(&obd_zombie_imports)) { + import = list_entry(obd_zombie_imports.next, + struct obd_import, + imp_zombie_chain); + list_del_init(&import->imp_zombie_chain); + } - export = NULL; - if (!cfs_list_empty(&obd_zombie_exports)) { - export = cfs_list_entry(obd_zombie_exports.next, - struct obd_export, - exp_obd_chain); - cfs_list_del_init(&export->exp_obd_chain); - } + export = NULL; + if (!list_empty(&obd_zombie_exports)) { + export = list_entry(obd_zombie_exports.next, + struct obd_export, + exp_obd_chain); + list_del_init(&export->exp_obd_chain); + } spin_unlock(&obd_zombie_impexp_lock); @@ -1691,13 +1680,14 @@ static int obd_zombie_impexp_check(void *arg) * Add export to the obd_zombe thread and notify it. */ static void obd_zombie_export_add(struct obd_export *exp) { + atomic_dec(&obd_stale_export_num); spin_lock(&exp->exp_obd->obd_dev_lock); - LASSERT(!cfs_list_empty(&exp->exp_obd_chain)); - cfs_list_del_init(&exp->exp_obd_chain); + LASSERT(!list_empty(&exp->exp_obd_chain)); + list_del_init(&exp->exp_obd_chain); spin_unlock(&exp->exp_obd->obd_dev_lock); spin_lock(&obd_zombie_impexp_lock); zombies_count++; - cfs_list_add(&exp->exp_obd_chain, &obd_zombie_exports); + list_add(&exp->exp_obd_chain, &obd_zombie_exports); spin_unlock(&obd_zombie_impexp_lock); obd_zombie_impexp_notify(); @@ -1708,11 +1698,10 @@ static void obd_zombie_export_add(struct obd_export *exp) { */ static void obd_zombie_import_add(struct obd_import *imp) { LASSERT(imp->imp_sec == NULL); - LASSERT(imp->imp_rq_pool == NULL); spin_lock(&obd_zombie_impexp_lock); - LASSERT(cfs_list_empty(&imp->imp_zombie_chain)); + LASSERT(list_empty(&imp->imp_zombie_chain)); zombies_count++; - cfs_list_add(&imp->imp_zombie_chain, &obd_zombie_imports); + list_add(&imp->imp_zombie_chain, &obd_zombie_imports); spin_unlock(&obd_zombie_impexp_lock); obd_zombie_impexp_notify(); @@ -1759,7 +1748,76 @@ void obd_zombie_barrier(void) } EXPORT_SYMBOL(obd_zombie_barrier); -#ifdef __KERNEL__ + +struct obd_export *obd_stale_export_get(void) +{ + struct obd_export *exp = NULL; + ENTRY; + + spin_lock(&obd_stale_export_lock); + if (!list_empty(&obd_stale_exports)) { + exp = list_entry(obd_stale_exports.next, + struct obd_export, exp_stale_list); + list_del_init(&exp->exp_stale_list); + } + spin_unlock(&obd_stale_export_lock); + + if (exp) { + CDEBUG(D_DLMTRACE, "Get export %p: total %d\n", exp, + atomic_read(&obd_stale_export_num)); + } + RETURN(exp); +} +EXPORT_SYMBOL(obd_stale_export_get); + +void obd_stale_export_put(struct obd_export *exp) +{ + ENTRY; + + LASSERT(list_empty(&exp->exp_stale_list)); + if (exp->exp_lock_hash && + atomic_read(&exp->exp_lock_hash->hs_count)) { + CDEBUG(D_DLMTRACE, "Put export %p: total %d\n", exp, + atomic_read(&obd_stale_export_num)); + + spin_lock_bh(&exp->exp_bl_list_lock); + spin_lock(&obd_stale_export_lock); + /* Add to the tail if there is no blocked locks, + * to the head otherwise. */ + if (list_empty(&exp->exp_bl_list)) + list_add_tail(&exp->exp_stale_list, + &obd_stale_exports); + else + list_add(&exp->exp_stale_list, + &obd_stale_exports); + + spin_unlock(&obd_stale_export_lock); + spin_unlock_bh(&exp->exp_bl_list_lock); + } else { + class_export_put(exp); + } + EXIT; +} +EXPORT_SYMBOL(obd_stale_export_put); + +/** + * Adjust the position of the export in the stale list, + * i.e. move to the head of the list if is needed. + **/ +void obd_stale_export_adjust(struct obd_export *exp) +{ + LASSERT(exp != NULL); + spin_lock_bh(&exp->exp_bl_list_lock); + spin_lock(&obd_stale_export_lock); + + if (!list_empty(&exp->exp_stale_list) && + !list_empty(&exp->exp_bl_list)) + list_move(&exp->exp_stale_list, &obd_stale_exports); + + spin_unlock(&obd_stale_export_lock); + spin_unlock_bh(&exp->exp_bl_list_lock); +} +EXPORT_SYMBOL(obd_stale_export_adjust); /** * destroy zombie export/import thread. @@ -1790,59 +1848,28 @@ static int obd_zombie_impexp_thread(void *unused) RETURN(0); } -#else /* ! KERNEL */ - -static cfs_atomic_t zombie_recur = CFS_ATOMIC_INIT(0); -static void *obd_zombie_impexp_work_cb; -static void *obd_zombie_impexp_idle_cb; - -int obd_zombie_impexp_kill(void *arg) -{ - int rc = 0; - - if (cfs_atomic_inc_return(&zombie_recur) == 1) { - obd_zombie_impexp_cull(); - rc = 1; - } - cfs_atomic_dec(&zombie_recur); - return rc; -} - -#endif /** * start destroy zombie import/export thread */ int obd_zombie_impexp_init(void) { -#ifdef __KERNEL__ struct task_struct *task; -#endif - CFS_INIT_LIST_HEAD(&obd_zombie_imports); - CFS_INIT_LIST_HEAD(&obd_zombie_exports); + INIT_LIST_HEAD(&obd_zombie_imports); + + INIT_LIST_HEAD(&obd_zombie_exports); spin_lock_init(&obd_zombie_impexp_lock); init_completion(&obd_zombie_start); init_completion(&obd_zombie_stop); init_waitqueue_head(&obd_zombie_waitq); obd_zombie_pid = 0; -#ifdef __KERNEL__ task = kthread_run(obd_zombie_impexp_thread, NULL, "obd_zombid"); if (IS_ERR(task)) RETURN(PTR_ERR(task)); wait_for_completion(&obd_zombie_start); -#else - - obd_zombie_impexp_work_cb = - liblustre_register_wait_callback("obd_zombi_impexp_kill", - &obd_zombie_impexp_kill, NULL); - - obd_zombie_impexp_idle_cb = - liblustre_register_idle_callback("obd_zombi_impexp_check", - &obd_zombie_impexp_check, NULL); -#endif RETURN(0); } /** @@ -1852,12 +1879,7 @@ void obd_zombie_impexp_stop(void) { set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); obd_zombie_impexp_notify(); -#ifdef __KERNEL__ wait_for_completion(&obd_zombie_stop); -#else - liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb); - liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb); -#endif } /***** Kernel-userspace comm helpers *******/ @@ -1925,5 +1947,368 @@ inline void kuc_free(void *p, int payload_len) } EXPORT_SYMBOL(kuc_free); +struct obd_request_slot_waiter { + struct list_head orsw_entry; + wait_queue_head_t orsw_waitq; + bool orsw_signaled; +}; + +static bool obd_request_slot_avail(struct client_obd *cli, + struct obd_request_slot_waiter *orsw) +{ + bool avail; + spin_lock(&cli->cl_loi_list_lock); + avail = !!list_empty(&orsw->orsw_entry); + spin_unlock(&cli->cl_loi_list_lock); + + return avail; +}; + +/* + * For network flow control, the RPC sponsor needs to acquire a credit + * before sending the RPC. The credits count for a connection is defined + * by the "cl_max_rpcs_in_flight". If all the credits are occpuied, then + * the subsequent RPC sponsors need to wait until others released their + * credits, or the administrator increased the "cl_max_rpcs_in_flight". + */ +int obd_get_request_slot(struct client_obd *cli) +{ + struct obd_request_slot_waiter orsw; + struct l_wait_info lwi; + int rc; + + spin_lock(&cli->cl_loi_list_lock); + if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) { + cli->cl_r_in_flight++; + spin_unlock(&cli->cl_loi_list_lock); + return 0; + } + + init_waitqueue_head(&orsw.orsw_waitq); + list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list); + orsw.orsw_signaled = false; + spin_unlock(&cli->cl_loi_list_lock); + + lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + rc = l_wait_event(orsw.orsw_waitq, + obd_request_slot_avail(cli, &orsw) || + orsw.orsw_signaled, + &lwi); + + /* Here, we must take the lock to avoid the on-stack 'orsw' to be + * freed but other (such as obd_put_request_slot) is using it. */ + spin_lock(&cli->cl_loi_list_lock); + if (rc != 0) { + if (!orsw.orsw_signaled) { + if (list_empty(&orsw.orsw_entry)) + cli->cl_r_in_flight--; + else + list_del(&orsw.orsw_entry); + } + } + + if (orsw.orsw_signaled) { + LASSERT(list_empty(&orsw.orsw_entry)); + + rc = -EINTR; + } + spin_unlock(&cli->cl_loi_list_lock); + + return rc; +} +EXPORT_SYMBOL(obd_get_request_slot); + +void obd_put_request_slot(struct client_obd *cli) +{ + struct obd_request_slot_waiter *orsw; + + spin_lock(&cli->cl_loi_list_lock); + cli->cl_r_in_flight--; + + /* If there is free slot, wakeup the first waiter. */ + if (!list_empty(&cli->cl_loi_read_list) && + likely(cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight)) { + orsw = list_entry(cli->cl_loi_read_list.next, + struct obd_request_slot_waiter, orsw_entry); + list_del_init(&orsw->orsw_entry); + cli->cl_r_in_flight++; + wake_up(&orsw->orsw_waitq); + } + spin_unlock(&cli->cl_loi_list_lock); +} +EXPORT_SYMBOL(obd_put_request_slot); + +__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli) +{ + return cli->cl_max_rpcs_in_flight; +} +EXPORT_SYMBOL(obd_get_max_rpcs_in_flight); + +int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max) +{ + struct obd_request_slot_waiter *orsw; + __u32 old; + int diff; + int i; + char *typ_name; + int rc; + + if (max > OBD_MAX_RIF_MAX || max < 1) + return -ERANGE; + + typ_name = cli->cl_import->imp_obd->obd_type->typ_name; + if (strcmp(typ_name, LUSTRE_MDC_NAME) == 0) { + /* adjust max_mod_rpcs_in_flight to ensure it is always + * strictly lower that max_rpcs_in_flight */ + if (max < 2) { + CERROR("%s: cannot set max_rpcs_in_flight to 1 " + "because it must be higher than " + "max_mod_rpcs_in_flight value", + cli->cl_import->imp_obd->obd_name); + return -ERANGE; + } + if (max <= cli->cl_max_mod_rpcs_in_flight) { + rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1); + if (rc != 0) + return rc; + } + } + + spin_lock(&cli->cl_loi_list_lock); + old = cli->cl_max_rpcs_in_flight; + cli->cl_max_rpcs_in_flight = max; + diff = max - old; + + /* We increase the max_rpcs_in_flight, then wakeup some waiters. */ + for (i = 0; i < diff; i++) { + if (list_empty(&cli->cl_loi_read_list)) + break; + + orsw = list_entry(cli->cl_loi_read_list.next, + struct obd_request_slot_waiter, orsw_entry); + list_del_init(&orsw->orsw_entry); + cli->cl_r_in_flight++; + wake_up(&orsw->orsw_waitq); + } + spin_unlock(&cli->cl_loi_list_lock); + + return 0; +} +EXPORT_SYMBOL(obd_set_max_rpcs_in_flight); + +__u16 obd_get_max_mod_rpcs_in_flight(struct client_obd *cli) +{ + return cli->cl_max_mod_rpcs_in_flight; +} +EXPORT_SYMBOL(obd_get_max_mod_rpcs_in_flight); + +int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max) +{ + struct obd_connect_data *ocd; + __u16 maxmodrpcs; + __u16 prev; + + if (max > OBD_MAX_RIF_MAX || max < 1) + return -ERANGE; + + /* cannot exceed or equal max_rpcs_in_flight */ + if (max >= cli->cl_max_rpcs_in_flight) { + CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) " + "higher or equal to max_rpcs_in_flight value (%u)\n", + cli->cl_import->imp_obd->obd_name, + max, cli->cl_max_rpcs_in_flight); + return -ERANGE; + } + + /* cannot exceed max modify RPCs in flight supported by the server */ + ocd = &cli->cl_import->imp_connect_data; + if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) + maxmodrpcs = ocd->ocd_maxmodrpcs; + else + maxmodrpcs = 1; + if (max > maxmodrpcs) { + CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) " + "higher than max_mod_rpcs_per_client value (%hu) " + "returned by the server at connection\n", + cli->cl_import->imp_obd->obd_name, + max, maxmodrpcs); + return -ERANGE; + } + + spin_lock(&cli->cl_mod_rpcs_lock); + + prev = cli->cl_max_mod_rpcs_in_flight; + cli->cl_max_mod_rpcs_in_flight = max; + + /* wakeup waiters if limit has been increased */ + if (cli->cl_max_mod_rpcs_in_flight > prev) + wake_up(&cli->cl_mod_rpcs_waitq); + + spin_unlock(&cli->cl_mod_rpcs_lock); + + return 0; +} +EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight); + + +#define pct(a, b) (b ? a * 100 / b : 0) +int obd_mod_rpc_stats_seq_show(struct client_obd *cli, + struct seq_file *seq) +{ + struct timeval now; + unsigned long mod_tot = 0, mod_cum; + int i; + + do_gettimeofday(&now); + + spin_lock(&cli->cl_mod_rpcs_lock); + + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); + seq_printf(seq, "modify_RPCs_in_flight: %hu\n", + cli->cl_mod_rpcs_in_flight); + + seq_printf(seq, "\n\t\t\tmodify\n"); + seq_printf(seq, "rpcs in flight rpcs %% cum %%\n"); + + mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist); + + mod_cum = 0; + for (i = 0; i < OBD_HIST_MAX; i++) { + unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i]; + mod_cum += mod; + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n", + i, mod, pct(mod, mod_tot), + pct(mod_cum, mod_tot)); + if (mod_cum == mod_tot) + break; + } + + spin_unlock(&cli->cl_mod_rpcs_lock); + + return 0; +} +EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show); +#undef pct + + +/* The number of modify RPCs sent in parallel is limited + * because the server has a finite number of slots per client to + * store request result and ensure reply reconstruction when needed. + * On the client, this limit is stored in cl_max_mod_rpcs_in_flight + * that takes into account server limit and cl_max_rpcs_in_flight + * value. + * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462), + * one close request is allowed above the maximum. + */ +static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli, + bool close_req) +{ + bool avail; + + /* A slot is available if + * - number of modify RPCs in flight is less than the max + * - it's a close RPC and no other close request is in flight + */ + avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight || + (close_req && cli->cl_close_rpcs_in_flight == 0); + + return avail; +} + +static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli, + bool close_req) +{ + bool avail; + + spin_lock(&cli->cl_mod_rpcs_lock); + avail = obd_mod_rpc_slot_avail_locked(cli, close_req); + spin_unlock(&cli->cl_mod_rpcs_lock); + return avail; +} + +/* Get a modify RPC slot from the obd client @cli according + * to the kind of operation @opc that is going to be sent + * and the intent @it of the operation if it applies. + * If the maximum number of modify RPCs in flight is reached + * the thread is put to sleep. + * Returns the tag to be set in the request message. Tag 0 + * is reserved for non-modifying requests. + */ +__u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc, + struct lookup_intent *it) +{ + struct l_wait_info lwi = LWI_INTR(NULL, NULL); + bool close_req = false; + __u16 i, max; + + /* read-only metadata RPCs don't consume a slot on MDT + * for reply reconstruction + */ + if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || + it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) + return 0; + + if (opc == MDS_CLOSE) + close_req = true; + + do { + spin_lock(&cli->cl_mod_rpcs_lock); + max = cli->cl_max_mod_rpcs_in_flight; + if (obd_mod_rpc_slot_avail_locked(cli, close_req)) { + /* there is a slot available */ + cli->cl_mod_rpcs_in_flight++; + if (close_req) + cli->cl_close_rpcs_in_flight++; + lprocfs_oh_tally(&cli->cl_mod_rpcs_hist, + cli->cl_mod_rpcs_in_flight); + /* find a free tag */ + i = find_first_zero_bit(cli->cl_mod_tag_bitmap, + max + 1); + LASSERT(i < OBD_MAX_RIF_MAX); + LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap)); + spin_unlock(&cli->cl_mod_rpcs_lock); + /* tag 0 is reserved for non-modify RPCs */ + return i + 1; + } + spin_unlock(&cli->cl_mod_rpcs_lock); + + CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot " + "opc %u, max %hu\n", + cli->cl_import->imp_obd->obd_name, opc, max); + + l_wait_event(cli->cl_mod_rpcs_waitq, + obd_mod_rpc_slot_avail(cli, close_req), &lwi); + } while (true); +} +EXPORT_SYMBOL(obd_get_mod_rpc_slot); + +/* Put a modify RPC slot from the obd client @cli according + * to the kind of operation @opc that has been sent and the + * intent @it of the operation if it applies. + */ +void obd_put_mod_rpc_slot(struct client_obd *cli, __u32 opc, + struct lookup_intent *it, __u16 tag) +{ + bool close_req = false; + + if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || + it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) + return; + + if (opc == MDS_CLOSE) + close_req = true; + + spin_lock(&cli->cl_mod_rpcs_lock); + cli->cl_mod_rpcs_in_flight--; + if (close_req) + cli->cl_close_rpcs_in_flight--; + /* release the tag in the bitmap */ + LASSERT(tag - 1 < OBD_MAX_RIF_MAX); + LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0); + spin_unlock(&cli->cl_mod_rpcs_lock); + wake_up(&cli->cl_mod_rpcs_waitq); +} +EXPORT_SYMBOL(obd_put_mod_rpc_slot);