From 6846084cce067d552802374e3c65a27fd6c7bd31 Mon Sep 17 00:00:00 2001 From: shadow Date: Wed, 8 Jul 2009 03:45:44 +0000 Subject: [PATCH] lock ordering violation between &cli->cl_sem and _lprocfs_lock Branch b1_8 b=18380 i=jay i=deen --- lustre/ChangeLog | 7 +++++++ lustre/ldlm/ldlm_lib.c | 48 ++++++++++++++++++++++----------------------- lustre/ldlm/ldlm_resource.c | 5 +++-- lustre/lov/lov_obd.c | 4 ++-- 4 files changed, 35 insertions(+), 29 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 90a1a3a..1cbc24f 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -14,6 +14,13 @@ tbd Sun Microsystems, Inc. of Lustre filesystem with 4K stack may cause a stack overflow. For more information, please refer to bugzilla 17630. +Severity : normal +Frequency : rare +Bugzilla : 18380 +Descriptoin: lock ordering violation between &cli->cl_sem and _lprocfs_lock +Details : move ldlm namespace creation in setup phase to avoid grab + _lprocfs_lock with cli_sem held. + Severity : enhancement Bugzilla : 19846 Description: Update kernel to RHEL5 2.6.18-128.1.14.el5. diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 18601e6..f6061c9 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -341,11 +341,20 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) name, obddev->obd_name, cli->cl_target_uuid.uuid); spin_lock(&imp->imp_lock); - imp->imp_invalid = 1; + imp->imp_deactive = 1; spin_unlock(&imp->imp_lock); } } + obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name, + LDLM_NAMESPACE_CLIENT, + LDLM_NAMESPACE_GREEDY); + if (obddev->obd_namespace == NULL) { + CERROR("Unable to create client namespace - %s\n", + obddev->obd_name); + GOTO(err_import, rc = -ENOMEM); + } + cli->cl_qchk_stat = CL_NOT_QUOTACHECKED; RETURN(rc); @@ -362,6 +371,10 @@ err: int client_obd_cleanup(struct obd_device *obddev) { ENTRY; + + ldlm_namespace_free_post(obddev->obd_namespace); + obddev->obd_namespace = NULL; + ldlm_put_ref(); RETURN(0); } @@ -375,11 +388,13 @@ int client_connect_import(struct lustre_handle *dlm_handle, struct obd_import *imp = cli->cl_import; struct obd_export **exp = localdata; struct obd_connect_data *ocd; - struct ldlm_namespace *to_be_freed = NULL; int rc; ENTRY; down_write(&cli->cl_sem); + CDEBUG(D_INFO, "connect %s - %d\n", obd->obd_name, + cli->cl_conn_count); + if (cli->cl_conn_count > 0) GOTO(out_sem, rc = -EALREADY); @@ -390,13 +405,7 @@ int client_connect_import(struct lustre_handle *dlm_handle, cli->cl_conn_count++; *exp = class_conn2export(dlm_handle); - if (obd->obd_namespace != NULL) - CERROR("already have namespace!\n"); - obd->obd_namespace = ldlm_namespace_new(obd, obd->obd_name, - LDLM_NAMESPACE_CLIENT, - LDLM_NAMESPACE_GREEDY); - if (obd->obd_namespace == NULL) - GOTO(out_disco, rc = -ENOMEM); + LASSERT(obd->obd_namespace); imp->imp_dlm_handle = *dlm_handle; rc = ptlrpc_init_import(imp); @@ -427,18 +436,12 @@ int client_connect_import(struct lustre_handle *dlm_handle, if (rc) { out_ldlm: - ldlm_namespace_free_prior(obd->obd_namespace, imp, 0); - to_be_freed = obd->obd_namespace; - obd->obd_namespace = NULL; -out_disco: cli->cl_conn_count--; class_disconnect(*exp); *exp = NULL; - } + } out_sem: up_write(&cli->cl_sem); - if (to_be_freed) - ldlm_namespace_free_post(to_be_freed); return rc; } @@ -447,7 +450,6 @@ int client_disconnect_export(struct obd_export *exp) struct obd_device *obd = class_exp2obd(exp); struct client_obd *cli; struct obd_import *imp; - struct ldlm_namespace *to_be_freed = NULL; int rc = 0, err; ENTRY; @@ -461,6 +463,9 @@ int client_disconnect_export(struct obd_export *exp) imp = cli->cl_import; down_write(&cli->cl_sem); + CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name, + cli->cl_conn_count); + if (!cli->cl_conn_count) { CERROR("disconnecting disconnected device (%s)\n", obd->obd_name); @@ -490,16 +495,11 @@ int client_disconnect_export(struct obd_export *exp) NULL); ldlm_namespace_free_prior(obd->obd_namespace, imp, obd->obd_force); - to_be_freed = obd->obd_namespace; } rc = ptlrpc_disconnect_import(imp, 0); ptlrpc_invalidate_import(imp); - /* set obd_namespace to NULL only after invalidate, because we can have - * some connect requests in flight, and his need store a connect flags - * in obd_namespace. bug 14260 */ - obd->obd_namespace = NULL; if (imp->imp_rq_pool) { ptlrpc_free_rq_pool(imp->imp_rq_pool); @@ -516,10 +516,8 @@ int client_disconnect_export(struct obd_export *exp) err = class_disconnect(exp); if (!rc && err) rc = err; - up_write(&cli->cl_sem); - if (to_be_freed) - ldlm_namespace_free_post(to_be_freed); + RETURN(rc); } diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 9857485..7d03ea8 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -573,8 +573,6 @@ void ldlm_namespace_free_prior(struct ldlm_namespace *ns, return; } - /* Make sure that nobody can find this ns in its list. */ - ldlm_namespace_unregister(ns, ns->ns_client); /* Can fail with -EINTR when force == 0 in which case try harder */ rc = __ldlm_namespace_free(ns, force); @@ -600,6 +598,9 @@ void ldlm_namespace_free_post(struct ldlm_namespace *ns) return; } + /* Make sure that nobody can find this ns in its list. */ + ldlm_namespace_unregister(ns, ns->ns_client); + /* Fini pool _before_ parent proc dir is removed. This is important * as ldlm_pool_fini() removes own proc dir which is child to @dir. * Removing it after @dir may cause oops. */ diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 14097fe..69d6b6e 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -468,13 +468,13 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) } } - if (obd->obd_no_recov) { + if (obd->obd_force) { /* Pass it on to our clients. * XXX This should be an argument to disconnect, * XXX not a back-door flag on the OBD. Ah well. */ if (osc_obd) - osc_obd->obd_no_recov = 1; + osc_obd->obd_force = 1; } obd_register_observer(osc_obd, NULL); -- 1.8.3.1