From 190c03a5b000e34bb3518653239ff328542bd5b1 Mon Sep 17 00:00:00 2001 From: nathan Date: Fri, 13 May 2005 18:11:51 +0000 Subject: [PATCH] Branch b1_4 b=6301 r=adilger Prevent damage due to racing export disconnects --- lustre/include/linux/lustre_export.h | 1 + lustre/obdclass/genops.c | 37 ++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index 2b83f97..0b75d4c 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -68,6 +68,7 @@ struct obd_export { __u64 exp_connect_flags; int exp_flags; unsigned int exp_failed:1, + exp_disconnected:1, exp_replay_needed:1, exp_libclient:1; /* liblustre client? */ union { diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 2cee678..a93256d 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -706,6 +706,7 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd, * again. */ int class_disconnect(struct obd_export *export) { + int already_disconnected; ENTRY; if (export == NULL) { @@ -714,10 +715,15 @@ int class_disconnect(struct obd_export *export) RETURN(-EINVAL); } - /* XXX this shouldn't have to be here, but double-disconnect will crash - * otherwise, and sometimes double-disconnect happens. abort_recovery, - * for example. */ - if (list_empty(&export->exp_handle.h_link)) + spin_lock(&export->exp_lock); + already_disconnected = export->exp_disconnected; + export->exp_disconnected = 1; + spin_unlock(&export->exp_lock); + + /* class_cleanup, abort_recovery, ptlrpc_fail_export, and + ping_evictor_fail_export all end up in here, and if any of them + race we shouldn't call extra class_export_puts. */ + if (already_disconnected) RETURN(0); CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", @@ -1131,6 +1137,8 @@ void ping_evictor_stop(void) the network is up.) */ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) { + struct obd_export *oldest_exp; + time_t oldest_time; LASSERT(exp); /* Compensate for slow machines, etc, by faking our request time @@ -1158,16 +1166,19 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) list_move_tail(&exp->exp_obd_chain_timed, &exp->exp_obd->obd_exports_timed); + oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next, + struct obd_export, exp_obd_chain_timed); + oldest_time = oldest_exp->exp_last_request_time; + spin_unlock(&exp->exp_obd->obd_dev_lock); + if (exp->exp_obd->obd_recoverable_clients > 0) + /* be nice to everyone during recovery */ + return; + /* Note - racing to start/reset the obd_eviction timer is safe */ if (exp->exp_obd->obd_eviction_timer == 0) { - struct obd_export *oldest_exp; /* Check if the oldest entry is expired. */ - oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next, - struct obd_export, exp_obd_chain_timed); - spin_unlock(&exp->exp_obd->obd_dev_lock); - - if (CURRENT_SECONDS > (oldest_exp->exp_last_request_time + + if (CURRENT_SECONDS > (oldest_time + (3 * obd_timeout / 2) + extra_delay)) { /* We need a second timer, in case the net was down and it just came back. Since the pinger @@ -1176,12 +1187,10 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) exp->exp_obd->obd_eviction_timer = CURRENT_SECONDS + 3 * PING_INTERVAL; CDEBUG(D_PET, - "Thinking about evicting old export %s at %ld\n", - oldest_exp->exp_client_uuid.uuid, - oldest_exp->exp_last_request_time); + "Thinking about evicting old export from %ld\n", + oldest_time); } } else { - spin_unlock(&exp->exp_obd->obd_dev_lock); if (CURRENT_SECONDS > (exp->exp_obd->obd_eviction_timer + extra_delay)) { /* The evictor won't evict anyone who we've heard from -- 1.8.3.1