From: adilger Date: Thu, 5 May 2005 17:44:17 +0000 (+0000) Subject: Branch: b1_4 X-Git-Tag: v1_7_100~1^25~8^2~162 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=f19d14e848014782300b59baafde8a7666fa43af;p=fs%2Flustre-release.git Branch: b1_4 Using 'lctl recover' should mark a device active if it was previously deactivated. Add some debugging to ptlrpc_pinger_main() to help analyse bug 5933. b=5933 r=nathan --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 7a76182..529b618 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -29,6 +29,7 @@ tbd Cluster File Systems, Inc. - don't reference lr_lvb_data until after we hold lr_lvb_sem (6170) - don't overwrite last_rcvd if there is a *_client_add() error (6068) - Correctly handle reads of files with no objects (6243) + - lctl recover will also mark a device active if deactivate used (5933) * miscellania - by default create 1 inode per 4kB space on MDS, per 16kB on OSTs - allow --write-conf on an MDS with different nettype than client (5619) @@ -50,6 +51,7 @@ tbd Cluster File Systems, Inc. - added --disable-server and --disable-client configure options (5782) - introduce a lookup cache for lconf to avoid repeated DB scans (6204) - Vanilla 2.4.29 support + - increase maximum number of obd devices to 520 (6242) 2005-03-22 Cluster File Systems, Inc. * version 1.4.1 diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 1b663e9..0f8549d 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -335,8 +335,9 @@ int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count, imp = obd->u.cli.cl_import; imp_state_name = ptlrpc_import_state_name(imp->imp_state); *eof = 1; - return snprintf(page, count, "%s\t%s\n", - imp->imp_target_uuid.uuid, imp_state_name); + return snprintf(page, count, "%s\t%s%s\n", + imp->imp_target_uuid.uuid, imp_state_name, + imp->imp_deactive ? "\tDEACTIVATED" : ""); } int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index df39056..732ee37 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -305,9 +305,8 @@ static int import_select_connection(struct obd_import *imp) class_export_put(dlmexp); imp->imp_conn_current = imp_conn; - CWARN("%s: Using connection %s\n", - imp->imp_obd->obd_name, - imp_conn->oic_uuid.uuid); + CDEBUG(D_HA, "%s: import %p using connection %s\n", + imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid); spin_unlock(&imp->imp_lock); RETURN(0); diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 4b79c69..26ad632 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -115,9 +115,12 @@ static int ptlrpc_pinger_main(void *arg) spin_lock_irqsave(&imp->imp_lock, flags); level = imp->imp_state; force = imp->imp_force_verify; - if (force) - imp->imp_force_verify = 0; + imp->imp_force_verify = 0; spin_unlock_irqrestore(&imp->imp_lock, flags); + CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, + "level %s/%u force %u deactive %u pingable %u\n", + ptlrpc_import_state_name(level), level, + force, imp->imp_deactive, imp->imp_pingable); if (force || /* if the next ping is within, say, 5 jiffies from @@ -127,26 +130,26 @@ static int ptlrpc_pinger_main(void *arg) !imp->imp_deactive) { /* wait at least a timeout before trying recovery again. */ - imp->imp_next_ping = jiffies + + imp->imp_next_ping = jiffies + obd_timeout * HZ; ptlrpc_initiate_recovery(imp); - } - else if (level != LUSTRE_IMP_FULL || + } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) { - CDEBUG(D_HA, - "not pinging %s (in recovery " - "or recovery disabled: %s)\n", + CDEBUG(D_HA, "not pinging %s " + "(in recovery: %s or recovery " + "disabled: %u/%u)\n", imp->imp_target_uuid.uuid, - ptlrpc_import_state_name(level)); - } - else if (imp->imp_pingable || force) { + ptlrpc_import_state_name(level), + imp->imp_deactive, + imp->imp_obd->obd_no_recov); + } else if (imp->imp_pingable || force) { ptlrpc_ping(imp); } } else { - if (!imp->imp_pingable) + if (!imp->imp_pingable) continue; - CDEBUG(D_HA, + CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n", imp->imp_target_uuid.uuid, imp->imp_next_ping, this_ping); @@ -162,15 +165,16 @@ static int ptlrpc_pinger_main(void *arg) /* Wait until the next ping time, or until we're stopped. */ time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies; /* The ping sent by ptlrpc_send_rpc may get sent out - say .01 second after this. + say .01 second after this. ptlrpc_pinger_sending_on_import will then set the - next ping time to next_ping + .01 sec, which means + next ping time to next_ping + .01 sec, which means we will SKIP the next ping at next_ping, and the ping will get sent 2 timeouts from now! Beware. */ CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping, this_ping + PING_INTERVAL * HZ); if (time_to_next_ping > 0) { - lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL); + lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ), + NULL, NULL); l_wait_event(thread->t_ctl_waitq, thread->t_flags & (SVC_STOPPING|SVC_EVENT), &lwi); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index a5c9e21..2075c47 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -318,9 +318,11 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) /* When deactivating, mark import invalid, and abort in-flight * requests. */ if (!active) { + CWARN("setting import %s INACTIVE by administrator request\n", + imp->imp_target_uuid.uuid); ptlrpc_invalidate_import(imp); imp->imp_deactive = 1; - } + } /* When activating, mark import valid, and attempt recovery */ if (active) { @@ -341,6 +343,7 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid) /* force import to be disconnected. */ ptlrpc_set_import_discon(imp); + imp->imp_deactive = 0; rc = ptlrpc_recover_import_no_retry(imp, new_uuid); RETURN(rc);