Whamcloud - gitweb
Branch: b1_4
author adilger <adilger>
Thu, 5 May 2005 17:44:17 +0000 (17:44 +0000)
committer adilger <adilger>
Thu, 5 May 2005 17:44:17 +0000 (17:44 +0000)
Using 'lctl recover' should mark a device active if it was previously
deactivated.
Add some debugging to ptlrpc_pinger_main() to help analyse bug 5933.
b=5933
r=nathan

lustre/ChangeLog
lustre/obdclass/lprocfs_status.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/recover.c

index 7a76182..529b618 100644 (file)
@@ -29,6 +29,7 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        - don't reference lr_lvb_data until after we hold lr_lvb_sem (6170)
        - don't overwrite last_rcvd if there is a *_client_add() error (6068)
        - Correctly handle reads of files with no objects (6243)
+       - lctl recover will also mark a device active if deactivate used (5933)
        * miscellania
        - by default create 1 inode per 4kB space on MDS, per 16kB on OSTs
        - allow --write-conf on an MDS with different nettype than client (5619)
@@ -50,6 +51,7 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        - added --disable-server and --disable-client configure options (5782)
        - introduce a lookup cache for lconf to avoid repeated DB scans (6204)
        - Vanilla 2.4.29 support
+       - increase maximum number of obd devices to 520 (6242)
 
 2005-03-22  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.4.1
index 1b663e9..0f8549d 100644 (file)
@@ -335,8 +335,9 @@ int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
         imp = obd->u.cli.cl_import;
         imp_state_name = ptlrpc_import_state_name(imp->imp_state);
         *eof = 1;
-        return snprintf(page, count, "%s\t%s\n",
-                        imp->imp_target_uuid.uuid, imp_state_name);
+        return snprintf(page, count, "%s\t%s%s\n",
+                        imp->imp_target_uuid.uuid, imp_state_name,
+                        imp->imp_deactive ? "\tDEACTIVATED" : "");
 }
 
 int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
index df39056..732ee37 100644 (file)
@@ -305,9 +305,8 @@ static int import_select_connection(struct obd_import *imp)
         class_export_put(dlmexp);
 
         imp->imp_conn_current = imp_conn;
-        CWARN("%s: Using connection %s\n",
-               imp->imp_obd->obd_name,
-               imp_conn->oic_uuid.uuid);
+        CDEBUG(D_HA, "%s: import %p using connection %s\n",
+               imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid);
         spin_unlock(&imp->imp_lock);
 
         RETURN(0);
index 4b79c69..26ad632 100644 (file)
@@ -115,9 +115,12 @@ static int ptlrpc_pinger_main(void *arg)
                         spin_lock_irqsave(&imp->imp_lock, flags);
                         level = imp->imp_state;
                         force = imp->imp_force_verify;
-                        if (force)
-                                imp->imp_force_verify = 0;
+                        imp->imp_force_verify = 0;
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
+                        CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA,
+                               "level %s/%u force %u deactive %u pingable %u\n",
+                               ptlrpc_import_state_name(level), level,
+                               force, imp->imp_deactive, imp->imp_pingable);
 
                         if (force ||
                             /* if the next ping is within, say, 5 jiffies from
@@ -127,26 +130,26 @@ static int ptlrpc_pinger_main(void *arg)
                                     !imp->imp_deactive) {
                                         /* wait at least a timeout before
                                            trying recovery again. */
-                                        imp->imp_next_ping = jiffies + 
+                                        imp->imp_next_ping = jiffies +
                                                 obd_timeout * HZ;
                                         ptlrpc_initiate_recovery(imp);
-                                }
-                                else if (level != LUSTRE_IMP_FULL ||
+                                } else if (level != LUSTRE_IMP_FULL ||
                                          imp->imp_obd->obd_no_recov) {
-                                        CDEBUG(D_HA,
-                                               "not pinging %s (in recovery "
-                                               "or recovery disabled: %s)\n",
+                                        CDEBUG(D_HA, "not pinging %s "
+                                               "(in recovery: %s or recovery "
+                                               "disabled: %u/%u)\n",
                                                imp->imp_target_uuid.uuid,
-                                               ptlrpc_import_state_name(level));
-                                }
-                                else if (imp->imp_pingable || force) {
+                                               ptlrpc_import_state_name(level),
+                                               imp->imp_deactive,
+                                               imp->imp_obd->obd_no_recov);
+                                } else if (imp->imp_pingable || force) {
                                         ptlrpc_ping(imp);
                                 }
 
                         } else {
-                                if (!imp->imp_pingable) 
+                                if (!imp->imp_pingable)
                                         continue;
-                                CDEBUG(D_HA, 
+                                CDEBUG(D_HA,
                                        "don't need to ping %s (%lu > %lu)\n",
                                        imp->imp_target_uuid.uuid,
                                        imp->imp_next_ping, this_ping);
@@ -162,15 +165,16 @@ static int ptlrpc_pinger_main(void *arg)
                 /* Wait until the next ping time, or until we're stopped. */
                 time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies;
                 /* The ping sent by ptlrpc_send_rpc may get sent out
-                   say .01 second after this.  
+                   say .01 second after this.
                    ptlrpc_pinger_sending_on_import will then set the
-                   next ping time to next_ping + .01 sec, which means 
+                   next ping time to next_ping + .01 sec, which means
                    we will SKIP the next ping at next_ping, and the
                    ping will get sent 2 timeouts from now!  Beware. */
                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
                        this_ping + PING_INTERVAL * HZ);
                 if (time_to_next_ping > 0) {
-                        lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
+                        lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ),
+                                          NULL, NULL);
                         l_wait_event(thread->t_ctl_waitq,
                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
                                      &lwi);
index a5c9e21..2075c47 100644 (file)
@@ -318,9 +318,11 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
         /* When deactivating, mark import invalid, and abort in-flight
          * requests. */
         if (!active) {
+                CWARN("setting import %s INACTIVE by administrator request\n",
+                      imp->imp_target_uuid.uuid);
                 ptlrpc_invalidate_import(imp);
                 imp->imp_deactive = 1;
-        } 
+        }
 
         /* When activating, mark import valid, and attempt recovery */
         if (active) {
@@ -341,6 +343,7 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
         /* force import to be disconnected. */
         ptlrpc_set_import_discon(imp);
 
+        imp->imp_deactive = 0;
         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
 
         RETURN(rc);