Whamcloud - gitweb
b=6377
author jacob <jacob>
Fri, 27 May 2005 21:34:59 +0000 (21:34 +0000)
committer jacob <jacob>
Fri, 27 May 2005 21:34:59 +0000 (21:34 +0000)
r=adilger

Use obd_recovering instead of obd_recoverable_clients to check if the
obd is in recovery.

Previously, abort_recovery did not set obd_recoverable_clients to 0 (it
now does). As a result, if recovery was aborted, the server would never
ping-evict clients. This can lead to extra exports, possibly requiring
recovery to be aborted.

lustre/include/linux/obd_class.h
lustre/ldlm/ldlm_lib.c
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c

index 00d3e86..f306d12 100644 (file)
@@ -80,6 +80,9 @@ void oig_complete_one(struct obd_io_group *oig,
 void oig_release(struct obd_io_group *oig);
 int oig_wait(struct obd_io_group *oig);
 
+/* buf must be at least PTL_NALFMT_SIZE bytes; buf itself is returned */
+char *obd_export_nid2str(struct obd_export *exp, char *buf);
+
 /* config.c */
 int class_process_config(struct lustre_cfg *lcfg);
 int class_attach(struct lustre_cfg *lcfg);
index 7b3706e..0db65ce 100644 (file)
@@ -886,6 +886,7 @@ void target_abort_recovery(void *data)
                 return;
         }
         obd->obd_recovering = obd->obd_abort_recovery = 0;
+        obd->obd_recoverable_clients = 0;
         target_cancel_recovery_timer(obd);
         spin_unlock_bh(&obd->obd_processing_task_lock);
 
index a109d1a..cb8b088 100644 (file)
@@ -1076,27 +1076,15 @@ static int ping_evictor_main(void *arg)
 
                         if (expire_time > exp->exp_last_request_time) {
                                 char ipbuf[PTL_NALFMT_SIZE];
-                                struct ptlrpc_peer *peer;
-
-                                peer = exp->exp_connection ?
-                                        &exp->exp_connection->c_peer : NULL;
-
-                                if (peer && peer->peer_ni) {
-                                        portals_nid2str(peer->peer_ni->pni_number,
-                                                        peer->peer_id.nid,
-                                                        ipbuf);
-                                }
 
                                 LCONSOLE_WARN("%s hasn't heard from %s in %ld "
                                               "seconds.  I think it's dead, "
                                               "and I am evicting it.\n",
                                               obd->obd_name,
-                                              (peer && peer->peer_ni) ?
-                                              ipbuf :
-                                              (char *)exp->exp_client_uuid.uuid,
-                                              (long)(CURRENT_SECONDS -
-                                                   exp->exp_last_request_time));
-
+                                              obd_export_nid2str(exp, ipbuf),
+                                              (long)(CURRENT_SECONDS - 
+                                                     exp->exp_last_request_time));
+                                
                                 ping_evictor_fail_export(exp);
                         } else {
                                 /* List is sorted, so everyone below is ok */
@@ -1155,6 +1143,9 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay)
 {
         struct obd_export *oldest_exp;
         time_t oldest_time;
+
+        ENTRY;
+
         LASSERT(exp);
 
         /* Compensate for slow machines, etc, by faking our request time
@@ -1177,6 +1168,7 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay)
         if (list_empty(&exp->exp_obd_chain_timed)) {
                 /* this one is not timed */
                 spin_unlock(&exp->exp_obd->obd_dev_lock);
+                EXIT;
                 return;
         }
 
@@ -1187,9 +1179,11 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay)
         oldest_time = oldest_exp->exp_last_request_time;
         spin_unlock(&exp->exp_obd->obd_dev_lock);
 
-        if (exp->exp_obd->obd_recoverable_clients > 0)
+        if (exp->exp_obd->obd_recovering) {
                 /* be nice to everyone during recovery */
+                EXIT;
                 return;
+        }
 
         /* Note - racing to start/reset the obd_eviction timer is safe */
         if (exp->exp_obd->obd_eviction_timer == 0) {
@@ -1216,5 +1210,25 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay)
                                 exp->exp_obd->obd_eviction_timer = 0;
                 }
         }
+
+        EXIT;
 }
 
+char *obd_export_nid2str(struct obd_export *exp, char *ipbuf)
+{
+        struct ptlrpc_peer *peer;
+        /* Fill ipbuf (PTL_NALFMT_SIZE bytes) with a printable id for exp. */
+        peer = exp->exp_connection 
+                ? &exp->exp_connection->c_peer
+                : NULL;
+        /* No connection or no peer_ni: fall back to "(no nid)". */
+        if (peer && peer->peer_ni) {
+                portals_nid2str(peer->peer_ni->pni_number,
+                                peer->peer_id.nid,
+                                ipbuf);
+        } else {
+                snprintf(ipbuf, PTL_NALFMT_SIZE, "(no nid)");
+        }
+        /* Return the same buffer so callers can pass the call as an argument. */
+        return ipbuf;
+}
index 409964c..a012b65 100644 (file)
@@ -266,6 +266,7 @@ int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
 static void dump_exports(struct obd_device *obd)
 {
         struct obd_export *exp, *n;
+        char ipbuf[PTL_NALFMT_SIZE];
 
         list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) {
                 struct ptlrpc_reply_state *rs;
@@ -279,8 +280,9 @@ static void dump_exports(struct obd_device *obd)
                         nreplies++;
                 }
 
-                CDEBUG(D_IOCTL, "%s: %p %s %d %d %d: %p %s\n",
+                CDEBUG(D_IOCTL, "%s: %p %s %s %d %d %d: %p %s\n",
                        obd->obd_name, exp, exp->exp_client_uuid.uuid,
+                       obd_export_nid2str(exp, ipbuf),
                        atomic_read(&exp->exp_refcount),
                        exp->exp_failed, nreplies, first_reply,
                        nreplies > 3 ? "..." : "");