Whamcloud - gitweb
LU-1522 recovery: rework LU-1166 patch in different way
authorMikhail Pershin <tappro@whamcloud.com>
Sun, 17 Jun 2012 11:08:36 +0000 (15:08 +0400)
committerOleg Drokin <green@whamcloud.com>
Tue, 31 Jul 2012 16:06:28 +0000 (12:06 -0400)
Dropping recovery counters upon last export put caused LU-1522 issue,
return class_export_recovery_cleanup() back to the
class_export_disconnect() and use exp_failed flag to avoid race
between target_handle_connect() and class_disconnect_stale_exports()

Signed-off-by: Mikhail Pershin <tappro@whamcloud.com>
Change-Id: I78c19a8d49786877d2de27c82bf40ebec494f044
Reviewed-on: http://review.whamcloud.com/3122
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: wangdi <di.wang@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lib.c
lustre/obdclass/genops.c

index 19883b2..290005e 100644 (file)
@@ -1107,14 +1107,18 @@ dont_check_exports:
                              &export->exp_connection->c_peer.nid,
                              &export->exp_nid_hash);
         }
                              &export->exp_connection->c_peer.nid,
                              &export->exp_nid_hash);
         }
-        /**
-          class_disconnect->class_export_recovery_cleanup() race
-         */
+
         if (target->obd_recovering && !export->exp_in_recovery) {
                 int has_transno;
                 __u64 transno = data->ocd_transno;
 
                 cfs_spin_lock(&export->exp_lock);
         if (target->obd_recovering && !export->exp_in_recovery) {
                 int has_transno;
                 __u64 transno = data->ocd_transno;
 
                 cfs_spin_lock(&export->exp_lock);
+               /* possible race with class_disconnect_stale_exports,
+                * export may be already in the eviction process */
+               if (export->exp_failed) {
+                       cfs_spin_unlock(&export->exp_lock);
+                       GOTO(out, rc = -ENODEV);
+               }
                 export->exp_in_recovery = 1;
                 export->exp_req_replay_needed = 1;
                 export->exp_lock_replay_needed = 1;
                 export->exp_in_recovery = 1;
                 export->exp_req_replay_needed = 1;
                 export->exp_lock_replay_needed = 1;
index 5d2547a..57e612a 100644 (file)
@@ -731,53 +731,17 @@ struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
 EXPORT_SYMBOL(class_conn2cliimp);
 
 /* Export management functions */
 EXPORT_SYMBOL(class_conn2cliimp);
 
 /* Export management functions */
-
-/* if export is involved in recovery then clean up related things */
-void class_export_recovery_cleanup(struct obd_export *exp)
-{
-        struct obd_device *obd = exp->exp_obd;
-
-        cfs_spin_lock(&obd->obd_recovery_task_lock);
-        if (exp->exp_delayed)
-                obd->obd_delayed_clients--;
-        if (obd->obd_recovering && exp->exp_in_recovery) {
-                cfs_spin_lock(&exp->exp_lock);
-                exp->exp_in_recovery = 0;
-                cfs_spin_unlock(&exp->exp_lock);
-                LASSERT_ATOMIC_POS(&obd->obd_connected_clients);
-                cfs_atomic_dec(&obd->obd_connected_clients);
-        }
-        cfs_spin_unlock(&obd->obd_recovery_task_lock);
-        /** Cleanup req replay fields */
-        if (exp->exp_req_replay_needed) {
-                cfs_spin_lock(&exp->exp_lock);
-                exp->exp_req_replay_needed = 0;
-                cfs_spin_unlock(&exp->exp_lock);
-                LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients));
-                cfs_atomic_dec(&obd->obd_req_replay_clients);
-        }
-        /** Cleanup lock replay data */
-        if (exp->exp_lock_replay_needed) {
-                cfs_spin_lock(&exp->exp_lock);
-                exp->exp_lock_replay_needed = 0;
-                cfs_spin_unlock(&exp->exp_lock);
-                LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients));
-                cfs_atomic_dec(&obd->obd_lock_replay_clients);
-        }
-}
-
 static void class_export_destroy(struct obd_export *exp)
 {
         struct obd_device *obd = exp->exp_obd;
         ENTRY;
 
         LASSERT_ATOMIC_ZERO(&exp->exp_refcount);
 static void class_export_destroy(struct obd_export *exp)
 {
         struct obd_device *obd = exp->exp_obd;
         ENTRY;
 
         LASSERT_ATOMIC_ZERO(&exp->exp_refcount);
-        LASSERT(obd != NULL);
+       LASSERT(obd != NULL);
 
         CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
                exp->exp_client_uuid.uuid, obd->obd_name);
 
 
         CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
                exp->exp_client_uuid.uuid, obd->obd_name);
 
-
         /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
         if (exp->exp_connection)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
         /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
         if (exp->exp_connection)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
@@ -826,7 +790,6 @@ void class_export_put(struct obd_export *exp)
 
                 /* release nid stat refererence */
                 lprocfs_exp_cleanup(exp);
 
                 /* release nid stat refererence */
                 lprocfs_exp_cleanup(exp);
-                class_export_recovery_cleanup(exp);
 
                 obd_zombie_export_add(exp);
         }
 
                 obd_zombie_export_add(exp);
         }
@@ -1147,6 +1110,39 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
 }
 EXPORT_SYMBOL(class_connect);
 
 }
 EXPORT_SYMBOL(class_connect);
 
+/* if export is involved in recovery then clean up related things */
+void class_export_recovery_cleanup(struct obd_export *exp)
+{
+        struct obd_device *obd = exp->exp_obd;
+
+        cfs_spin_lock(&obd->obd_recovery_task_lock);
+        if (exp->exp_delayed)
+                obd->obd_delayed_clients--;
+        if (obd->obd_recovering && exp->exp_in_recovery) {
+                cfs_spin_lock(&exp->exp_lock);
+                exp->exp_in_recovery = 0;
+                cfs_spin_unlock(&exp->exp_lock);
+                LASSERT_ATOMIC_POS(&obd->obd_connected_clients);
+                cfs_atomic_dec(&obd->obd_connected_clients);
+        }
+        cfs_spin_unlock(&obd->obd_recovery_task_lock);
+        /** Cleanup req replay fields */
+        if (exp->exp_req_replay_needed) {
+                cfs_spin_lock(&exp->exp_lock);
+                exp->exp_req_replay_needed = 0;
+                cfs_spin_unlock(&exp->exp_lock);
+                LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients));
+                cfs_atomic_dec(&obd->obd_req_replay_clients);
+        }
+        /** Cleanup lock replay data */
+        if (exp->exp_lock_replay_needed) {
+                cfs_spin_lock(&exp->exp_lock);
+                exp->exp_lock_replay_needed = 0;
+                cfs_spin_unlock(&exp->exp_lock);
+                LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients));
+                cfs_atomic_dec(&obd->obd_lock_replay_clients);
+        }
+}
 
 /* This function removes 1-3 references from the export:
  * 1 - for export pointer passed
 
 /* This function removes 1-3 references from the export:
  * 1 - for export pointer passed
@@ -1185,6 +1181,7 @@ int class_disconnect(struct obd_export *export)
                              &export->exp_connection->c_peer.nid,
                              &export->exp_nid_hash);
 
                              &export->exp_connection->c_peer.nid,
                              &export->exp_nid_hash);
 
+        class_export_recovery_cleanup(export);
         class_unlink_export(export);
 no_disconn:
         class_export_put(export);
         class_unlink_export(export);
 no_disconn:
         class_export_put(export);
@@ -1282,32 +1279,26 @@ void class_disconnect_stale_exports(struct obd_device *obd,
                                     int (*test_export)(struct obd_export *))
 {
         cfs_list_t work_list;
                                     int (*test_export)(struct obd_export *))
 {
         cfs_list_t work_list;
-        cfs_list_t *pos, *n;
-        struct obd_export *exp;
+       struct obd_export *exp, *n;
         int evicted = 0;
         ENTRY;
 
         CFS_INIT_LIST_HEAD(&work_list);
         cfs_spin_lock(&obd->obd_dev_lock);
         int evicted = 0;
         ENTRY;
 
         CFS_INIT_LIST_HEAD(&work_list);
         cfs_spin_lock(&obd->obd_dev_lock);
-        cfs_list_for_each_safe(pos, n, &obd->obd_exports) {
-                int failed;
-
-                exp = cfs_list_entry(pos, struct obd_export, exp_obd_chain);
-
+       cfs_list_for_each_entry_safe(exp, n, &obd->obd_exports,
+                                    exp_obd_chain) {
                 /* don't count self-export as client */
                 if (obd_uuid_equals(&exp->exp_client_uuid,
                                     &exp->exp_obd->obd_uuid))
                         continue;
 
                 /* don't count self-export as client */
                 if (obd_uuid_equals(&exp->exp_client_uuid,
                                     &exp->exp_obd->obd_uuid))
                         continue;
 
-                if (test_export(exp))
-                        continue;
-
-                cfs_spin_lock(&exp->exp_lock);
-                failed = exp->exp_failed;
-                exp->exp_failed = 1;
-                cfs_spin_unlock(&exp->exp_lock);
-                if (failed)
-                        continue;
+               cfs_spin_lock(&exp->exp_lock);
+               if (test_export(exp)) {
+                       cfs_spin_unlock(&exp->exp_lock);
+                       continue;
+               }
+               exp->exp_failed = 1;
+               cfs_spin_unlock(&exp->exp_lock);
 
                 cfs_list_move(&exp->exp_obd_chain, &work_list);
                 evicted++;
 
                 cfs_list_move(&exp->exp_obd_chain, &work_list);
                 evicted++;
@@ -1351,8 +1342,8 @@ void class_fail_export(struct obd_export *exp)
         if (obd_dump_on_timeout)
                 libcfs_debug_dumplog();
 
         if (obd_dump_on_timeout)
                 libcfs_debug_dumplog();
 
-        /* need for safe call CDEBUG after obd_disconnect */
-        class_export_get(exp);
+       /* need for safe call CDEBUG after obd_disconnect */
+       class_export_get(exp);
 
         /* Most callers into obd_disconnect are removing their own reference
          * (request, for example) in addition to the one from the hash table.
 
         /* Most callers into obd_disconnect are removing their own reference
          * (request, for example) in addition to the one from the hash table.
@@ -1364,7 +1355,7 @@ void class_fail_export(struct obd_export *exp)
         else
                 CDEBUG(D_HA, "disconnected export %p/%s\n",
                        exp, exp->exp_client_uuid.uuid);
         else
                 CDEBUG(D_HA, "disconnected export %p/%s\n",
                        exp, exp->exp_client_uuid.uuid);
-        class_export_put(exp);
+       class_export_put(exp);
 }
 EXPORT_SYMBOL(class_fail_export);
 
 }
 EXPORT_SYMBOL(class_fail_export);