Whamcloud - gitweb
b=11658
author green <green>
Wed, 16 May 2007 20:55:21 +0000 (20:55 +0000)
committer green <green>
Wed, 16 May 2007 20:55:21 +0000 (20:55 +0000)
r=wangdi,adigler

Take import reference before releasing llog record semaphore

lustre/ChangeLog
lustre/include/obd_support.h
lustre/ptlrpc/recov_thread.c
lustre/tests/replay-single.sh

index 6284164..c698c04 100644 (file)
@@ -109,6 +109,11 @@ behaviour.
 Details    : This will achieve local-only flock/fcntl locks
             coherentness.
 
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 11658
+Description: log_commit_thread vs filter_destroy race leads to crash
+Details    : Take import reference before releasing llog record semaphore
 
 --------------------------------------------------------------------------------
 
index ee2c6ba..c514d16 100644 (file)
@@ -169,6 +169,7 @@ extern int obd_race_state;
 #define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
 #define OBD_FAIL_PTLRPC_DROP_RPC         0x505
 #define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
+#define OBD_FAIL_PTLRPC_DELAY_RECOV      0x507
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
index 2355264..b74c72e 100644 (file)
@@ -222,6 +222,7 @@ static int log_commit_thread(void *arg)
         struct llog_commit_master *lcm = arg;
         struct llog_commit_daemon *lcd;
         struct llog_canceld_ctxt *llcd, *n;
+        struct obd_import *import = NULL;
         ENTRY;
 
         OBD_ALLOC(lcd, sizeof(*lcd));
@@ -243,10 +244,13 @@ static int log_commit_thread(void *arg)
         CDEBUG(D_HA, "%s started\n", cfs_curproc_comm());
         do {
                 struct ptlrpc_request *request;
-                struct obd_import *import = NULL;
                 struct list_head *sending_list;
                 int rc = 0;
 
+                if (import)
+                        class_import_put(import);
+                import = NULL;
+
                 /* If we do not have enough pages available, allocate some */
                 while (atomic_read(&lcm->lcm_llcd_numfree) <
                        lcm->lcm_llcd_minfree) {
@@ -272,6 +276,8 @@ static int log_commit_thread(void *arg)
 
                 sending_list = &lcm->lcm_llcd_pending;
         resend:
+                if (import)
+                        class_import_put(import);
                 import = NULL;
                 if (lcm->lcm_flags & LLOG_LCM_FL_EXIT) {
                         lcm->lcm_llcd_maxfree = 0;
@@ -301,6 +307,8 @@ static int log_commit_thread(void *arg)
                                           typeof(*llcd), llcd_list);
                         LASSERT(llcd->llcd_lcm == lcm);
                         import = llcd->llcd_ctxt->loc_imp;
+                        if (import)
+                                class_import_get(import);
                 }
                 list_for_each_entry_safe(llcd, n, sending_list, llcd_list) {
                         LASSERT(llcd->llcd_lcm == lcm);
@@ -351,6 +359,8 @@ static int log_commit_thread(void *arg)
                                 continue;
                         }
 
+                        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10);
+
                         request = ptlrpc_prep_req(import, LUSTRE_LOG_VERSION,
                                                   OBD_LOG_CANCEL, 2, size,bufs);
                         if (request == NULL) {
@@ -404,6 +414,9 @@ static int log_commit_thread(void *arg)
                 }
         } while(1);
 
+        if (import)
+                class_import_put(import);
+
         /* If we are force exiting, just drop all of the cookies. */
         if (lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE) {
                 spin_lock(&lcm->lcm_llcd_lock);
index 1d385f8..2a806e4 100755 (executable)
@@ -1122,5 +1122,23 @@ test_58() {
 }
 run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)"
 
+# log_commit_thread vs filter_destroy race used to lead to import use after free
+# bug 11658
+test_59() {
+    mkdir $DIR/$tdir
+    createmany -o $DIR/$tdir/$tfile-%d 200
+    sync
+    unlinkmany $DIR/$tdir/$tfile-%d 200
+#define OBD_FAIL_PTLRPC_DELAY_RECOV       0x507
+    do_facet ost "sysctl -w lustre.fail_loc=0x507"
+    fail ost
+    fail mds
+    do_facet ost "sysctl -w lustre.fail_loc=0x0"
+    sleep 20
+    rmdir $DIR/$tdir
+}
+run_test 59 "test log_commit_thread vs filter_destroy race"
+
+
 equals_msg `basename $0`: test complete, cleaning up
 $CLEANUP