Whamcloud - gitweb
LU-8562 osp: fix precreate_cleanup_orphans/precreate_reserve race 11/22211/7
author Sergey Cheremencev <sergey.cheremencev@seagate.com>
Thu, 10 Nov 2016 15:30:53 +0000 (23:30 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 23 Dec 2016 05:04:38 +0000 (05:04 +0000)
osp_statfs_interpret can clear the error in opd_pre_status despite the
fact that osp_precreate_cleanup_orphans got an error and doesn't know
the exact OST object last_id. Example:
1. mdt sends req "create objects x..y"
2. objects created. mdt gets OK
3. MDT->OST reconnection
4. MDT sends cleanup_orphans last_used_fid=x
5. OST removes x..y and sends reply OK and last_id=x
6. MDT->OST connection aborted. cleanup_orphans exits with EIO
7. osp_statfs_interpret changes opd_pre_status from EIO to 0
8. osp_precreate_reserve reserves object and changes last_used_id from x to x+1
9. connection restored. MDT sends cleanup_orphans last_id=x+1
As a result, the OST has a gap: object x was removed by cleanup_orphans.

So don't clear opd_pre_recovering until we get last_id from OST.

Change-Id: I1647053fdab9a0c9bf59a048b0814e7b2dec52f2
Signed-off-by: Sergey Cheremencev <sergey.cheremencev@seagate.com>
Seagate-bug-id: MRP-3693
Reviewed-on: https://review.whamcloud.com/22211
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/osp/osp_precreate.c
lustre/tests/conf-sanity.sh

index 6910860..be02e5a 100644 (file)
@@ -901,10 +901,6 @@ out:
        if (req)
                ptlrpc_req_finished(req);
 
        if (req)
                ptlrpc_req_finished(req);
 
-       spin_lock(&d->opd_pre_lock);
-       d->opd_pre_recovering = 0;
-       spin_unlock(&d->opd_pre_lock);
-
        /*
         * If rc is zero, the pre-creation window should have been emptied.
         * Since waking up the herd would be useless without pre-created
        /*
         * If rc is zero, the pre-creation window should have been emptied.
         * Since waking up the herd would be useless without pre-created
@@ -923,6 +919,10 @@ out:
                } else {
                        wake_up(&d->opd_pre_user_waitq);
                }
                } else {
                        wake_up(&d->opd_pre_user_waitq);
                }
+       } else {
+               spin_lock(&d->opd_pre_lock);
+               d->opd_pre_recovering = 0;
+               spin_unlock(&d->opd_pre_lock);
        }
 
        RETURN(rc);
        }
 
        RETURN(rc);
@@ -1161,6 +1161,9 @@ static int osp_precreate_thread(void *_arg)
                 * need to be connected to OST
                 */
                while (osp_precreate_running(d)) {
                 * need to be connected to OST
                 */
                while (osp_precreate_running(d)) {
+                       if (d->opd_pre_recovering &&
+                           d->opd_imp_connected)
+                               break;
                        l_wait_event(d->opd_pre_waitq,
                                     !osp_precreate_running(d) ||
                                     d->opd_new_connection,
                        l_wait_event(d->opd_pre_waitq,
                                     !osp_precreate_running(d) ||
                                     d->opd_new_connection,
@@ -1202,8 +1205,10 @@ static int osp_precreate_thread(void *_arg)
                 * Clean up orphans or recreate missing objects.
                 */
                rc = osp_precreate_cleanup_orphans(&env, d);
                 * Clean up orphans or recreate missing objects.
                 */
                rc = osp_precreate_cleanup_orphans(&env, d);
-               if (rc != 0)
+               if (rc != 0) {
+                       schedule_timeout_interruptible(cfs_time_seconds(1));
                        continue;
                        continue;
+               }
                /*
                 * connected, can handle precreates now
                 */
                /*
                 * connected, can handle precreates now
                 */
index 95b7892..d9a9585 100755 (executable)
@@ -7014,6 +7014,32 @@ test_100() {
 }
 run_test 100 "check lshowmount lists MGS, MDT, OST and 0@lo"
 
 }
 run_test 100 "check lshowmount lists MGS, MDT, OST and 0@lo"
 
+test_101() {
+       local createmany_oid
+       local dev=$FSNAME-OST0000-osc-MDT0000
+       setup
+
+       createmany -o $DIR1/$tfile-%d 50000 &
+       createmany_oid=$!
+       # MDT->OST reconnection causes MDT<->OST last_id synchronisation
+       # via osp_precreate_cleanup_orphans.
+       for ((i = 0; i < 100; i++)); do
+               for ((k = 0; k < 10; k++)); do
+                       do_facet $SINGLEMDS "$LCTL --device $dev deactivate;" \
+                                           "$LCTL --device $dev activate"
+               done
+
+               ls -asl $MOUNT | grep '???' &&
+                       (kill -9 $createmany_oid &>/dev/null; \
+                        error "File hasn't object on OST")
+
+               kill -s 0 $createmany_oid || break
+       done
+       wait $createmany_oid
+       cleanup
+}
+run_test 101 "Race MDT->OST reconnection with create"
+
 if ! combined_mgs_mds ; then
        stop mgs
 fi
 if ! combined_mgs_mds ; then
        stop mgs
 fi