Whamcloud - gitweb
LU-8535 lod: RR policy should not allocate on same ost 90/22090/3
author: Rahul Deshmukh <rahul.deshmukh@seagate.com>
Wed, 24 Aug 2016 05:28:21 +0000 (10:58 +0530)
committer: Oleg Drokin <oleg.drokin@intel.com>
Mon, 26 Sep 2016 15:20:18 +0000 (15:20 +0000)
Problem: With the Round Robin (RR) policy, objects of one file should
never be created on the same OST, but currently this is possible.

Solution: lod_check_and_reserve_ost() skips the check for an
already-used OST when speed=0, i.e. during the first round of
object allocation. Perform the check unconditionally to
fix the problem described above.

This patch contains both a reproducer and the fix.

Signed-off-by: Rahul Deshmukh <rahul.deshmukh@seagate.com>
Signed-off-by: Alexander Zarochentsev <alexander.zarochentsev@seagate.com>
Seagate-bug-id: MRP-3480
Change-Id: I80895f8d7cc0a146a098869842bbc256152e6c2e
Reviewed-on: http://review.whamcloud.com/22090
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/lod/lod_qos.c
lustre/tests/sanityn.sh [changed mode: 0755->0644]

index f1412d3..bc0738e 100644 (file)
@@ -252,6 +252,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_INTENT_DELAY              0x160
 #define OBD_FAIL_MDS_XATTR_REP                 0x161
 #define OBD_FAIL_MDS_TRACK_OVERFLOW     0x162
+#define OBD_FAIL_MDS_LOV_CREATE_RACE    0x163
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
index 6a7609e..5e3fe00 100644 (file)
@@ -883,7 +883,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
        /*
         * do not put >1 objects on a single OST
         */
-       if (speed && lod_qos_is_ost_used(env, ost_idx, stripe_idx))
+       if (lod_qos_is_ost_used(env, ost_idx, stripe_idx))
                goto out_return;
 
        o = lod_qos_declare_object_on(env, m, ost_idx, th);
@@ -899,6 +899,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
         */
        lod_qos_ost_in_use(env, stripe_idx, ost_idx);
        stripe[stripe_idx] = o;
+       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LOV_CREATE_RACE, 2);
        stripe_idx++;
        *s_idx = stripe_idx;
 
old mode 100755 (executable)
new mode 100644 (file)
index 42b93fd..e515691
@@ -3718,6 +3718,43 @@ test_92() {
 }
 run_test 92 "create remote directory under orphan directory"
 
+test_93() {
+       local rc1=0
+       local rc2=0
+       local old_rr
+
+       mkdir -p $DIR1/$tfile-1/
+       mkdir -p $DIR2/$tfile-2/
+       local old_rr=$(do_facet $SINGLEMDS lctl get_param -n \
+               'lod.lustre-MDT*/qos_threshold_rr' | sed -e 's/%//')
+       do_facet $SINGLEMDS lctl set_param -n \
+               'lod.lustre-MDT*/qos_threshold_rr' 100
+       #define OBD_FAIL_MDS_LOV_CREATE_RACE     0x163
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x00000163"
+
+       $SETSTRIPE -c -1 $DIR1/$tfile-1/file1 &
+       local PID1=$!
+       sleep 1
+       $SETSTRIPE -c -1 $DIR2/$tfile-2/file2 &
+       local PID2=$!
+       wait $PID2
+       wait $PID1
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+       do_facet $SINGLEMDS "lctl set_param -n \
+               'lod.lustre-MDT*/qos_threshold_rr' $old_rr"
+
+       $GETSTRIPE $DIR1/$tfile-1/file1
+       rc1=$($GETSTRIPE -q $DIR1/$tfile-1/file1 |
+               awk '{if (/[0-9]/) print $1 }' | sort | uniq -d | wc -l)
+       $GETSTRIPE $DIR2/$tfile-2/file2
+       rc2=$($GETSTRIPE -q $DIR2/$tfile-2/file2 |
+               awk '{if (/[0-9]/) print $1 }' | sort | uniq -d | wc -l)
+       echo "rc1=$rc1 and rc2=$rc2 "
+       [ $rc1 -eq 0 ] && [ $rc2 -eq 0 ] ||
+               error "object allocate on same ost detected"
+}
+run_test 93 "alloc_rr should not allocate on same ost"
+
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarentee script finish, but command in script