From bc3f670eb8ba2f96ae7e83e1a2c3a061c4be21a8 Mon Sep 17 00:00:00 2001 From: Rahul Deshmkuh Date: Wed, 24 Aug 2016 10:58:21 +0530 Subject: [PATCH] LU-8535 lod: RR policy should not allocate on same ost Problem: With Round Robin (RR) policy we should not be allowed to create objects on the same OST, but currently it is possible. Solution: lod_check_and_reserve_ost() skips the check for an already used OST when speed=0, i.e. at the first round of object allocation. Enable the check unconditionally to fix the above-mentioned problem. This patch contains both a reproducer and the fix. Signed-off-by: Rahul Deshmukh Signed-off-by: Alexander Zarochentsev Seagate-bug-id: MRP-3480 Change-Id: I80895f8d7cc0a146a098869842bbc256152e6c2e Reviewed-on: http://review.whamcloud.com/22090 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/lod/lod_qos.c | 3 ++- lustre/tests/sanityn.sh | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) mode change 100755 => 100644 lustre/tests/sanityn.sh diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index f1412d3..bc0738e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -252,6 +252,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_INTENT_DELAY 0x160 #define OBD_FAIL_MDS_XATTR_REP 0x161 #define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162 +#define OBD_FAIL_MDS_LOV_CREATE_RACE 0x163 /* layout lock */ #define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 6a7609e..5e3fe00 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -883,7 +883,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env, /* * do not put >1 objects on a single OST */ - if (speed && lod_qos_is_ost_used(env, ost_idx, stripe_idx)) + if (lod_qos_is_ost_used(env, ost_idx, stripe_idx)) goto out_return; o = lod_qos_declare_object_on(env, m, ost_idx, th); @@ -899,6 
+899,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env, */ lod_qos_ost_in_use(env, stripe_idx, ost_idx); stripe[stripe_idx] = o; + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LOV_CREATE_RACE, 2); stripe_idx++; *s_idx = stripe_idx; diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh old mode 100755 new mode 100644 index 42b93fd..e515691 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -3718,6 +3718,43 @@ test_92() { } run_test 92 "create remote directory under orphan directory" +test_93() { + local rc1=0 + local rc2=0 + local old_rr + + mkdir -p $DIR1/$tfile-1/ + mkdir -p $DIR2/$tfile-2/ + local old_rr=$(do_facet $SINGLEMDS lctl get_param -n \ + 'lod.lustre-MDT*/qos_threshold_rr' | sed -e 's/%//') + do_facet $SINGLEMDS lctl set_param -n \ + 'lod.lustre-MDT*/qos_threshold_rr' 100 + #define OBD_FAIL_MDS_LOV_CREATE_RACE 0x163 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x00000163" + + $SETSTRIPE -c -1 $DIR1/$tfile-1/file1 & + local PID1=$! + sleep 1 + $SETSTRIPE -c -1 $DIR2/$tfile-2/file2 & + local PID2=$! + wait $PID2 + wait $PID1 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" + do_facet $SINGLEMDS "lctl set_param -n \ + 'lod.lustre-MDT*/qos_threshold_rr' $old_rr" + + $GETSTRIPE $DIR1/$tfile-1/file1 + rc1=$($GETSTRIPE -q $DIR1/$tfile-1/file1 | + awk '{if (/[0-9]/) print $1 }' | sort | uniq -d | wc -l) + $GETSTRIPE $DIR2/$tfile-2/file2 + rc2=$($GETSTRIPE -q $DIR2/$tfile-2/file2 | + awk '{if (/[0-9]/) print $1 }' | sort | uniq -d | wc -l) + echo "rc1=$rc1 and rc2=$rc2 " + [ $rc1 -eq 0 ] && [ $rc2 -eq 0 ] || + error "object allocate on same ost detected" +} +run_test 93 "alloc_rr should not allocate on same ost" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script -- 1.8.3.1