From 047dfe489966c8816cbead1a3abbbb1564fdb7db Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Sat, 2 Apr 2016 00:11:36 +0200 Subject: [PATCH] LU-7975 lod: fix delayed stripe error path & Client resend This patch is a follow-on to LU-4260/LU-4971/LU-5346 to fix cleanup in the error path during layout/stripe creation. It is intended to fix the special case where layout creation has been delayed and will have a non-0 size (i.e. truncate). The problem has been unveiled due to the current limitation where a delayed layout creation with a non-0 size/truncate will not be resent if the connection with the OST is down, but rather returns -EAGAIN/-EWOULDBLOCK upon next open, and finally the MDS will crash with the "(lod_object.c:700:lod_ah_init()) ASSERTION( lc->ldo_stripenr == 0 )" LBUG. Thus, this patch also fixes this limitation by instead returning -EINPROGRESS, in order to avoid using a costly resend operation from the MDS (which would keep a ptlrpc thread busy) and to offload this responsibility to the Client side (in after_reply()), which will handle this process in a much less aggressive way. A specific sanity/test_27F sub-test has been created to test both the correct cleanup in the layout creation error path and the Client resend. 
Signed-off-by: Bruno Faccini Change-Id: Ibc1734f52c071c66cb5974e3fe21d8819d725c2a Reviewed-on: http://review.whamcloud.com/19302 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/lod/lod_object.c | 17 +++++++---------- lustre/osp/osp_precreate.c | 26 ++++++++++++++++++++++++-- lustre/tests/sanity.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 12 deletions(-) diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 62e8967..165d4bd 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -3344,22 +3344,14 @@ int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt, int rc; ENTRY; - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) { - /* failed to create striping, let's reset - * config so that others don't get confused */ - lod_object_free_striping(env, lo); + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) GOTO(out, rc = -ENOMEM); - } if (!dt_object_remote(next)) { /* choose OST and generate appropriate objects */ rc = lod_qos_prep_create(env, lo, attr, lovea, th); - if (rc) { - /* failed to create striping, let's reset - * config so that others don't get confused */ - lod_object_free_striping(env, lo); + if (rc) GOTO(out, rc); - } /* * declare storage for striping data @@ -3390,6 +3382,11 @@ int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt, rc = lod_declare_init_size(env, dt, th); out: + /* failed to create striping or to set initial size, let's reset + * config so that others don't get confused */ + if (rc) + lod_object_free_striping(env, lo); + RETURN(rc); } diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index b9f6760..d5b4022 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -1482,6 +1482,13 @@ int osp_object_truncate(const struct lu_env *env, struct dt_object *dt, * XXX: decide how do we do here with resend * if we don't resend, then 
client may see wrong file size * if we do resend, then MDS thread can get stuck for quite long + * and if we don't resend, then client will also get -EWOULDBLOCK !! + * (see LU-7975 and sanity/test_27F use cases) + * but let's decide not to resend/delay this truncate request to OST + * and allow Client to decide to resend, in a less agressive way from + * after_reply(), by returning -EINPROGRESS instead of + * -EAGAIN/-EWOULDBLOCK upon return from ptlrpc_queue_wait() at the + * end of this routine */ req->rq_no_resend = req->rq_no_delay = 1; @@ -1509,8 +1516,23 @@ int osp_object_truncate(const struct lu_env *env, struct dt_object *dt, ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); - if (rc) - CERROR("can't punch object: %d\n", rc); + if (rc) { + /* -EWOULDBLOCK/-EAGAIN means OST is unreachable at the moment + * since we have decided not to resend/delay, but this could + * lead to wrong size to be seen at Client side and even process + * trying to open to exit/fail if not itself handling -EAGAIN. 
+ * So it should be better to return -EINPROGRESS instead and + * leave the decision to resend at Client side in after_reply() + */ + if (rc == -EWOULDBLOCK) { + rc = -EINPROGRESS; + CDEBUG(D_HA, "returning -EINPROGRESS instead of " + "-EWOULDBLOCK/-EAGAIN to allow Client to " + "resend\n"); + } else { + CERROR("can't punch object: %d\n", rc); + } + } out: ptlrpc_req_finished(req); if (oa) diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d9199d5..08ee3e2 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2069,6 +2069,46 @@ test_27E() { } run_test 27E "check that default extended attribute size properly increases" +test_27F() { # LU-5346/LU-7975 + + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.51) ]] && + skip "Need MDS version at least 2.8.51" && return + + test_mkdir -p $DIR/$tdir + rm -f $DIR/$tdir/f0 + $SETSTRIPE -c 2 $DIR/$tdir + + # stop all OSTs to reproduce situation for LU-7975 ticket + for num in $(seq $OSTCOUNT); do + stop ost$num + done + + # open/create f0 with O_LOV_DELAY_CREATE + # truncate f0 to a non-0 size + # close + multiop $DIR/$tdir/f0 oO_RDWR:O_CREAT:O_LOV_DELAY_CREATE:T1050000c + + $CHECKSTAT -s 1050000 $DIR/$tdir/f0 || error "checkstat failed" + # open/write it again to force delayed layout creation + cat /etc/hosts > $DIR/$tdir/f0 & + catpid=$! + + # restart OSTs + for num in $(seq $OSTCOUNT); do + start ost$num $(ostdevname $num) $OST_MOUNT_OPTS || + error "ost$num failed to start" + done + + wait $catpid || error "cat failed" + + cmp /etc/hosts $DIR/$tdir/f0 || error "cmp failed" + [[ $($GETSTRIPE -c $DIR/$tdir/f0) == 2 ]] || error "wrong stripecount" + +} +run_test 27F "Client resend delayed layout creation with non-zero size" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 -- 1.8.3.1