From: Robin Humble Date: Thu, 9 Aug 2018 05:33:04 +0000 (+1000) Subject: LU-11227 lod: lod_sync: don't attempt sync to inactive targets X-Git-Tag: 2.10.6-RC1~13 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F91%2F32991%2F4;p=fs%2Flustre-release.git LU-11227 lod: lod_sync: don't attempt sync to inactive targets chgrp on a client triggers lod_sync(), which in turn loops over OST/MDT targets with dt_sync(). dt_sync() fails with -ENOTCONN when targets have been deactivated (i.e. set to active=0). The client retries infinitely, causing the client process to hang and generating considerable MDS network traffic, load, and disk I/O. The fix is to not attempt dt_sync() on OST/MDT targets that have been deactivated and also (because of possible races) to ignore connection errors. Tested with Lustre 2.10.4. Signed-off-by: Robin Humble Change-Id: I617509cf7944541489f4fd9762c233b771132165 Reviewed-on: https://review.whamcloud.com/32991 Reviewed-by: Andreas Dilger Tested-by: Jenkins Tested-by: Maloo Reviewed-by: John L. 
Hammond --- diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index cf31d19..0a153bd 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -1394,11 +1394,16 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev) lod_foreach_ost(lod, i) { ost = OST_TGT(lod, i); LASSERT(ost && ost->ltd_ost); + if (!ost->ltd_active) + continue; rc = dt_sync(env, ost->ltd_ost); if (rc) { - CERROR("%s: can't sync ost %u: %d\n", - lod2obd(lod)->obd_name, i, rc); - break; + if (rc != -ENOTCONN) { + CERROR("%s: can't sync ost %u: %d\n", + lod2obd(lod)->obd_name, i, rc); + break; + } + rc = 0; } } lod_putref(lod, &lod->lod_ost_descs); @@ -1410,11 +1415,16 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev) lod_foreach_mdt(lod, i) { mdt = MDT_TGT(lod, i); LASSERT(mdt && mdt->ltd_mdt); + if (!mdt->ltd_active) + continue; rc = dt_sync(env, mdt->ltd_mdt); if (rc) { - CERROR("%s: can't sync mdt %u: %d\n", - lod2obd(lod)->obd_name, i, rc); - break; + if (rc != -ENOTCONN) { + CERROR("%s: can't sync mdt %u: %d\n", + lod2obd(lod)->obd_name, i, rc); + break; + } + rc = 0; } } lod_putref(lod, &lod->lod_mdt_descs);