From 7c099467eab7b64c00e6a14c9cab7f09153571c1 Mon Sep 17 00:00:00 2001 From: Robin Humble Date: Thu, 9 Aug 2018 15:33:04 +1000 Subject: [PATCH] LU-11227 lod: lod_sync: don't attempt sync to inactive targets chgrp on a client triggers lod_sync() which in turn loops over OST/MDT targets with dt_sync(). dt_sync() fails with -ENOTCONN when targets have been deactivated (i.e. set to active=0). The client retries infinitely, causing the client process to hang and considerable MDS network traffic, load, and disk I/O. The fix is to not attempt dt_sync() to ost/mdt targets that have been deactivated and also (because of possible races) to ignore connection errors. Tested with Lustre 2.10.4. Signed-off-by: Robin Humble Change-Id: I617509cf7944541489f4fd9762c233b771132165 Reviewed-on: https://review.whamcloud.com/32964 Reviewed-by: Alex Zhuravlev Tested-by: Jenkins Reviewed-by: Andreas Dilger Reviewed-by: John L. Hammond Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/lod/lod_dev.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index be0d7bd..8d94449 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -1409,11 +1409,16 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev) lod_foreach_ost(lod, i) { ost = OST_TGT(lod, i); LASSERT(ost && ost->ltd_ost); + if (!ost->ltd_active) + continue; rc = dt_sync(env, ost->ltd_ost); if (rc) { - CERROR("%s: can't sync ost %u: %d\n", - lod2obd(lod)->obd_name, i, rc); - break; + if (rc != -ENOTCONN) { + CERROR("%s: can't sync ost %u: %d\n", + lod2obd(lod)->obd_name, i, rc); + break; + } + rc = 0; } } lod_putref(lod, &lod->lod_ost_descs); @@ -1425,11 +1430,16 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev) lod_foreach_mdt(lod, i) { mdt = MDT_TGT(lod, i); LASSERT(mdt && mdt->ltd_mdt); + if (!mdt->ltd_active) + continue; rc = dt_sync(env, mdt->ltd_mdt); if (rc) { - CERROR("%s: can't sync mdt %u: %d\n", - 
lod2obd(lod)->obd_name, i, rc); - break; + if (rc != -ENOTCONN) { + CERROR("%s: can't sync mdt %u: %d\n", + lod2obd(lod)->obd_name, i, rc); + break; + } + rc = 0; } } lod_putref(lod, &lod->lod_mdt_descs); -- 1.8.3.1