X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftarget%2Ftgt_grant.c;h=72416e0f1f828463a0d5b8fe93ede8ecfa211684;hb=435d86191185b178e2df53fb9d46003e19a5e937;hp=8a513ed27b43f5283ec2b890efd7440464e10a70;hpb=e9389613eb29297f7b5a6b6cd896bafc7a5551f8;p=fs%2Flustre-release.git diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 8a513ed..72416e0 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2016, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * lustre/target/tgt_grant.c @@ -71,7 +71,7 @@ * Author: Johann Lombardi */ -#define DEBUG_SUBSYSTEM S_FILTER +#define DEBUG_SUBSYSTEM S_CLASS #include #include @@ -90,9 +90,7 @@ static inline u64 tgt_grant_inflate(struct tg_grants_data *tgd, u64 val) * is thus inflated. We already significantly overestimate * overhead, no need to add the extent tax in this case */ return val << (tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT); - /* client can deal with the block size, but does not support per-extent - * grant accounting, inflate grant by 100% for such clients */ - return val << 1; + return val; } /* Companion of tgt_grant_inflate() */ @@ -100,7 +98,7 @@ static inline u64 tgt_grant_deflate(struct tg_grants_data *tgd, u64 val) { if (tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) return val >> (tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT); - return val >> 1; + return val; } /* Grant chunk is used as a unit for grant allocation. It should be inflated @@ -121,8 +119,8 @@ static inline u64 tgt_grant_chunk(struct obd_export *exp, if ((data == NULL && !(exp_grant_param_supp(exp))) || (data != NULL && !OCD_HAS_FLAG(data, GRANT_PARAM))) - /* Try to grant enough space to send a full-size RPC */ - return tgt_grant_inflate(tgd, chunk); + /* Try to grant enough space to send 2 full-size RPCs */ + return tgt_grant_inflate(tgd, chunk) << 1; /* Try to return enough to send two full-size RPCs * = 2 * (BRW_size + #extents_in_BRW * grant_tax) */ @@ -140,11 +138,6 @@ static int tgt_check_export_grants(struct obd_export *exp, u64 *dirty, struct tg_export_data *ted = &exp->exp_target_data; int level = D_CACHE; - if (exp->exp_obd->obd_self_export == exp) - CDEBUG(D_CACHE, "%s: processing self export: %ld %ld " - "%ld\n", exp->exp_obd->obd_name, ted->ted_grant, - ted->ted_pending, ted->ted_dirty); - if (ted->ted_grant < 0 || ted->ted_pending < 0 || ted->ted_dirty < 0) level = D_ERROR; CDEBUG_LIMIT(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", @@ -190,6 +183,7 @@ void tgt_grant_sanity_check(struct obd_device *obd, const char *func) struct lu_target *lut = obd->u.obt.obt_lut; struct tg_grants_data *tgd = &lut->lut_tgd; struct obd_export *exp; + struct tg_export_data *ted; u64 maxsize; u64 tot_dirty = 0; u64 tot_pending = 0; @@ -211,6 +205,15 @@ void tgt_grant_sanity_check(struct obd_device *obd, const char *func) spin_lock(&obd->obd_dev_lock); spin_lock(&tgd->tgd_grant_lock); + exp = obd->obd_self_export; + ted = &exp->exp_target_data; + CDEBUG(D_CACHE, "%s: processing self export: %ld %ld " + "%ld\n", obd->obd_name, ted->ted_grant, + ted->ted_pending, ted->ted_dirty); + tot_granted += ted->ted_grant + ted->ted_pending; + tot_pending += ted->ted_pending; + tot_dirty += ted->ted_dirty; + list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { error = tgt_check_export_grants(exp, &tot_dirty, &tot_pending, &tot_granted, maxsize); @@ -277,14 +280,14 @@ EXPORT_SYMBOL(tgt_grant_sanity_check); * \retval negative value on error */ int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut, - struct obd_statfs *osfs, __u64 max_age, int *from_cache) + struct obd_statfs *osfs, time64_t max_age, int *from_cache) { struct tg_grants_data *tgd = &lut->lut_tgd; int rc = 0; ENTRY; spin_lock(&tgd->tgd_osfs_lock); - if (cfs_time_before_64(tgd->tgd_osfs_age, max_age) || max_age == 0) { + if (tgd->tgd_osfs_age < max_age || max_age == 0) { u64 unstable; /* statfs data are too old, get up-to-date one. @@ -310,6 +313,8 @@ int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut, if (unlikely(rc)) GOTO(out, rc); + osfs->os_namelen = min_t(__u32, osfs->os_namelen, NAME_MAX); + spin_lock(&tgd->tgd_grant_lock); spin_lock(&tgd->tgd_osfs_lock); /* calculate how much space was written while we released the @@ -339,7 +344,7 @@ int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut, /* finally udpate cached statfs data */ tgd->tgd_osfs = *osfs; - tgd->tgd_osfs_age = cfs_time_current_64(); + tgd->tgd_osfs_age = ktime_get_seconds(); tgd->tgd_statfs_inflight--; /* stop tracking */ if (tgd->tgd_statfs_inflight == 0) @@ -385,13 +390,13 @@ static void tgt_grant_statfs(const struct lu_env *env, struct obd_export *exp, struct tg_grants_data *tgd = &lut->lut_tgd; struct tgt_thread_info *tti; struct obd_statfs *osfs; - __u64 max_age; - int rc; + time64_t max_age; + int rc; if (force) max_age = 0; /* get fresh statfs data */ else - max_age = cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS); + max_age = ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS; tti = tgt_th_info(env); osfs = &tti->tti_u.osfs; @@ -430,6 +435,7 @@ static u64 tgt_grant_space_left(struct obd_export *exp) u64 left; u64 avail; u64 unstable; + u64 reserved; ENTRY; assert_spin_locked(&tgd->tgd_grant_lock); @@ -440,16 +446,17 @@ static u64 tgt_grant_space_left(struct obd_export *exp) unstable = tgd->tgd_osfs_unstable; /* those might be accounted twice */ spin_unlock(&tgd->tgd_osfs_lock); - tot_granted = tgd->tgd_tot_granted; + reserved = left * tgd->tgd_reserved_pcnt / 100; + tot_granted = tgd->tgd_tot_granted + reserved; if (left < tot_granted) { int mask = (left + unstable < tot_granted - tgd->tgd_tot_pending) ? D_ERROR : D_CACHE; - CDEBUG_LIMIT(mask, "%s: cli %s/%p left %llu < tot_grant " - "%llu unstable %llu pending %llu " - "dirty %llu\n", + /* the below message is checked in sanityn.sh test_15 */ + CDEBUG_LIMIT(mask, + "%s: cli %s/%p left=%llu < tot_grant=%llu unstable=%llu pending=%llu dirty=%llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, left, tot_granted, unstable, tgd->tgd_tot_pending, @@ -464,10 +471,10 @@ static u64 tgt_grant_space_left(struct obd_export *exp) /* Align left on block size */ left &= ~((1ULL << tgd->tgd_blockbits) - 1); - CDEBUG(D_CACHE, "%s: cli %s/%p avail %llu left %llu unstable " - "%llu tot_grant %llu pending %llu\n", obd->obd_name, - exp->exp_client_uuid.uuid, exp, avail, left, unstable, - tot_granted, tgd->tgd_tot_pending); + CDEBUG(D_CACHE, + "%s: cli %s/%p avail=%llu left=%llu unstable=%llu tot_grant=%llu pending=%llu\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, avail, left, + unstable, tot_granted, tgd->tgd_tot_pending); RETURN(left); } @@ -492,8 +499,7 @@ static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp, struct tg_export_data *ted = &exp->exp_target_data; struct obd_device *obd = exp->exp_obd; struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; - long dirty; - long dropped; + long long dirty, dropped; ENTRY; assert_spin_locked(&tgd->tgd_grant_lock); @@ -517,10 +523,19 @@ static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp, /* inflate grant counters if required */ if (!exp_grant_param_supp(exp)) { + u64 tmp; oa->o_grant = tgt_grant_inflate(tgd, oa->o_grant); oa->o_dirty = tgt_grant_inflate(tgd, oa->o_dirty); - oa->o_dropped = tgt_grant_inflate(tgd, (u64)oa->o_dropped); - oa->o_undirty = tgt_grant_inflate(tgd, oa->o_undirty); + /* inflation can bump client's wish to >4GB which doesn't fit + * 32bit o_undirty, limit that .. */ + tmp = tgt_grant_inflate(tgd, oa->o_undirty); + if (tmp >= OBD_MAX_GRANT) + tmp = OBD_MAX_GRANT & ~(1ULL << tgd->tgd_blockbits); + oa->o_undirty = tmp; + tmp = tgt_grant_inflate(tgd, oa->o_dropped); + if (tmp >= OBD_MAX_GRANT) + tmp = OBD_MAX_GRANT & ~(1ULL << tgd->tgd_blockbits); + oa->o_dropped = tmp; } dirty = oa->o_dirty; @@ -535,13 +550,13 @@ static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp, tgd->tgd_tot_dirty += dirty - ted->ted_dirty; if (ted->ted_grant < dropped) { CDEBUG(D_CACHE, - "%s: cli %s/%p reports %lu dropped > grant %lu\n", + "%s: cli %s/%p reports %llu dropped > grant %lu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, dropped, ted->ted_grant); dropped = 0; } if (tgd->tgd_tot_granted < dropped) { - CERROR("%s: cli %s/%p reports %lu dropped > tot_grant %llu\n", + CERROR("%s: cli %s/%p reports %llu dropped > tot_grant %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, dropped, tgd->tgd_tot_granted); dropped = 0; @@ -590,6 +605,14 @@ static void tgt_grant_shrink(struct obd_export *exp, struct obdo *oa, grant_shrink = oa->o_grant; + if (ted->ted_grant < grant_shrink) { + CDEBUG(D_CACHE, + "%s: cli %s/%p wants %lu shrinked > grant %lu\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + grant_shrink, ted->ted_grant); + grant_shrink = ted->ted_grant; + } + ted->ted_grant -= grant_shrink; tgd->tgd_tot_granted -= grant_shrink; @@ -879,6 +902,9 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, ENTRY; + if (OBD_FAIL_CHECK(OBD_FAIL_TGT_NO_GRANT)) + RETURN(0); + /* When tgd_grant_compat_disable is set, we don't grant any space to * clients not supporting OBD_CONNECT_GRANT_PARAM. * Otherwise, space granted to such a client is inflated since it @@ -887,9 +913,10 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, tgd->tgd_grant_compat_disable) || left == 0 || exp->exp_failed) RETURN(0); - if (want > 0x7fffffff) { - CERROR("%s: client %s/%p requesting > 2GB grant %llu\n", - obd->obd_name, exp->exp_client_uuid.uuid, exp, want); + if (want > OBD_MAX_GRANT) { + CERROR("%s: client %s/%p requesting > max (%lu), %llu\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + OBD_MAX_GRANT, want); RETURN(0); } @@ -924,6 +951,16 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, if ((grant > chunk) && conservative) grant = chunk; + /* + * Limit grant so that export' grant does not exceed what the + * client would like to have by more than grants for 2 full + * RPCs + */ + if (want + chunk <= ted->ted_grant) + RETURN(0); + if (ted->ted_grant + grant > want + chunk) + grant = want + chunk - ted->ted_grant; + tgd->tgd_tot_granted += grant; ted->ted_grant += grant; @@ -1046,26 +1083,34 @@ EXPORT_SYMBOL(tgt_grant_connect); void tgt_grant_discard(struct obd_export *exp) { struct obd_device *obd = exp->exp_obd; - struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + struct lu_target *lut = class_exp2tgt(exp); struct tg_export_data *ted = &exp->exp_target_data; + struct tg_grants_data *tgd; + if (!lut) + return; + + tgd = &lut->lut_tgd; spin_lock(&tgd->tgd_grant_lock); - LASSERTF(tgd->tgd_tot_granted >= ted->ted_grant, - "%s: tot_granted %llu cli %s/%p ted_grant %ld\n", - obd->obd_name, tgd->tgd_tot_granted, - exp->exp_client_uuid.uuid, exp, ted->ted_grant); + if (tgd->tgd_tot_granted < ted->ted_grant) { + CERROR("%s: tot_granted %llu < cli %s/%p ted_grant %ld\n", + obd->obd_name, tgd->tgd_tot_granted, + exp->exp_client_uuid.uuid, exp, ted->ted_grant); + } tgd->tgd_tot_granted -= ted->ted_grant; ted->ted_grant = 0; - LASSERTF(tgd->tgd_tot_pending >= ted->ted_pending, - "%s: tot_pending %llu cli %s/%p ted_pending %ld\n", - obd->obd_name, tgd->tgd_tot_pending, - exp->exp_client_uuid.uuid, exp, ted->ted_pending); + if (tgd->tgd_tot_pending < ted->ted_pending) { + CERROR("%s: tot_pending %llu < cli %s/%p ted_pending %ld\n", + obd->obd_name, tgd->tgd_tot_pending, + exp->exp_client_uuid.uuid, exp, ted->ted_pending); + } /* tgd_tot_pending is handled in tgt_grant_commit as bulk * commmits */ - LASSERTF(tgd->tgd_tot_dirty >= ted->ted_dirty, - "%s: tot_dirty %llu cli %s/%p ted_dirty %ld\n", - obd->obd_name, tgd->tgd_tot_dirty, - exp->exp_client_uuid.uuid, exp, ted->ted_dirty); + if (tgd->tgd_tot_dirty < ted->ted_dirty) { + CERROR("%s: tot_dirty %llu < cli %s/%p ted_dirty %ld\n", + obd->obd_name, tgd->tgd_tot_dirty, + exp->exp_client_uuid.uuid, exp, ted->ted_dirty); + } tgd->tgd_tot_dirty -= ted->ted_dirty; ted->ted_dirty = 0; spin_unlock(&tgd->tgd_grant_lock); @@ -1273,7 +1318,7 @@ EXPORT_SYMBOL(tgt_grant_prepare_write); * \retval >= 0 amount of grant space allocated to the precreate request * \retval -ENOSPC on failure */ -long tgt_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) +long tgt_grant_create(const struct lu_env *env, struct obd_export *exp, s64 *nr) { struct lu_target *lut = exp->exp_obd->u.obt.obt_lut; struct tg_grants_data *tgd = &lut->lut_tgd; @@ -1502,3 +1547,131 @@ int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, RETURN(rc); } EXPORT_SYMBOL(tgt_grant_commit_cb_add); + +/** + * Show estimate of total amount of dirty data on clients. + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buf buf used by sysfs to print out data + * + * Return: 0 on success + * negative value on error + */ +ssize_t tot_dirty_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd; + + tgd = &obd->u.obt.obt_lut->lut_tgd; + return scnprintf(buf, PAGE_SIZE, "%llu\n", tgd->tgd_tot_dirty); +} +EXPORT_SYMBOL(tot_dirty_show); + +/** + * Show total amount of space granted to clients. + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buf buf used by sysfs to print out data + * + * Return: 0 on success + * negative value on error + */ +ssize_t tot_granted_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd; + + tgd = &obd->u.obt.obt_lut->lut_tgd; + return scnprintf(buf, PAGE_SIZE, "%llu\n", tgd->tgd_tot_granted); +} +EXPORT_SYMBOL(tot_granted_show); + +/** + * Show total amount of space used by IO in progress. + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buf buf used by sysfs to print out data + * + * Return: 0 on success + * negative value on error + */ +ssize_t tot_pending_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd; + + tgd = &obd->u.obt.obt_lut->lut_tgd; + return scnprintf(buf, PAGE_SIZE, "%llu\n", tgd->tgd_tot_pending); +} +EXPORT_SYMBOL(tot_pending_show); + +/** + * Show if grants compatibility mode is disabled. + * + * When tgd_grant_compat_disable is set, we don't grant any space to clients + * not supporting OBD_CONNECT_GRANT_PARAM. Otherwise, space granted to such + * a client is inflated since it consumes PAGE_SIZE of grant space per + * block, (i.e. typically 4kB units), but underlaying file system might have + * block size bigger than page size, e.g. ZFS. See LU-2049 for details. + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buf buf used by sysfs to print out data + * + * Return: string length of @buf output on success + */ +ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + + return scnprintf(buf, PAGE_SIZE, "%u\n", tgd->tgd_grant_compat_disable); +} +EXPORT_SYMBOL(grant_compat_disable_show); + +/** + * Change grant compatibility mode. + * + * Setting tgd_grant_compat_disable prohibit any space granting to clients + * not supporting OBD_CONNECT_GRANT_PARAM. See details above. + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buffer string which represents mode + * 1: disable compatibility mode + * 0: enable compatibility mode + * @count @buffer length + * + * Return: @count on success + * negative number on error + */ +ssize_t grant_compat_disable_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + bool val; + int rc; + + rc = kstrtobool(buffer, &val); + if (rc) + return rc; + + tgd->tgd_grant_compat_disable = val; + + return count; +} +EXPORT_SYMBOL(grant_compat_disable_store);