* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include "ofd_internal.h"
-#define OFD_GRANT_CHUNK (2ULL * PTLRPC_MAX_BRW_SIZE)
-#define OFD_GRANT_SHRINK_LIMIT (16ULL * OFD_GRANT_CHUNK)
+/* At least enough to send a couple of 1MB RPCs, even if not max sized */
+#define OFD_GRANT_CHUNK (2ULL * DT_MAX_BRW_SIZE)
+
+/* Clients typically hold 2x their max_rpcs_in_flight of grant space */
+#define OFD_GRANT_SHRINK_LIMIT(exp) (2ULL * 8 * exp_max_brw_size(exp))
static inline obd_size ofd_grant_from_cli(struct obd_export *exp,
struct ofd_device *ofd, obd_size val)
static inline obd_size ofd_grant_chunk(struct obd_export *exp,
struct ofd_device *ofd)
{
- if (exp && ofd_obd(ofd)->obd_self_export == exp)
+ if (ofd_obd(ofd)->obd_self_export == exp)
/* Grant enough space to handle a big precreate request */
- return OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace;
+ return OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace / 2;
- if (exp && ofd_grant_compat(exp, ofd))
+ if (ofd_grant_compat(exp, ofd))
/* Try to grant enough space to send a full-size RPC */
- return PTLRPC_MAX_BRW_SIZE <<
+ return exp_max_brw_size(exp) <<
(ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT);
- return OFD_GRANT_CHUNK;
+
+ /* Try to return enough to send two full RPCs, if needed */
+ return exp_max_brw_size(exp) * 2;
}
/**
maxsize = ofd->ofd_osfs.os_blocks << ofd->ofd_blockbits;
- cfs_spin_lock(&obd->obd_dev_lock);
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&obd->obd_dev_lock);
+ spin_lock(&ofd->ofd_grant_lock);
cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
int error = 0;
" > maxsize("LPU64")\n", obd->obd_name,
exp->exp_client_uuid.uuid, exp, fed->fed_grant,
fed->fed_pending, maxsize);
- cfs_spin_unlock(&obd->obd_dev_lock);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&obd->obd_dev_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
if (fed->fed_dirty > maxsize) {
CERROR("%s: cli %s/%p fed_dirty(%ld) > maxsize("LPU64
")\n", obd->obd_name, exp->exp_client_uuid.uuid,
exp, fed->fed_dirty, maxsize);
- cfs_spin_unlock(&obd->obd_dev_lock);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&obd->obd_dev_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
CDEBUG_LIMIT(error ? D_ERROR : D_CACHE, "%s: cli %s/%p dirty "
tot_pending += fed->fed_pending;
tot_dirty += fed->fed_dirty;
}
- cfs_spin_unlock(&obd->obd_dev_lock);
+ spin_unlock(&obd->obd_dev_lock);
fo_tot_granted = ofd->ofd_tot_granted;
fo_tot_pending = ofd->ofd_tot_pending;
fo_tot_dirty = ofd->ofd_tot_dirty;
if (tot_dirty > maxsize)
CERROR("%s: tot_dirty "LPU64" > maxsize "LPU64"\n",
func, tot_dirty, maxsize);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
}
/**
rc = ofd_statfs_internal(env, ofd, osfs, max_age, from_cache);
if (unlikely(rc)) {
- *from_cache = 0;
+ if (from_cache)
+ *from_cache = 0;
return;
}
obd_size unstable;
ENTRY;
- LASSERT_SPIN_LOCKED(&ofd->ofd_grant_lock);
+ assert_spin_locked(&ofd->ofd_grant_lock);
- cfs_spin_lock(&ofd->ofd_osfs_lock);
+ spin_lock(&ofd->ofd_osfs_lock);
/* get available space from cached statfs data */
left = ofd->ofd_osfs.os_bavail << ofd->ofd_blockbits;
unstable = ofd->ofd_osfs_unstable; /* those might be accounted twice */
- cfs_spin_unlock(&ofd->ofd_osfs_lock);
+ spin_unlock(&ofd->ofd_osfs_lock);
tot_granted = ofd->ofd_tot_granted;
D_ERROR : D_CACHE;
CDEBUG_LIMIT(mask, "%s: cli %s/%p left "LPU64" < tot_grant "
- LPU64" unstable "LPU64" pending "LPU64"\n",
+ LPU64" unstable "LPU64" pending "LPU64" "
+ "dirty "LPU64"\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
left, tot_granted, unstable,
- ofd->ofd_tot_pending);
+ ofd->ofd_tot_pending, ofd->ofd_tot_dirty);
RETURN(0);
}
long dirty, dropped, grant_chunk;
ENTRY;
- LASSERT_SPIN_LOCKED(&ofd->ofd_grant_lock);
+ assert_spin_locked(&ofd->ofd_grant_lock);
if ((oa->o_valid & (OBD_MD_FLBLOCKS|OBD_MD_FLGRANT)) !=
(OBD_MD_FLBLOCKS|OBD_MD_FLGRANT)) {
CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
EXIT;
struct obd_device *obd = exp->exp_obd;
long grant_shrink;
- LASSERT_SPIN_LOCKED(&ofd->ofd_grant_lock);
-
+ assert_spin_locked(&ofd->ofd_grant_lock);
+ LASSERT(exp);
if (left_space >= ofd->ofd_tot_granted_clients *
- OFD_GRANT_SHRINK_LIMIT)
+ OFD_GRANT_SHRINK_LIMIT(exp))
return;
grant_shrink = ofd_grant_from_cli(exp, ofd, oa->o_grant);
ENTRY;
- LASSERT_SPIN_LOCKED(&ofd->ofd_grant_lock);
+ assert_spin_locked(&ofd->ofd_grant_lock);
if ((oa->o_valid & OBD_MD_FLFLAGS) &&
(oa->o_flags & OBD_FL_RECOV_RESEND)) {
CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
EXIT;
* \param curgrant - is the current grant claimed by the client
* \param want - is how much grant space the client would like to have
* \param left - is the remaining free space with granted space taken out
+ * \param conservative - if true, grant only a bounded amount of space; if
+ * false, grant as much as the client requested.
*/
static long ofd_grant(struct obd_export *exp, obd_size curgrant,
- obd_size want, obd_size left)
+ obd_size want, obd_size left, bool conservative)
{
struct obd_device *obd = exp->exp_obd;
struct ofd_device *ofd = ofd_exp(exp);
/* client not supporting OBD_CONNECT_GRANT_PARAM works with a 4KB block
* size while the reality is different */
- curgrant = ofd_grant_from_cli(exp, ofd, curgrant);
- want = ofd_grant_from_cli(exp, ofd, want);
+ curgrant = ofd_grant_from_cli(exp, ofd, curgrant);
+ want = ofd_grant_from_cli(exp, ofd, want);
grant_chunk = ofd_grant_chunk(exp, ofd);
/* Grant some fraction of the client's requested grant space so that
if (curgrant >= want || curgrant >= fed->fed_grant + grant_chunk)
RETURN(0);
- if (!obd->obd_recovering)
+ if (obd->obd_recovering)
+ conservative = false;
+
+ if (conservative)
/* don't grant more than 1/8th of the remaining free space in
* one chunk */
left >>= 3;
grant = min(want, left);
- /* align grant on block size */
- grant &= ~((1ULL << ofd->ofd_blockbits) - 1);
+ /* round grant up to the next block size */
+ grant = (grant + (1 << ofd->ofd_blockbits) - 1) &
+ ~((1ULL << ofd->ofd_blockbits) - 1);
if (!grant)
RETURN(0);
- /* Allow >OFD_GRANT_CHUNK size when clients reconnect due to a
- * server reboot. */
- if ((grant > grant_chunk) && (!obd->obd_recovering))
+ /* Limit to ofd_grant_chunk() if not reconnect/recovery */
+ if ((grant > grant_chunk) && conservative)
grant = grant_chunk;
ofd->ofd_tot_granted += grant;
CERROR("%s: cli %s/%p grant %ld want "LPU64" current "LPU64"\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_grant, want, curgrant);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
* \param env - is the lu environment provided by the caller
* \param exp - is the client's export which is reconnecting
* \param want - is how much the client would like to get
+ * \param conservative - if true, the server grants the client only a
+ * bounded amount; if false, the server grants the amount the client
+ * requested.
*/
long ofd_grant_connect(const struct lu_env *env, struct obd_export *exp,
- obd_size want)
+ obd_size want, bool conservative)
{
struct ofd_device *ofd = ofd_exp(exp);
struct filter_export_data *fed = &exp->exp_filter_data;
int force = 0; /* can use cached data */
/* don't grant space to client with read-only access */
- if ((exp->exp_connect_flags & OBD_CONNECT_RDONLY) ||
+ if ((exp_connect_flags(exp) & OBD_CONNECT_RDONLY) ||
ofd_grant_prohibit(exp, ofd))
return 0;
refresh:
ofd_grant_statfs(env, exp, force, &from_cache);
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&ofd->ofd_grant_lock);
/* Grab free space from cached info and take out space already granted
* to clients as well as reserved space */
/* get fresh statfs data if we are short in ungranted space */
if (from_cache && left < 32 * ofd_grant_chunk(exp, ofd)) {
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
CDEBUG(D_CACHE, "fs has no space left and statfs too old\n");
force = 1;
goto refresh;
}
ofd_grant(exp, ofd_grant_to_cli(exp, ofd, (obd_size)fed->fed_grant),
- want, left);
+ want, left, conservative);
/* return to client its current grant */
grant = ofd_grant_to_cli(exp, ofd, (obd_size)fed->fed_grant);
ofd->ofd_tot_granted_clients++;
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %ld want: "LPU64" left: "
LPU64"\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid,
struct ofd_device *ofd = ofd_exp(exp);
struct filter_export_data *fed = &exp->exp_filter_data;
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&ofd->ofd_grant_lock);
LASSERTF(ofd->ofd_tot_granted >= fed->fed_grant,
"%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n",
obd->obd_name, ofd->ofd_tot_granted,
exp->exp_client_uuid.uuid, exp, fed->fed_dirty);
ofd->ofd_tot_dirty -= fed->fed_dirty;
fed->fed_dirty = 0;
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
}
/**
ofd_grant_statfs(env, exp, 1, NULL);
/* protect all grant counters */
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&ofd->ofd_grant_lock);
/* Grab free space from cached statfs data and take out space
* already granted to clients as well as reserved space */
* since we don't grant space back on reads, no point
* in running statfs, so just skip it and process
* incoming grant data directly. */
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&ofd->ofd_grant_lock);
do_shrink = 0;
}
else
oa->o_grant = 0;
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
}
/**
/* get statfs information from OSD layer */
ofd_grant_statfs(env, exp, force, &from_cache);
- cfs_spin_lock(&ofd->ofd_grant_lock); /* protect all grant counters */
+ spin_lock(&ofd->ofd_grant_lock); /* protect all grant counters */
/* Grab free space from cached statfs data and take out space already
* granted to clients as well as reserved space */
/* Get fresh statfs data if we are short in ungranted space */
if (from_cache && left < 32 * ofd_grant_chunk(exp, ofd)) {
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
CDEBUG(D_CACHE, "%s: fs has no space left and statfs too old\n",
obd->obd_name);
force = 1;
/* When close to free space exhaustion, trigger a sync to force
* writeback cache to consume required space immediately and release as
* much space as possible. */
- if (!obd->obd_recovering && force != 2 &&
- left < ofd_grant_chunk(NULL, ofd)) {
+ if (!obd->obd_recovering && force != 2 && left < OFD_GRANT_CHUNK) {
bool from_grant = true;
int i;
if (!from_grant) {
/* at least one network buffer requires acquiring grant
* space on the server */
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
/* discard errors, at least we tried ... */
rc = dt_sync(env, ofd->ofd_osd);
force = 2;
ofd_grant_check(env, exp, oa, rnb, niocount, &left);
if (!(oa->o_valid & OBD_MD_FLGRANT)) {
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
RETURN_EXIT;
}
ofd_grant_shrink(exp, oa, left);
else
/* grant more space back to the client if possible */
- oa->o_grant = ofd_grant(exp, oa->o_grant, oa->o_undirty, left);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ oa->o_grant = ofd_grant(exp, oa->o_grant, oa->o_undirty, left,
+ true);
+ spin_unlock(&ofd->ofd_grant_lock);
}
/**
struct filter_export_data *fed = &exp->exp_filter_data;
obd_size left = 0;
unsigned long wanted;
-
ENTRY;
info->fti_used = 0;
ofd_grant_statfs(env, exp, 1, NULL);
/* protect all grant counters */
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&ofd->ofd_grant_lock);
/* fail precreate request if there is not enough blocks available for
* writing */
if (ofd->ofd_osfs.os_bavail - (fed->fed_grant >> ofd->ofd_blockbits) <
(ofd->ofd_osfs.os_blocks >> 10)) {
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
CDEBUG(D_RPCTRACE, "%s: not enough space for create "LPU64"\n",
- ofd_obd(ofd)->obd_name,
+ ofd_name(ofd),
ofd->ofd_osfs.os_bavail * ofd->ofd_osfs.os_blocks);
RETURN(-ENOSPC);
}
if (*nr == 0) {
/* we really have no space any more for precreation,
* fail the precreate request with ENOSPC */
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
RETURN(-ENOSPC);
}
/* compute space needed for the new number of creations */
fed->fed_pending += info->fti_used;
ofd->ofd_tot_pending += info->fti_used;
- /* grant more space (twice as much as needed for this request) for
- * precreate purpose if possible */
- ofd_grant(exp, fed->fed_grant, wanted * 2, left);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ /* grant more space for precreate purpose if possible. */
+ wanted = OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace / 2;
+ if (wanted > fed->fed_grant) {
+ /* always try to book enough space to handle a large precreate
+ * request */
+ wanted -= fed->fed_grant;
+ ofd_grant(exp, fed->fed_grant, wanted, left, false);
+ }
+ spin_unlock(&ofd->ofd_grant_lock);
RETURN(0);
}
if (pending == 0)
RETURN_EXIT;
- cfs_spin_lock(&ofd->ofd_grant_lock);
+ spin_lock(&ofd->ofd_grant_lock);
/* Don't update statfs data for errors raised before commit (e.g.
* bulk transfer failed, ...) since we know those writes have not been
* processed. For other errors hit during commit, we cannot really tell
* In any case, this should not be fatal since we always get fresh
* statfs data before failing a request with ENOSPC */
if (rc == 0) {
- cfs_spin_lock(&ofd->ofd_osfs_lock);
+ spin_lock(&ofd->ofd_osfs_lock);
/* Take pending out of cached statfs data */
ofd->ofd_osfs.os_bavail -= min_t(obd_size,
ofd->ofd_osfs.os_bavail,
/* someone is running statfs and want to be notified of
* writes happening meanwhile */
ofd->ofd_osfs_inflight += pending;
- cfs_spin_unlock(&ofd->ofd_osfs_lock);
+ spin_unlock(&ofd->ofd_osfs_lock);
}
if (exp->exp_filter_data.fed_pending < pending) {
CERROR("%s: cli %s/%p fed_pending(%lu) < grant_used(%lu)\n",
exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
exp->exp_filter_data.fed_pending, pending);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
exp->exp_filter_data.fed_pending -= pending;
"\n", exp->exp_obd->obd_name,
exp->exp_client_uuid.uuid, exp, ofd->ofd_tot_granted,
pending);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
ofd->ofd_tot_granted -= pending;
CERROR("%s: cli %s/%p tot_pending("LPU64") < grant_used(%lu)"
"\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid,
exp, ofd->ofd_tot_pending, pending);
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
LBUG();
}
ofd->ofd_tot_pending -= pending;
- cfs_spin_unlock(&ofd->ofd_grant_lock);
+ spin_unlock(&ofd->ofd_grant_lock);
EXIT;
}