From: Johann Lombardi Date: Mon, 26 Jan 2015 16:04:51 +0000 (+0100) Subject: LU-2049 grant: delay grant releasing until commit X-Git-Tag: 2.7.62~15 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=31b7404f436241436fb0abdec2b6cd678c674d82;ds=sidebyside LU-2049 grant: delay grant releasing until commit Grant space acquired for a bulk write is released from the grant accounting at the end of request processing. At that point, the additional space consumed by the write request is believed to be taken into account in any subsequent statfs call. However, it does not seem to be the case with all backend filesystems and more particularly ZFS which seems to provide reliable space information only once the transaction associated with the bulk write has committed. This creates a hole in the grant space management where we can end up allocating more grant space than really available. This patch postpones grant releasing until transaction commit time. This is done by registering a commit callback in charge of this operation. The patch also removes the implicit use of info->fti_used and stores the amount of grant space to be released in obdo::o_grant_used. Signed-off-by: Johann Lombardi Change-Id: Id99b8712ffc1e5f103df4835b698127619b8ba85 Signed-off-by: Jinshan Xiong Reviewed-on: http://review.whamcloud.com/13531 Reviewed-by: Andreas Dilger Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index f45388a..c278b91 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1561,6 +1561,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) struct ofd_seq *oseq; int rc = 0, diff; int sync_trans = 0; + long granted = 0; ENTRY; @@ -1698,10 +1699,12 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) if (!(oa->o_valid & OBD_MD_FLFLAGS) || !(oa->o_flags & OBD_FL_DELORPHAN)) { /* don't enforce grant during orphan recovery */ - rc = ofd_grant_create(tsi->tsi_env, - ofd_obd(ofd)->obd_self_export, - &diff); - if (rc) { + granted = ofd_grant_create(tsi->tsi_env, + ofd_obd(ofd)->obd_self_export, + &diff); + if (granted < 0) { + rc = granted; + granted = 0; CDEBUG(D_HA, "%s: failed to acquire grant " "space for precreate (%d): rc = %d\n", ofd_name(ofd), diff, rc); @@ -1768,9 +1771,11 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) ofd_name(ofd), rc); if (!(oa->o_valid & OBD_MD_FLFLAGS) || - !(oa->o_flags & OBD_FL_DELORPHAN)) - ofd_grant_commit(tsi->tsi_env, - ofd_obd(ofd)->obd_self_export, rc); + !(oa->o_flags & OBD_FL_DELORPHAN)) { + ofd_grant_commit(ofd_obd(ofd)->obd_self_export, granted, + rc); + granted = 0; + } ostid_set_id(&rep_oa->o_oi, ofd_seq_last_oid(oseq)); } diff --git a/lustre/ofd/ofd_grant.c b/lustre/ofd/ofd_grant.c index fd2f101..f982487 100644 --- a/lustre/ofd/ofd_grant.c +++ b/lustre/ofd/ofd_grant.c @@ -173,6 +173,45 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func) tot_pending += fed->fed_pending; tot_dirty += fed->fed_dirty; } + + /* exports about to be unlinked should also be taken into account since + * they might still hold pending grant space to be released at + * commit time */ + list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) { + struct filter_export_data *fed; + int error = 0; + + fed = &exp->exp_filter_data; + + if (fed->fed_grant < 0 || fed->fed_pending < 0 || + fed->fed_dirty < 0) + error = 1; + if (fed->fed_grant + fed->fed_pending > maxsize) { + CERROR("%s: cli %s/%p fed_grant(%ld) + fed_pending(%ld)" + " > maxsize("LPU64")\n", obd->obd_name, + exp->exp_client_uuid.uuid, exp, fed->fed_grant, + fed->fed_pending, maxsize); + spin_unlock(&obd->obd_dev_lock); + spin_unlock(&ofd->ofd_grant_lock); + LBUG(); + } + if (fed->fed_dirty > maxsize) { + CERROR("%s: cli %s/%p fed_dirty(%ld) > maxsize("LPU64 + ")\n", obd->obd_name, exp->exp_client_uuid.uuid, + exp, fed->fed_dirty, maxsize); + spin_unlock(&obd->obd_dev_lock); + spin_unlock(&ofd->ofd_grant_lock); + LBUG(); + } + CDEBUG_LIMIT(error ? D_ERROR : D_CACHE, "%s: cli %s/%p dirty " + "%ld pend %ld grant %ld\n", obd->obd_name, + exp->exp_client_uuid.uuid, exp, fed->fed_dirty, + fed->fed_pending, fed->fed_grant); + tot_granted += fed->fed_grant + fed->fed_pending; + tot_pending += fed->fed_pending; + tot_dirty += fed->fed_dirty; + } + spin_unlock(&obd->obd_dev_lock); fo_tot_granted = ofd->ofd_tot_granted; fo_tot_pending = ofd->ofd_tot_pending; @@ -595,7 +634,10 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, exp->exp_client_uuid.uuid, exp, i, bytes); } - /* record space used for the I/O, will be used in ofd_grant_commmit() */ + /* record in o_grant_used the actual space reserved for the I/O, will be + * used later in ofd_grant_commmit() */ + oa->o_grant_used = granted + ungranted; + /* Now substract what the clients has used already. We don't subtract * this from the tot_granted yet, so that other client's can't grab * that space before we have actually allocated our blocks. That @@ -603,9 +645,9 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, info->fti_used = granted + ungranted; *left -= ungranted; fed->fed_grant -= granted; - fed->fed_pending += info->fti_used; + fed->fed_pending += oa->o_grant_used; ofd->ofd_tot_granted += ungranted; - ofd->ofd_tot_pending += info->fti_used; + ofd->ofd_tot_pending += oa->o_grant_used; CDEBUG(D_CACHE, "%s: cli %s/%p granted: %lu ungranted: %lu grant: %lu dirty: %lu" @@ -829,7 +871,7 @@ void ofd_grant_discard(struct obd_export *exp) obd->obd_name, ofd->ofd_tot_pending, exp->exp_client_uuid.uuid, exp, fed->fed_pending); /* ofd_tot_pending is handled in ofd_grant_commit as bulk - * finishes */ + * commmits */ LASSERTF(ofd->ofd_tot_dirty >= fed->fed_dirty, "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n", obd->obd_name, ofd->ofd_tot_dirty, @@ -1022,20 +1064,18 @@ refresh: * export currently) * \param[in] nr number of objects to be created * - * \retval 0 for success + * \retval >= 0 amount of grant space allocated to the precreate request * \retval -ENOSPC on failure */ -int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) +long ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) { - struct ofd_thread_info *info = ofd_info(env); struct ofd_device *ofd = ofd_exp(exp); struct filter_export_data *fed = &exp->exp_filter_data; u64 left = 0; unsigned long wanted; + unsigned long granted; ENTRY; - info->fti_used = 0; - if (exp->exp_obd->obd_recovering || ofd->ofd_dt_conf.ddp_inodespace == 0) /* don't enforce grant during recovery */ @@ -1090,9 +1130,9 @@ int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) left -= wanted - fed->fed_grant; fed->fed_grant = 0; } - info->fti_used = wanted; - fed->fed_pending += info->fti_used; - ofd->ofd_tot_pending += info->fti_used; + granted = wanted; + fed->fed_pending += granted; + ofd->ofd_tot_pending += granted; /* grant more space for precreate purpose if possible. */ wanted = OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace / 2; @@ -1103,7 +1143,7 @@ int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) ofd_grant_alloc(exp, fed->fed_grant, wanted, left, false); } spin_unlock(&ofd->ofd_grant_lock); - RETURN(0); + RETURN(granted); } /** @@ -1111,22 +1151,18 @@ int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) * * Update pending grant counter once buffers have been written to the disk. * - * \param[in] env LU environment provided by the caller * \param[in] exp export of the client which sent the request + * \param[in] pending amount of reserved space to be released * \param[in] rc return code of pre-commit operations */ -void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp, +void ofd_grant_commit(struct obd_export *exp, unsigned long pending, int rc) { struct ofd_device *ofd = ofd_exp(exp); - struct ofd_thread_info *info = ofd_info(env); - unsigned long pending; - ENTRY; /* get space accounted in tot_pending for the I/O, set in * ofd_grant_check() */ - pending = info->fti_used; if (pending == 0) RETURN_EXIT; @@ -1180,3 +1216,75 @@ void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp, spin_unlock(&ofd->ofd_grant_lock); EXIT; } + +struct ofd_grant_cb { + /* commit callback structure */ + struct dt_txn_commit_cb ogc_cb; + /* export associated with the bulk write */ + struct obd_export *ogc_exp; + /* pending grant to be released */ + unsigned long ogc_granted; +}; + +/** + * Callback function for grant releasing + * + * Release grant space reserved by the client node. + * + * \param[in] env execution environment + * \param[in] th transaction handle + * \param[in] cb callback data + * \param[in] err error code + */ +static void ofd_grant_commit_cb(struct lu_env *env, struct thandle *th, + struct dt_txn_commit_cb *cb, int err) +{ + struct ofd_grant_cb *ogc; + + ogc = container_of(cb, struct ofd_grant_cb, ogc_cb); + + ofd_grant_commit(ogc->ogc_exp, ogc->ogc_granted, err); + class_export_cb_put(ogc->ogc_exp); + OBD_FREE_PTR(ogc); +} + +/** + * Add callback for grant releasing + * + * Register a commit callback to release grant space. + * + * \param[in] th transaction handle + * \param[in] exp OBD export of client + * \param[in] granted amount of grant space to be released upon commit + * + * \retval 0 on successful callback adding + * \retval negative value on error + */ +int ofd_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, + unsigned long granted) +{ + struct ofd_grant_cb *ogc; + struct dt_txn_commit_cb *dcb; + int rc; + ENTRY; + + OBD_ALLOC_PTR(ogc); + if (ogc == NULL) + RETURN(-ENOMEM); + + ogc->ogc_exp = class_export_cb_get(exp); + ogc->ogc_granted = granted; + + dcb = &ogc->ogc_cb; + dcb->dcb_func = ofd_grant_commit_cb; + INIT_LIST_HEAD(&dcb->dcb_linkage); + strlcpy(dcb->dcb_name, "ofd_grant_commit_cb", sizeof(dcb->dcb_name)); + + rc = dt_trans_cb_add(th, dcb); + if (rc) { + class_export_cb_put(ogc->ogc_exp); + OBD_FREE_PTR(ogc); + } + + RETURN(rc); +} diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 9891730..38df4e1 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -141,7 +141,8 @@ struct ofd_device { struct obd_statfs ofd_osfs; __u64 ofd_osfs_age; int ofd_blockbits; - /* writes which might be be accounted twice in ofd_osfs.os_bavail */ + /* writes between prep & commit which might be accounted twice in + * ofd_osfs.os_bavail */ u64 ofd_osfs_unstable; /* counters used during statfs update, protected by ofd_osfs_lock. @@ -479,8 +480,11 @@ void ofd_grant_prepare_read(const struct lu_env *env, struct obd_export *exp, void ofd_grant_prepare_write(const struct lu_env *env, struct obd_export *exp, struct obdo *oa, struct niobuf_remote *rnb, int niocount); -void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp, int rc); -int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr); +void ofd_grant_commit(struct obd_export *exp, unsigned long grant_used, int rc); +int ofd_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, + unsigned long grant); +long ofd_grant_create(const struct lu_env *env, struct obd_export *exp, + int *nr); /* ofd_fmd.c */ int ofd_fmd_init(void); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 892abb8..9c887fe 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -654,7 +654,7 @@ err: ofd_read_unlock(env, fo); ofd_object_put(env, fo); /* ofd_grant_prepare_write() was called, so we must commit */ - ofd_grant_commit(env, exp, rc); + ofd_grant_commit(exp, oa->o_grant_used, rc); out: /* let's still process incoming grant information packed in the oa, * but without enforcing grant since we won't proceed with the write. @@ -999,6 +999,7 @@ static int ofd_soft_sync_cb_add(struct thandle *th, struct obd_export *exp) * \param[in] objcount always 1 * \param[in] niocount number of local buffers * \param[in] lnb local buffers + * \param[in] granted grant space consumed for the bulk I/O * \param[in] old_rc result of processing at this point * * \retval 0 on successful commit @@ -1008,9 +1009,9 @@ static int ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp, struct ofd_device *ofd, const struct lu_fid *fid, struct lu_attr *la, struct filter_fid *ff, int objcount, - int niocount, struct niobuf_local *lnb, int old_rc) + int niocount, struct niobuf_local *lnb, + unsigned long granted, int old_rc) { - struct ofd_thread_info *info = ofd_info(env); struct ofd_object *fo; struct dt_object *o; struct thandle *th; @@ -1107,6 +1108,11 @@ out_stop: cb_registered = true; } + if (rc == 0 && granted > 0) { + if (ofd_grant_commit_cb_add(th, exp, granted) == 0) + granted = 0; + } + ofd_trans_stop(env, ofd, th, rc); if (rc == -ENOSPC && retries++ < 3) { CDEBUG(D_INODE, "retry after force commit, retries:%d\n", @@ -1127,7 +1133,8 @@ out: ofd_object_put(env, fo); /* second put is pair to object_get in ofd_preprw_write */ ofd_object_put(env, fo); - ofd_grant_commit(env, info->fti_exp, old_rc); + if (granted > 0) + ofd_grant_commit(exp, granted, old_rc); RETURN(rc); } @@ -1190,7 +1197,8 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, } rc = ofd_commitrw_write(env, exp, ofd, fid, &info->fti_attr, - ff, objcount, npages, lnb, old_rc); + ff, objcount, npages, lnb, + oa->o_grant_used, old_rc); if (rc == 0) obdo_from_la(oa, &info->fti_attr, OFD_VALID_FLAGS | LA_GID | LA_UID); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 8bc5f50..cc73792 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -722,7 +722,7 @@ int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd, unstable = ofd->ofd_osfs_inflight - unstable; ofd->ofd_osfs_unstable = 0; if (unstable) { - /* some writes completed while we were running statfs + /* some writes committed while we were running statfs * w/o the ofd_osfs_lock. Those ones got added to * the cached statfs data that we are about to crunch. * Take them into account in the new statfs data */ @@ -1072,6 +1072,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, u64 seq = ostid_seq(&oa->o_oi); struct ofd_seq *oseq; int rc = 0, diff = 1; + long granted; u64 next_id; int count; @@ -1099,8 +1100,10 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, } mutex_lock(&oseq->os_create_lock); - rc = ofd_grant_create(env, ofd_obd(ofd)->obd_self_export, &diff); - if (rc < 0) { + granted = ofd_grant_create(env, ofd_obd(ofd)->obd_self_export, &diff); + if (granted < 0) { + rc = granted; + granted = 0; CDEBUG(D_HA, "%s: failed to acquire grant space for " "precreate (%d): rc = %d\n", ofd_name(ofd), diff, rc); diff = 0; @@ -1120,7 +1123,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, rc = 0; } - ofd_grant_commit(env, ofd_obd(ofd)->obd_self_export, rc); + ofd_grant_commit(ofd_obd(ofd)->obd_self_export, granted, rc); out: mutex_unlock(&oseq->os_create_lock); ofd_seq_put(env, oseq);