From e9389613eb29297f7b5a6b6cd896bafc7a5551f8 Mon Sep 17 00:00:00 2001 From: Mikhal Pershin Date: Tue, 6 Dec 2016 14:06:48 +0300 Subject: [PATCH] LU-9184 grants: move grant code to the target Make grants code common for MDT and OST. - move grants field from OFD to the lu_target - same for statfs - make ofd_grant.c independent from OFD - move ofd_grant.c to tgt_grant.c This work was done for the Data-on-MDT feature, but it is also needed for better space control on MDT, e.g. for lfsck, HSM, etc. Signed-off-by: Mikhal Pershin Change-Id: I43930b32b271c9e850aa2783795e7cce12830cd5 Reviewed-on: https://review.whamcloud.com/25820 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lu_target.h | 65 ++ lustre/include/lustre_export.h | 9 +- lustre/ofd/Makefile.in | 2 +- lustre/ofd/lproc_ofd.c | 37 +- lustre/ofd/ofd_dev.c | 91 +-- lustre/ofd/ofd_internal.h | 76 --- lustre/ofd/ofd_io.c | 14 +- lustre/ofd/ofd_obd.c | 168 +---- lustre/ptlrpc/Makefile.in | 4 +- lustre/target/Makefile.am | 2 +- lustre/{ofd/ofd_grant.c => target/tgt_grant.c} | 904 ++++++++++++++----------- lustre/target/tgt_handler.c | 1 - lustre/target/tgt_internal.h | 1 + 13 files changed, 689 insertions(+), 685 deletions(-) rename lustre/{ofd/ofd_grant.c => target/tgt_grant.c} (60%) diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 4464c74..307fff8 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -116,9 +116,42 @@ struct target_distribute_txn_data { void *tdtd_show_retrievers_cbdata; }; +struct tg_grants_data { + /* grants: all values in bytes */ + /* grant lock to protect all grant counters */ + spinlock_t tgd_grant_lock; + /* total amount of dirty data reported by clients in incoming obdo */ + u64 tgd_tot_dirty; + /* sum of filesystem space granted to clients for async writes */ + u64 tgd_tot_granted; + /* grant used by I/Os in progress (between prepare and commit) */ + 
u64 tgd_tot_pending; + /* number of clients using grants */ + int tgd_tot_granted_clients; + /* shall we grant space to clients not + * supporting OBD_CONNECT_GRANT_PARAM? */ + int tgd_grant_compat_disable; + /* protect all statfs-related counters */ + spinlock_t tgd_osfs_lock; + __u64 tgd_osfs_age; + int tgd_blockbits; + /* counters used during statfs update, protected by tgd_osfs_lock. + * records when a statfs refresh is in progress */ + int tgd_statfs_inflight; + /* writes between prep & commit which might be accounted twice in + * tgd_osfs.os_bavail */ + u64 tgd_osfs_unstable; + /* track writes completed while statfs refresh is underway. + * tracking is only effective when tgd_statfs_inflight > 1 */ + u64 tgd_osfs_inflight; + /* statfs optimization: we cache a bit */ + struct obd_statfs tgd_osfs; +}; + struct lu_target { struct obd_device *lut_obd; struct dt_device *lut_bottom; + struct dt_device_param lut_dt_conf; struct target_distribute_txn_data *lut_tdtd; struct ptlrpc_thread lut_tdtd_commit_thread; @@ -165,6 +198,9 @@ struct lu_target { /** cross MDT locks which should trigger Sync-on-Lock-Cancel */ spinlock_t lut_slc_locks_guard; struct list_head lut_slc_locks; + + /* target grants fields */ + struct tg_grants_data lut_tgd; }; /* number of slots in reply bitmap */ @@ -459,6 +495,35 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted, __u64 xid); +/* target/tgt_grant.c */ +static inline int exp_grant_param_supp(struct obd_export *exp) +{ + return !!(exp_connect_flags(exp) & OBD_CONNECT_GRANT_PARAM); +} + +/* Blocksize used for client not supporting OBD_CONNECT_GRANT_PARAM. + * That's 4KB=2^12 which is the biggest block size known to work whatever + * the client's page size is. 
*/ +#define COMPAT_BSIZE_SHIFT 12 + +void tgt_grant_sanity_check(struct obd_device *obd, const char *func); +void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp, + struct obd_connect_data *data, bool new_conn); +void tgt_grant_discard(struct obd_export *exp); +void tgt_grant_prepare_read(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); +void tgt_grant_prepare_write(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa, struct niobuf_remote *rnb, + int niocount); +void tgt_grant_commit(struct obd_export *exp, unsigned long grant_used, int rc); +int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, + unsigned long grant); +long tgt_grant_create(const struct lu_env *env, struct obd_export *exp, + int *nr); +int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut, + struct obd_statfs *osfs, __u64 max_age, + int *from_cache); + /* target/update_trans.c */ int distribute_txn_init(const struct lu_env *env, struct lu_target *lut, diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 410b185..b5ca7f7 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -94,6 +94,11 @@ struct tg_export_data { int ted_reply_max; /* high water mark */ int ted_release_xid; int ted_release_tag; + /* grants */ + long ted_dirty; /* in bytes */ + long ted_grant; /* in bytes */ + long ted_pending; /* bytes just being written */ + __u8 ted_pagebits; /* log2 of client page size */ }; /** @@ -116,16 +121,12 @@ struct filter_export_data { struct tg_export_data fed_ted; spinlock_t fed_lock; /**< protects fed_mod_list */ __u64 fed_lastid_gen; - long fed_dirty; /* in bytes */ - long fed_grant; /* in bytes */ struct list_head fed_mod_list; /* files being modified */ - long fed_pending; /* bytes just being written */ /* count of SOFT_SYNC RPCs, which will be reset after * ofd_soft_sync_limit number of RPCs, and trigger a sync. 
*/ atomic_t fed_soft_sync_count; int fed_mod_count;/* items in fed_writing list */ __u32 fed_group; - __u8 fed_pagebits; /* log2 of client page size */ }; struct mgs_export_data { diff --git a/lustre/ofd/Makefile.in b/lustre/ofd/Makefile.in index 3d624f9..1d10108 100644 --- a/lustre/ofd/Makefile.in +++ b/lustre/ofd/Makefile.in @@ -1,7 +1,7 @@ MODULES := ofd ofd-objs := ofd_dev.o ofd_obd.o ofd_fs.o ofd_trans.o ofd_objects.o ofd_io.o -ofd-objs += lproc_ofd.o ofd_fmd.o ofd_grant.o ofd_dlm.o ofd_lvb.o +ofd-objs += lproc_ofd.o ofd_fmd.o ofd_dlm.o ofd_lvb.o EXTRA_DIST = $(ofd-objs:%.o=%.c) ofd_internal.h diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index 19abd74..7609167 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -81,11 +81,11 @@ LPROC_SEQ_FOPS_RO(ofd_seqs); static int ofd_tot_dirty_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = m->private; - struct ofd_device *ofd; + struct tg_grants_data *tgd; LASSERT(obd != NULL); - ofd = ofd_dev(obd->obd_lu_dev); - seq_printf(m, "%llu\n", ofd->ofd_tot_dirty); + tgd = &obd->u.obt.obt_lut->lut_tgd; + seq_printf(m, "%llu\n", tgd->tgd_tot_dirty); return 0; } LPROC_SEQ_FOPS_RO(ofd_tot_dirty); @@ -102,11 +102,11 @@ LPROC_SEQ_FOPS_RO(ofd_tot_dirty); static int ofd_tot_granted_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = m->private; - struct ofd_device *ofd; + struct tg_grants_data *tgd; LASSERT(obd != NULL); - ofd = ofd_dev(obd->obd_lu_dev); - seq_printf(m, "%llu\n", ofd->ofd_tot_granted); + tgd = &obd->u.obt.obt_lut->lut_tgd; + seq_printf(m, "%llu\n", tgd->tgd_tot_granted); return 0; } LPROC_SEQ_FOPS_RO(ofd_tot_granted); @@ -123,11 +123,11 @@ LPROC_SEQ_FOPS_RO(ofd_tot_granted); static int ofd_tot_pending_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = m->private; - struct ofd_device *ofd; + struct tg_grants_data *tgd; LASSERT(obd != NULL); - ofd = ofd_dev(obd->obd_lu_dev); - seq_printf(m, "%llu\n", ofd->ofd_tot_pending); + tgd = 
&obd->u.obt.obt_lut->lut_tgd; + seq_printf(m, "%llu\n", tgd->tgd_tot_pending); return 0; } LPROC_SEQ_FOPS_RO(ofd_tot_pending); @@ -147,7 +147,7 @@ static int ofd_grant_precreate_seq_show(struct seq_file *m, void *data) LASSERT(obd != NULL); seq_printf(m, "%ld\n", - obd->obd_self_export->exp_filter_data.fed_grant); + obd->obd_self_export->exp_target_data.ted_grant); return 0; } LPROC_SEQ_FOPS_RO(ofd_grant_precreate); @@ -532,7 +532,8 @@ ofd_brw_size_seq_write(struct file *file, const char __user *buffer, if (val <= 0) return -EINVAL; - if (val > DT_MAX_BRW_SIZE || val < (1 << ofd->ofd_blockbits)) + if (val > DT_MAX_BRW_SIZE || + val < (1 << ofd->ofd_lut.lut_tgd.tgd_blockbits)) return -ERANGE; spin_lock(&ofd->ofd_flags_lock); @@ -635,7 +636,7 @@ LPROC_SEQ_FOPS(ofd_sync_lock_cancel); /** * Show if grants compatibility mode is disabled. * - * When ofd_grant_compat_disable is set, we don't grant any space to clients + * When tgd_grant_compat_disable is set, we don't grant any space to clients * not supporting OBD_CONNECT_GRANT_PARAM. Otherwise, space granted to such * a client is inflated since it consumes PAGE_SIZE of grant space per * block, (i.e. typically 4kB units), but underlaying file system might have @@ -650,16 +651,16 @@ LPROC_SEQ_FOPS(ofd_sync_lock_cancel); static int ofd_grant_compat_disable_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = m->private; - struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; - seq_printf(m, "%u\n", ofd->ofd_grant_compat_disable); + seq_printf(m, "%u\n", tgd->tgd_grant_compat_disable); return 0; } /** * Change grant compatibility mode. * - * Setting ofd_grant_compat_disable prohibit any space granting to clients + * Setting tgd_grant_compat_disable prohibit any space granting to clients * not supporting OBD_CONNECT_GRANT_PARAM. See details above. 
* * \param[in] file proc file @@ -679,7 +680,7 @@ ofd_grant_compat_disable_seq_write(struct file *file, { struct seq_file *m = file->private_data; struct obd_device *obd = m->private; - struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; __s64 val; int rc; @@ -690,9 +691,7 @@ ofd_grant_compat_disable_seq_write(struct file *file, if (val < 0) return -EINVAL; - spin_lock(&ofd->ofd_flags_lock); - ofd->ofd_grant_compat_disable = !!val; - spin_unlock(&ofd->ofd_flags_lock); + tgd->tgd_grant_compat_disable = !!val; return count; } diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 2f91b17..c654a30 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -647,14 +647,14 @@ static int ofd_recovery_complete(const struct lu_env *env, * Grant space for object precreation on the self export. * The initial reserved space (i.e. 10MB for zfs and 280KB for ldiskfs) * is enough to create 10k objects. More space is then acquired for - * precreation in ofd_grant_create(). + * precreation in tgt_grant_create(). 
*/ memset(&oti->fti_ocd, 0, sizeof(oti->fti_ocd)); oti->fti_ocd.ocd_grant = OST_MAX_PRECREATE / 2; - oti->fti_ocd.ocd_grant *= ofd->ofd_dt_conf.ddp_inodespace; + oti->fti_ocd.ocd_grant *= ofd->ofd_lut.lut_dt_conf.ddp_inodespace; oti->fti_ocd.ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_GRANT_PARAM; - ofd_grant_connect(env, dev->ld_obd->obd_self_export, &oti->fti_ocd, + tgt_grant_connect(env, dev->ld_obd->obd_self_export, &oti->fti_ocd, true); rc = next->ld_ops->ldo_recovery_complete(env, next); RETURN(rc); @@ -939,7 +939,7 @@ static int ofd_set_info_hdl(struct tgt_session_info *tsi) *repbody = *body; /** handle grant shrink, similar to a read request */ - ofd_grant_prepare_read(tsi->tsi_env, tsi->tsi_exp, + tgt_grant_prepare_read(tsi->tsi_env, tsi->tsi_exp, &repbody->oa); } else if (KEY_IS(KEY_EVICT_BY_NID)) { if (vallen > 0) @@ -1698,9 +1698,9 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) if (!(oa->o_valid & OBD_MD_FLFLAGS) || !(oa->o_flags & OBD_FL_DELORPHAN)) { /* don't enforce grant during orphan recovery */ - granted = ofd_grant_create(tsi->tsi_env, - ofd_obd(ofd)->obd_self_export, - &diff); + granted = tgt_grant_create(tsi->tsi_env, + ofd_obd(ofd)->obd_self_export, + &diff); if (granted < 0) { rc = granted; granted = 0; @@ -1776,8 +1776,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) if (!(oa->o_valid & OBD_MD_FLFLAGS) || !(oa->o_flags & OBD_FL_DELORPHAN)) { - ofd_grant_commit(ofd_obd(ofd)->obd_self_export, granted, - rc); + tgt_grant_commit(ofd_obd(ofd)->obd_self_export, + granted, rc); granted = 0; } @@ -2857,7 +2857,6 @@ static void ofd_key_exit(const struct lu_context *ctx, info->fti_xid = 0; info->fti_pre_version = 0; - info->fti_used = 0; memset(&info->fti_attr, 0, sizeof info->fti_attr); } @@ -2890,6 +2889,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, const char *dev = lustre_cfg_string(cfg, 0); struct ofd_thread_info *info = NULL; struct obd_device *obd; + struct tg_grants_data *tgd = 
&m->ofd_lut.lut_tgd; struct obd_statfs *osfs; struct lu_fid fid; struct nm_config_file *nodemap_config; @@ -2918,21 +2918,22 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, m->ofd_raid_degraded = 0; m->ofd_syncjournal = 0; ofd_slc_set(m); - m->ofd_grant_compat_disable = 0; + tgd->tgd_grant_compat_disable = 0; m->ofd_soft_sync_limit = OFD_SOFT_SYNC_LIMIT_DEFAULT; /* statfs data */ - spin_lock_init(&m->ofd_osfs_lock); - m->ofd_osfs_age = cfs_time_shift_64(-1000); - m->ofd_osfs_unstable = 0; - m->ofd_statfs_inflight = 0; - m->ofd_osfs_inflight = 0; + spin_lock_init(&tgd->tgd_osfs_lock); + tgd->tgd_osfs_age = cfs_time_shift_64(-1000); + tgd->tgd_osfs_unstable = 0; + tgd->tgd_statfs_inflight = 0; + tgd->tgd_osfs_inflight = 0; /* grant data */ - spin_lock_init(&m->ofd_grant_lock); - m->ofd_tot_dirty = 0; - m->ofd_tot_granted = 0; - m->ofd_tot_pending = 0; + spin_lock_init(&tgd->tgd_grant_lock); + tgd->tgd_tot_dirty = 0; + tgd->tgd_tot_granted = 0; + tgd->tgd_tot_pending = 0; + m->ofd_seq_count = 0; init_waitqueue_head(&m->ofd_inconsistency_thread.t_ctl_waitq); INIT_LIST_HEAD(&m->ofd_inconsistency_list); @@ -2978,30 +2979,6 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, ofd_procfs_add_brw_stats_symlink(m); - /* populate cached statfs data */ - osfs = &ofd_info(env)->fti_u.osfs; - rc = ofd_statfs_internal(env, m, osfs, 0, NULL); - if (rc != 0) { - CERROR("%s: can't get statfs data, rc %d\n", obd->obd_name, rc); - GOTO(err_fini_stack, rc); - } - if (!is_power_of_2(osfs->os_bsize)) { - CERROR("%s: blocksize (%d) is not a power of 2\n", - obd->obd_name, osfs->os_bsize); - GOTO(err_fini_stack, rc = -EPROTO); - } - m->ofd_blockbits = fls(osfs->os_bsize) - 1; - - if (ONE_MB_BRW_SIZE < (1U << m->ofd_blockbits)) - m->ofd_brw_size = 1U << m->ofd_blockbits; - else - m->ofd_brw_size = ONE_MB_BRW_SIZE; - - m->ofd_cksum_types_supported = cksum_types_supported_server(); - m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT; - if 
(osfs->os_bsize * osfs->os_blocks < OFD_PRECREATE_SMALL_FS) - m->ofd_precreate_batch = OFD_PRECREATE_BATCH_SMALL; - snprintf(info->fti_u.name, sizeof(info->fti_u.name), "%s-%s", "filter"/*LUSTRE_OST_NAME*/, obd->obd_uuid.uuid); m->ofd_namespace = ldlm_namespace_new(obd, info->fti_u.name, @@ -3019,7 +2996,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "filter_ldlm_cb_client", &obd->obd_ldlm_client); - dt_conf_get(env, m->ofd_osd, &m->ofd_dt_conf); + dt_conf_get(env, m->ofd_osd, &m->ofd_lut.lut_dt_conf); rc = tgt_init(env, &m->ofd_lut, obd, m->ofd_osd, ofd_common_slice, OBD_FAIL_OST_ALL_REQUEST_NET, @@ -3027,6 +3004,30 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, if (rc) GOTO(err_free_ns, rc); + /* populate cached statfs data */ + osfs = &ofd_info(env)->fti_u.osfs; + rc = tgt_statfs_internal(env, &m->ofd_lut, osfs, 0, NULL); + if (rc != 0) { + CERROR("%s: can't get statfs data, rc %d\n", obd->obd_name, rc); + GOTO(err_fini_lut, rc); + } + if (!is_power_of_2(osfs->os_bsize)) { + CERROR("%s: blocksize (%d) is not a power of 2\n", + obd->obd_name, osfs->os_bsize); + GOTO(err_fini_lut, rc = -EPROTO); + } + tgd->tgd_blockbits = fls(osfs->os_bsize) - 1; + + if (ONE_MB_BRW_SIZE < (1U << tgd->tgd_blockbits)) + m->ofd_brw_size = 1U << tgd->tgd_blockbits; + else + m->ofd_brw_size = ONE_MB_BRW_SIZE; + + m->ofd_cksum_types_supported = cksum_types_supported_server(); + m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT; + if (osfs->os_bsize * osfs->os_blocks < OFD_PRECREATE_SMALL_FS) + m->ofd_precreate_batch = OFD_PRECREATE_BATCH_SMALL; + rc = ofd_fs_setup(env, m, obd); if (rc) GOTO(err_fini_lut, rc); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 6f1ed7f..c838029 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -113,7 +113,6 @@ struct ofd_device { struct dt_device ofd_dt_dev; struct dt_device *ofd_osd; struct 
obd_export *ofd_osd_exp; - struct dt_device_param ofd_dt_conf; /* DLM name-space for meta-data locks maintained by this server */ struct ldlm_namespace *ofd_namespace; @@ -131,36 +130,6 @@ struct ofd_device { int ofd_precreate_batch; spinlock_t ofd_batch_lock; - /* protect all statfs-related counters */ - spinlock_t ofd_osfs_lock; - /* statfs optimization: we cache a bit */ - struct obd_statfs ofd_osfs; - __u64 ofd_osfs_age; - int ofd_blockbits; - /* counters used during statfs update, protected by ofd_osfs_lock. - * record when some statfs refresh are in progress */ - int ofd_statfs_inflight; - - /* writes between prep & commit which might be accounted twice in - * ofd_osfs.os_bavail */ - u64 ofd_osfs_unstable; - - /* track writes completed while statfs refresh is underway. - * tracking is only effective when ofd_statfs_inflight > 1 */ - u64 ofd_osfs_inflight; - - /* grants: all values in bytes */ - /* grant lock to protect all grant counters */ - spinlock_t ofd_grant_lock; - /* total amount of dirty data reported by clients in incoming obdo */ - u64 ofd_tot_dirty; - /* sum of filesystem space granted to clients for async writes */ - u64 ofd_tot_granted; - /* grant used by I/Os in progress (between prepare and commit) */ - u64 ofd_tot_pending; - /* number of clients using grants */ - int ofd_tot_granted_clients; - /* preferred BRW size, decided by storage type and capability */ __u32 ofd_brw_size; /* checksum types supported on this node */ @@ -174,9 +143,6 @@ struct ofd_device { unsigned long ofd_raid_degraded:1, /* sync journal on writes */ ofd_syncjournal:1, - /* shall we grant space to clients not - * supporting OBD_CONNECT_GRANT_PARAM? */ - ofd_grant_compat_disable:1, /* Protected by ofd_lastid_rwsem. 
*/ ofd_lastid_rebuilding:1, ofd_record_fid_accessed:1, @@ -312,8 +278,6 @@ struct ofd_thread_info { struct lu_buf fti_buf; loff_t fti_off; - /* Space used by the I/O, used by grant code */ - unsigned long fti_used; struct ost_lvb fti_lvb; union { struct lfsck_req_local fti_lrl; @@ -332,9 +296,6 @@ int ofd_fiemap_get(const struct lu_env *env, struct ofd_device *ofd, /* ofd_obd.c */ extern struct obd_ops ofd_obd_ops; -int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd, - struct obd_statfs *osfs, __u64 max_age, - int *from_cache); int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd, const struct lu_fid *fid, int orphan); int ofd_statfs(const struct lu_env *env, struct obd_export *exp, @@ -431,43 +392,6 @@ struct ofd_object *ofd_object_find_exists(const struct lu_env *env, return fo; } -/* ofd_grants.c */ -static inline int ofd_grant_param_supp(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_GRANT_PARAM); -} - -/* Blocksize used for client not supporting OBD_CONNECT_GRANT_PARAM. - * That's 4KB=2^12 which is the biggest block size known to work whatever - * the client's page size is. */ -#define COMPAT_BSIZE_SHIFT 12 - -static inline int ofd_grant_prohibit(struct obd_export *exp, - struct ofd_device *ofd) -{ - /* When ofd_grant_compat_disable is set, we don't grant any space to - * clients not supporting OBD_CONNECT_GRANT_PARAM. 
- * Otherwise, space granted to such a client is inflated since it - * consumes PAGE_SIZE of grant space per block */ - return !!(ofd_obd(ofd)->obd_self_export != exp && - !ofd_grant_param_supp(exp) && ofd->ofd_grant_compat_disable); -} - -void ofd_grant_sanity_check(struct obd_device *obd, const char *func); -void ofd_grant_connect(const struct lu_env *env, struct obd_export *exp, - struct obd_connect_data *data, bool new_conn); -void ofd_grant_discard(struct obd_export *exp); -void ofd_grant_prepare_read(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); -void ofd_grant_prepare_write(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct niobuf_remote *rnb, - int niocount); -void ofd_grant_commit(struct obd_export *exp, unsigned long grant_used, int rc); -int ofd_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, - unsigned long grant); -long ofd_grant_create(const struct lu_env *env, struct obd_export *exp, - int *nr); - /* ofd_fmd.c */ int ofd_fmd_init(void); void ofd_fmd_exit(void); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index dcd6c96..bbaf086 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -625,7 +625,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some * space back if possible */ - ofd_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt); + tgt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt); /* parse remote buffers to local buffers and prepare the latter */ *nr_local = 0; @@ -658,13 +658,13 @@ err: dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local); ofd_read_unlock(env, fo); ofd_object_put(env, fo); - /* ofd_grant_prepare_write() was called, so we must commit */ - ofd_grant_commit(exp, oa->o_grant_used, rc); + /* tgt_grant_prepare_write() was called, so we must commit */ + tgt_grant_commit(exp, oa->o_grant_used, rc); out: /* let's still process incoming grant 
information packed in the oa, * but without enforcing grant since we won't proceed with the write. * Just like a read request actually. */ - ofd_grant_prepare_read(env, exp, oa); + tgt_grant_prepare_read(env, exp, oa); return rc; } @@ -745,7 +745,7 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, rc = ofd_preprw_write(env, exp, ofd, fid, &info->fti_attr, oa, objcount, obj, rnb, nr_local, lnb, jobid); } else if (cmd == OBD_BRW_READ) { - ofd_grant_prepare_read(env, exp, oa); + tgt_grant_prepare_read(env, exp, oa); rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa, obj->ioo_bufcnt, rnb, nr_local, lnb, jobid); @@ -1133,7 +1133,7 @@ out_stop: } if (rc == 0 && granted > 0) { - if (ofd_grant_commit_cb_add(th, exp, granted) == 0) + if (tgt_grant_commit_cb_add(th, exp, granted) == 0) granted = 0; } @@ -1160,7 +1160,7 @@ out: /* second put is pair to object_get in ofd_preprw_write */ ofd_object_put(env, fo); if (granted > 0) - ofd_grant_commit(exp, granted, old_rc); + tgt_grant_commit(exp, granted, old_rc); RETURN(rc); } diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 8cf8e2a..cd9f673 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -202,23 +202,24 @@ static int ofd_parse_connect_data(const struct lu_env *env, } if (OCD_HAS_FLAG(data, GRANT_PARAM)) { + struct dt_device_param *ddp = &ofd->ofd_lut.lut_dt_conf; + /* client is reporting its page size, for future use */ - exp->exp_filter_data.fed_pagebits = data->ocd_grant_blkbits; - data->ocd_grant_blkbits = ofd->ofd_blockbits; + exp->exp_target_data.ted_pagebits = data->ocd_grant_blkbits; + data->ocd_grant_blkbits = ofd->ofd_lut.lut_tgd.tgd_blockbits; /* ddp_inodespace may not be power-of-two value, eg. for ldiskfs * it's LDISKFS_DIR_REC_LEN(20) = 28. 
*/ - data->ocd_grant_inobits = - fls(ofd->ofd_dt_conf.ddp_inodespace - 1); + data->ocd_grant_inobits = fls(ddp->ddp_inodespace - 1); /* ocd_grant_tax_kb is in 1K byte blocks */ - data->ocd_grant_tax_kb = ofd->ofd_dt_conf.ddp_extent_tax >> 10; - data->ocd_grant_max_blks = ofd->ofd_dt_conf.ddp_max_extent_blks; + data->ocd_grant_tax_kb = ddp->ddp_extent_tax >> 10; + data->ocd_grant_max_blks = ddp->ddp_max_extent_blks; } if (OCD_HAS_FLAG(data, GRANT)) { - /* Save connect_data we have so far because ofd_grant_connect() + /* Save connect_data we have so far because tgt_grant_connect() * uses it to calculate grant. */ exp->exp_connect_data = *data; - ofd_grant_connect(env, exp, data, new_connection); + tgt_grant_connect(env, exp, data, new_connection); } if (data->ocd_connect_flags & OBD_CONNECT_INDEX) { @@ -270,7 +271,7 @@ static int ofd_parse_connect_data(const struct lu_env *env, } if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) - data->ocd_maxbytes = ofd->ofd_dt_conf.ddp_maxbytes; + data->ocd_maxbytes = ofd->ofd_lut.lut_dt_conf.ddp_maxbytes; if (OCD_HAS_FLAG(data, PINGLESS)) { if (ptlrpc_pinger_suppress_pings()) { @@ -429,11 +430,11 @@ int ofd_obd_disconnect(struct obd_export *exp) class_export_get(exp); if (!(exp->exp_flags & OBD_OPT_FORCE)) - ofd_grant_sanity_check(ofd_obd(ofd), __FUNCTION__); + tgt_grant_sanity_check(ofd_obd(ofd), __func__); rc = server_disconnect_export(exp); - ofd_grant_discard(exp); + tgt_grant_discard(exp); /* Do not erase record for recoverable client. 
*/ if (exp->exp_obd->obd_replayable && @@ -502,10 +503,10 @@ static int ofd_destroy_export(struct obd_export *exp) { struct ofd_device *ofd = ofd_exp(exp); - if (exp->exp_filter_data.fed_pending) + if (exp->exp_target_data.ted_pending) CERROR("%s: cli %s/%p has %lu pending on destroyed export" "\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, - exp, exp->exp_filter_data.fed_pending); + exp, exp->exp_target_data.ted_pending); target_destroy_export(exp); @@ -522,14 +523,14 @@ static int ofd_destroy_export(struct obd_export *exp) * discard grants once we're sure no more * interaction with the client is possible */ - ofd_grant_discard(exp); + tgt_grant_discard(exp); ofd_fmd_cleanup(exp); if (exp_connect_flags(exp) & OBD_CONNECT_GRANT) - ofd->ofd_tot_granted_clients--; + ofd->ofd_lut.lut_tgd.tgd_tot_granted_clients--; if (!(exp->exp_flags & OBD_OPT_FORCE)) - ofd_grant_sanity_check(exp->exp_obd, __FUNCTION__); + tgt_grant_sanity_check(exp->exp_obd, __func__); LASSERT(list_empty(&exp->exp_filter_data.fed_mod_list)); return 0; @@ -693,105 +694,6 @@ static int ofd_get_info(const struct lu_env *env, struct obd_export *exp, } /** - * Get file system statistics of OST server. - * - * Helper function for ofd_statfs(), also used by grant code. - * Implements caching for statistics to avoid calling OSD device each time. - * - * \param[in] env execution environment - * \param[in] ofd OFD device - * \param[out] osfs statistic data to return - * \param[in] max_age maximum age for cached data - * \param[in] from_cache show that data was get from cache or not - * - * \retval 0 if successful - * \retval negative value on error - */ -int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd, - struct obd_statfs *osfs, __u64 max_age, int *from_cache) -{ - int rc = 0; - ENTRY; - - spin_lock(&ofd->ofd_osfs_lock); - if (cfs_time_before_64(ofd->ofd_osfs_age, max_age) || max_age == 0) { - u64 unstable; - - /* statfs data are too old, get up-to-date one. 
- * we must be cautious here since multiple threads might be - * willing to update statfs data concurrently and we must - * grant that cached statfs data are always consistent */ - - if (ofd->ofd_statfs_inflight == 0) - /* clear inflight counter if no users, although it would - * take a while to overflow this 64-bit counter ... */ - ofd->ofd_osfs_inflight = 0; - /* notify ofd_grant_commit() that we want to track writes - * completed as of now */ - ofd->ofd_statfs_inflight++; - /* record value of inflight counter before running statfs to - * compute the diff once statfs is completed */ - unstable = ofd->ofd_osfs_inflight; - spin_unlock(&ofd->ofd_osfs_lock); - - /* statfs can sleep ... hopefully not for too long since we can - * call it fairly often as space fills up */ - rc = dt_statfs(env, ofd->ofd_osd, osfs); - if (unlikely(rc)) - GOTO(out, rc); - - spin_lock(&ofd->ofd_grant_lock); - spin_lock(&ofd->ofd_osfs_lock); - /* calculate how much space was written while we released the - * ofd_osfs_lock */ - unstable = ofd->ofd_osfs_inflight - unstable; - ofd->ofd_osfs_unstable = 0; - if (unstable) { - /* some writes committed while we were running statfs - * w/o the ofd_osfs_lock. Those ones got added to - * the cached statfs data that we are about to crunch. - * Take them into account in the new statfs data */ - osfs->os_bavail -= min_t(u64, osfs->os_bavail, - unstable >> ofd->ofd_blockbits); - /* However, we don't really know if those writes got - * accounted in the statfs call, so tell - * ofd_grant_space_left() there is some uncertainty - * on the accounting of those writes. - * The purpose is to prevent spurious error messages in - * ofd_grant_space_left() since those writes might be - * accounted twice. 
*/ - ofd->ofd_osfs_unstable += unstable; - } - /* similarly, there is some uncertainty on write requests - * between prepare & commit */ - ofd->ofd_osfs_unstable += ofd->ofd_tot_pending; - spin_unlock(&ofd->ofd_grant_lock); - - /* finally udpate cached statfs data */ - ofd->ofd_osfs = *osfs; - ofd->ofd_osfs_age = cfs_time_current_64(); - - ofd->ofd_statfs_inflight--; /* stop tracking */ - if (ofd->ofd_statfs_inflight == 0) - ofd->ofd_osfs_inflight = 0; - spin_unlock(&ofd->ofd_osfs_lock); - - if (from_cache) - *from_cache = 0; - } else { - /* use cached statfs data */ - *osfs = ofd->ofd_osfs; - spin_unlock(&ofd->ofd_osfs_lock); - if (from_cache) - *from_cache = 1; - } - GOTO(out, rc); - -out: - return rc; -} - -/** * Implementation of obd_ops::o_statfs. * * This function returns information about a storage file system. @@ -817,11 +719,12 @@ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, { struct obd_device *obd = class_exp2obd(exp); struct ofd_device *ofd = ofd_exp(exp); + struct tg_grants_data *tgd = &ofd->ofd_lut.lut_tgd; int rc; ENTRY; - rc = ofd_statfs_internal(env, ofd, osfs, max_age, NULL); + rc = tgt_statfs_internal(env, &ofd->ofd_lut, osfs, max_age, NULL); if (unlikely(rc)) GOTO(out, rc); @@ -831,25 +734,26 @@ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, CDEBUG(D_SUPER | D_CACHE, "blocks cached %llu granted %llu" " pending %llu free %llu avail %llu\n", - ofd->ofd_tot_dirty, ofd->ofd_tot_granted, ofd->ofd_tot_pending, - osfs->os_bfree << ofd->ofd_blockbits, - osfs->os_bavail << ofd->ofd_blockbits); + tgd->tgd_tot_dirty, tgd->tgd_tot_granted, + tgd->tgd_tot_pending, + osfs->os_bfree << tgd->tgd_blockbits, + osfs->os_bavail << tgd->tgd_blockbits); osfs->os_bavail -= min_t(u64, osfs->os_bavail, - ((ofd->ofd_tot_dirty + ofd->ofd_tot_pending + - osfs->os_bsize - 1) >> ofd->ofd_blockbits)); + ((tgd->tgd_tot_dirty + tgd->tgd_tot_pending + + osfs->os_bsize - 1) >> tgd->tgd_blockbits)); /* The QoS code on the MDS does not care 
about space reserved for * precreate, so take it out. */ if (exp_connect_flags(exp) & OBD_CONNECT_MDS) { - struct filter_export_data *fed; + struct tg_export_data *ted; - fed = &obd->obd_self_export->exp_filter_data; + ted = &obd->obd_self_export->exp_target_data; osfs->os_bavail -= min_t(u64, osfs->os_bavail, - fed->fed_grant >> ofd->ofd_blockbits); + ted->ted_grant >> tgd->tgd_blockbits); } - ofd_grant_sanity_check(obd, __FUNCTION__); + tgt_grant_sanity_check(obd, __func__); CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; " "%llu objects: %llu free; state %x\n", osfs->os_blocks, osfs->os_bfree, osfs->os_bavail, @@ -866,16 +770,16 @@ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, if (ofd->ofd_raid_degraded) osfs->os_state |= OS_STATE_DEGRADED; - if (obd->obd_self_export != exp && !ofd_grant_param_supp(exp) && - ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT) { + if (obd->obd_self_export != exp && !exp_grant_param_supp(exp) && + tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) { /* clients which don't support OBD_CONNECT_GRANT_PARAM * should not see a block size > page size, otherwise * cl_lost_grant goes mad. Therefore, we emulate a 4KB (=2^12) * block size which is the biggest block size known to work * with all client's page size. 
*/ - osfs->os_blocks <<= ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT; - osfs->os_bfree <<= ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT; - osfs->os_bavail <<= ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT; + osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT; + osfs->os_bfree <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT; + osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT; osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT; } @@ -1140,7 +1044,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, } mutex_lock(&oseq->os_create_lock); - granted = ofd_grant_create(env, ofd_obd(ofd)->obd_self_export, &diff); + granted = tgt_grant_create(env, ofd_obd(ofd)->obd_self_export, &diff); if (granted < 0) { rc = granted; granted = 0; @@ -1163,7 +1067,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, rc = 0; } - ofd_grant_commit(ofd_obd(ofd)->obd_self_export, granted, rc); + tgt_grant_commit(ofd_obd(ofd)->obd_self_export, granted, rc); out: mutex_unlock(&oseq->os_create_lock); ofd_seq_put(env, oseq); diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in index 2426dfa..f1966fa 100644 --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -13,8 +13,8 @@ ldlm_objs += $(LDLM)ldlm_reclaim.o target_objs := $(TARGET)tgt_main.o $(TARGET)tgt_lastrcvd.o target_objs += $(TARGET)tgt_handler.o $(TARGET)out_handler.o target_objs += $(TARGET)out_lib.o $(TARGET)update_trans.o -target_objs += $(TARGET)update_records.o -target_objs += $(TARGET)update_recovery.o +target_objs += $(TARGET)update_records.o $(TARGET)update_recovery.o +target_objs += $(TARGET)tgt_grant.o ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o diff --git a/lustre/target/Makefile.am b/lustre/target/Makefile.am index c090bdd..af1f153 100644 --- a/lustre/target/Makefile.am +++ b/lustre/target/Makefile.am @@ -32,7 +32,7 @@ MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ EXTRA_DIST = 
tgt_main.c tgt_lastrcvd.c tgt_handler.c tgt_internal.h \ - out_handler.c out_lib.c barrier.c + tgt_grant.c out_handler.c out_lib.c barrier.c EXTRA_DIST += update_trans.c EXTRA_DIST += update_records.c EXTRA_DIST += update_recovery.c diff --git a/lustre/ofd/ofd_grant.c b/lustre/target/tgt_grant.c similarity index 60% rename from lustre/ofd/ofd_grant.c rename to lustre/target/tgt_grant.c index 0f073fb..8a513ed 100644 --- a/lustre/ofd/ofd_grant.c +++ b/lustre/target/tgt_grant.c @@ -26,14 +26,15 @@ * Copyright (c) 2012, 2016, Intel Corporation. */ /* - * lustre/ofd/ofd_grant.c + * lustre/target/tgt_grant.c * - * This file provides code related to grant space management on Object Storage - * Targets (OSTs). Grant is a mechanism used by client nodes to reserve disk - * space on OSTs for the data writeback cache. The Lustre client is thus assured - * that enough space will be available when flushing dirty pages asynchronously. - * Each client node is granted an initial amount of reserved space at connect - * time and gets additional space back from OST in bulk write reply. + * This file provides code related to grant space management on Lustre Targets + * (OSTs and MDTs). Grant is a mechanism used by client nodes to reserve disk + * space on a target for the data writeback cache. The Lustre client is thus + * assured that enough space will be available when flushing dirty pages + * asynchronously. Each client node is granted an initial amount of reserved + * space at connect time and gets additional space back from target in bulk + * write reply. * * We actually support three different cases: * - The client supports the new grant parameters (i.e. 
OBD_CONNECT_GRANT_PARAM) @@ -72,30 +73,33 @@ #define DEBUG_SUBSYSTEM S_FILTER -#include "ofd_internal.h" +#include +#include + +#include "tgt_internal.h" /* Clients typically hold 2x their max_rpcs_in_flight of grant space */ -#define OFD_GRANT_SHRINK_LIMIT(exp) (2ULL * 8 * exp_max_brw_size(exp)) +#define TGT_GRANT_SHRINK_LIMIT(exp) (2ULL * 8 * exp_max_brw_size(exp)) /* Helpers to inflate/deflate grants for clients that do not support the grant * parameters */ -static inline u64 ofd_grant_inflate(struct ofd_device *ofd, u64 val) +static inline u64 tgt_grant_inflate(struct tg_grants_data *tgd, u64 val) { - if (ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT) + if (tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) /* Client does not support such large block size, grant * is thus inflated. We already significantly overestimate * overhead, no need to add the extent tax in this case */ - return val << (ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT); + return val << (tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT); /* client can deal with the block size, but does not support per-extent * grant accounting, inflate grant by 100% for such clients */ return val << 1; } -/* Companion of ofd_grant_inflate() */ -static inline u64 ofd_grant_deflate(struct ofd_device *ofd, u64 val) +/* Companion of tgt_grant_inflate() */ +static inline u64 tgt_grant_deflate(struct tg_grants_data *tgd, u64 val) { - if (ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT) - return val >> (ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT); + if (tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) + return val >> (tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT); return val >> 1; } @@ -103,32 +107,69 @@ static inline u64 ofd_grant_deflate(struct ofd_device *ofd, u64 val) * if the client does not support the grant paramaters. * Check connection flag against \a data if not NULL. 
This is used during * connection creation where exp->exp_connect_data isn't populated yet */ -static inline u64 ofd_grant_chunk(struct obd_export *exp, - struct ofd_device *ofd, +static inline u64 tgt_grant_chunk(struct obd_export *exp, + struct lu_target *lut, struct obd_connect_data *data) { + struct tg_grants_data *tgd = &lut->lut_tgd; u64 chunk = exp_max_brw_size(exp); u64 tax; - if (ofd_obd(ofd)->obd_self_export == exp) + if (exp->exp_obd->obd_self_export == exp) /* Grant enough space to handle a big precreate request */ - return OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace / 2; + return OST_MAX_PRECREATE * lut->lut_dt_conf.ddp_inodespace / 2; - if ((data == NULL && !ofd_grant_param_supp(exp)) || + if ((data == NULL && !(exp_grant_param_supp(exp))) || (data != NULL && !OCD_HAS_FLAG(data, GRANT_PARAM))) /* Try to grant enough space to send a full-size RPC */ - return ofd_grant_inflate(ofd, chunk); + return tgt_grant_inflate(tgd, chunk); /* Try to return enough to send two full-size RPCs * = 2 * (BRW_size + #extents_in_BRW * grant_tax) */ - tax = 1ULL << ofd->ofd_blockbits; /* block size */ - tax *= ofd->ofd_dt_conf.ddp_max_extent_blks; /* max extent size */ + tax = 1ULL << tgd->tgd_blockbits; /* block size */ + tax *= lut->lut_dt_conf.ddp_max_extent_blks; /* max extent size */ tax = (chunk + tax - 1) / tax; /* #extents in a RPC */ - tax *= ofd->ofd_dt_conf.ddp_extent_tax; /* extent tax for a RPC */ + tax *= lut->lut_dt_conf.ddp_extent_tax; /* extent tax for a RPC */ chunk = (chunk + tax) * 2; /* we said two full RPCs */ return chunk; } +static int tgt_check_export_grants(struct obd_export *exp, u64 *dirty, + u64 *pending, u64 *granted, u64 maxsize) +{ + struct tg_export_data *ted = &exp->exp_target_data; + int level = D_CACHE; + + if (exp->exp_obd->obd_self_export == exp) + CDEBUG(D_CACHE, "%s: processing self export: %ld %ld " + "%ld\n", exp->exp_obd->obd_name, ted->ted_grant, + ted->ted_pending, ted->ted_dirty); + + if (ted->ted_grant < 0 || 
ted->ted_pending < 0 || ted->ted_dirty < 0) + level = D_ERROR; + CDEBUG_LIMIT(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, + ted->ted_dirty, ted->ted_pending, ted->ted_grant); + + if (ted->ted_grant + ted->ted_pending > maxsize) { + CERROR("%s: cli %s/%p ted_grant(%ld) + ted_pending(%ld)" + " > maxsize(%llu)\n", exp->exp_obd->obd_name, + exp->exp_client_uuid.uuid, exp, ted->ted_grant, + ted->ted_pending, maxsize); + return -EFAULT; + } + if (ted->ted_dirty > maxsize) { + CERROR("%s: cli %s/%p ted_dirty(%ld) > maxsize(%llu)\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, + exp, ted->ted_dirty, maxsize); + return -EFAULT; + } + *granted += ted->ted_grant + ted->ted_pending; + *pending += ted->ted_pending; + *dirty += ted->ted_dirty; + return 0; +} + /** * Perform extra sanity checks for grant accounting. * @@ -144,9 +185,10 @@ static inline u64 ofd_grant_chunk(struct obd_export *exp, * verified * \param[in] func caller's function name */ -void ofd_grant_sanity_check(struct obd_device *obd, const char *func) +void tgt_grant_sanity_check(struct obd_device *obd, const char *func) { - struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + struct lu_target *lut = obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; struct obd_export *exp; u64 maxsize; u64 tot_dirty = 0; @@ -155,6 +197,7 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func) u64 fo_tot_granted; u64 fo_tot_pending; u64 fo_tot_dirty; + int error; if (list_empty(&obd->obd_exports)) return; @@ -164,93 +207,38 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func) if (obd->obd_num_exports > 100) return; - maxsize = ofd->ofd_osfs.os_blocks << ofd->ofd_blockbits; + maxsize = tgd->tgd_osfs.os_blocks << tgd->tgd_blockbits; spin_lock(&obd->obd_dev_lock); - spin_lock(&ofd->ofd_grant_lock); + spin_lock(&tgd->tgd_grant_lock); list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { - struct 
filter_export_data *fed; - int error = 0; - - fed = &exp->exp_filter_data; - - if (obd->obd_self_export == exp) - CDEBUG(D_CACHE, "%s: processing self export: %ld %ld " - "%ld\n", obd->obd_name, fed->fed_grant, - fed->fed_pending, fed->fed_dirty); - - if (fed->fed_grant < 0 || fed->fed_pending < 0 || - fed->fed_dirty < 0) - error = 1; - if (fed->fed_grant + fed->fed_pending > maxsize) { - CERROR("%s: cli %s/%p fed_grant(%ld) + fed_pending(%ld)" - " > maxsize(%llu)\n", obd->obd_name, - exp->exp_client_uuid.uuid, exp, fed->fed_grant, - fed->fed_pending, maxsize); + error = tgt_check_export_grants(exp, &tot_dirty, &tot_pending, + &tot_granted, maxsize); + if (error < 0) { spin_unlock(&obd->obd_dev_lock); - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } - if (fed->fed_dirty > maxsize) { - CERROR("%s: cli %s/%p fed_dirty(%ld) > maxsize(%llu" - ")\n", obd->obd_name, exp->exp_client_uuid.uuid, - exp, fed->fed_dirty, maxsize); - spin_unlock(&obd->obd_dev_lock); - spin_unlock(&ofd->ofd_grant_lock); - LBUG(); - } - CDEBUG_LIMIT(error ? 
D_ERROR : D_CACHE, "%s: cli %s/%p dirty " - "%ld pend %ld grant %ld\n", obd->obd_name, - exp->exp_client_uuid.uuid, exp, fed->fed_dirty, - fed->fed_pending, fed->fed_grant); - tot_granted += fed->fed_grant + fed->fed_pending; - tot_pending += fed->fed_pending; - tot_dirty += fed->fed_dirty; } /* exports about to be unlinked should also be taken into account since * they might still hold pending grant space to be released at * commit time */ list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) { - struct filter_export_data *fed; - int error = 0; - - fed = &exp->exp_filter_data; - - if (fed->fed_grant < 0 || fed->fed_pending < 0 || - fed->fed_dirty < 0) - error = 1; - if (fed->fed_grant + fed->fed_pending > maxsize) { - CERROR("%s: cli %s/%p fed_grant(%ld) + fed_pending(%ld)" - " > maxsize(%llu)\n", obd->obd_name, - exp->exp_client_uuid.uuid, exp, fed->fed_grant, - fed->fed_pending, maxsize); - spin_unlock(&obd->obd_dev_lock); - spin_unlock(&ofd->ofd_grant_lock); - LBUG(); - } - if (fed->fed_dirty > maxsize) { - CERROR("%s: cli %s/%p fed_dirty(%ld) > maxsize(%llu" - ")\n", obd->obd_name, exp->exp_client_uuid.uuid, - exp, fed->fed_dirty, maxsize); + error = tgt_check_export_grants(exp, &tot_dirty, &tot_pending, + &tot_granted, maxsize); + if (error < 0) { spin_unlock(&obd->obd_dev_lock); - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } - CDEBUG_LIMIT(error ? 
D_ERROR : D_CACHE, "%s: cli %s/%p dirty " - "%ld pend %ld grant %ld\n", obd->obd_name, - exp->exp_client_uuid.uuid, exp, fed->fed_dirty, - fed->fed_pending, fed->fed_grant); - tot_granted += fed->fed_grant + fed->fed_pending; - tot_pending += fed->fed_pending; - tot_dirty += fed->fed_dirty; } - fo_tot_granted = ofd->ofd_tot_granted; - fo_tot_pending = ofd->ofd_tot_pending; - fo_tot_dirty = ofd->ofd_tot_dirty; + fo_tot_granted = tgd->tgd_tot_granted; + fo_tot_pending = tgd->tgd_tot_pending; + fo_tot_dirty = tgd->tgd_tot_dirty; spin_unlock(&obd->obd_dev_lock); - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); if (tot_granted != fo_tot_granted) CERROR("%s: tot_granted %llu != fo_tot_granted %llu\n", @@ -271,11 +259,113 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func) CERROR("%s: tot_dirty %llu > maxsize %llu\n", func, tot_dirty, maxsize); } +EXPORT_SYMBOL(tgt_grant_sanity_check); + +/** + * Get file system statistics of target. + * + * Helper function for statfs(), also used by grant code. + * Implements caching for statistics to avoid calling OSD device each time. + * + * \param[in] env execution environment + * \param[in] lut LU target + * \param[out] osfs statistic data to return + * \param[in] max_age maximum age for cached data + * \param[out] from_cache on success, 1 if data came from cache, else 0 + * + * \retval 0 if successful + * \retval negative value on error + */ +int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut, + struct obd_statfs *osfs, __u64 max_age, int *from_cache) +{ + struct tg_grants_data *tgd = &lut->lut_tgd; + int rc = 0; + ENTRY; + + spin_lock(&tgd->tgd_osfs_lock); + if (cfs_time_before_64(tgd->tgd_osfs_age, max_age) || max_age == 0) { + u64 unstable; + + /* statfs data are too old, get up-to-date one. 
+ * we must be cautious here since multiple threads might be + * willing to update statfs data concurrently and we must + * grant that cached statfs data are always consistent */ + + if (tgd->tgd_statfs_inflight == 0) + /* clear inflight counter if no users, although it would + * take a while to overflow this 64-bit counter ... */ + tgd->tgd_osfs_inflight = 0; + /* notify tgt_grant_commit() that we want to track writes + * completed as of now */ + tgd->tgd_statfs_inflight++; + /* record value of inflight counter before running statfs to + * compute the diff once statfs is completed */ + unstable = tgd->tgd_osfs_inflight; + spin_unlock(&tgd->tgd_osfs_lock); + + /* statfs can sleep ... hopefully not for too long since we can + * call it fairly often as space fills up */ + rc = dt_statfs(env, lut->lut_bottom, osfs); + if (unlikely(rc)) + GOTO(out, rc); + + spin_lock(&tgd->tgd_grant_lock); + spin_lock(&tgd->tgd_osfs_lock); + /* calculate how much space was written while we released the + * tgd_osfs_lock */ + unstable = tgd->tgd_osfs_inflight - unstable; + tgd->tgd_osfs_unstable = 0; + if (unstable) { + /* some writes committed while we were running statfs + * w/o the tgd_osfs_lock. Those ones got added to + * the cached statfs data that we are about to crunch. + * Take them into account in the new statfs data */ + osfs->os_bavail -= min_t(u64, osfs->os_bavail, + unstable >> tgd->tgd_blockbits); + /* However, we don't really know if those writes got + * accounted in the statfs call, so tell + * tgt_grant_space_left() there is some uncertainty + * on the accounting of those writes. + * The purpose is to prevent spurious error messages in + * tgt_grant_space_left() since those writes might be + * accounted twice. 
*/ + tgd->tgd_osfs_unstable += unstable; + } + /* similarly, there is some uncertainty on write requests + * between prepare & commit */ + tgd->tgd_osfs_unstable += tgd->tgd_tot_pending; + spin_unlock(&tgd->tgd_grant_lock); + + /* finally update cached statfs data */ + tgd->tgd_osfs = *osfs; + tgd->tgd_osfs_age = cfs_time_current_64(); + + tgd->tgd_statfs_inflight--; /* stop tracking */ + if (tgd->tgd_statfs_inflight == 0) + tgd->tgd_osfs_inflight = 0; + spin_unlock(&tgd->tgd_osfs_lock); + + if (from_cache) + *from_cache = 0; + } else { + /* use cached statfs data */ + *osfs = tgd->tgd_osfs; + spin_unlock(&tgd->tgd_osfs_lock); + if (from_cache) + *from_cache = 1; + } + GOTO(out, rc); + +out: + return rc; +} +EXPORT_SYMBOL(tgt_statfs_internal); /** * Update cached statfs information from the OSD layer * - * Refresh statfs information cached in ofd::ofd_osfs if the cache is older + * Refresh statfs information cached in tgd::tgd_osfs if the cache is older * than 1s or if force is set. The OSD layer is in charge of estimating data & * metadata overhead. * This function can sleep so it should not be called with any spinlock held. 
@@ -287,12 +377,14 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func) * \param[out] from_cache returns whether the statfs information are * taken from cache */ -static void ofd_grant_statfs(const struct lu_env *env, struct obd_export *exp, +static void tgt_grant_statfs(const struct lu_env *env, struct obd_export *exp, int force, int *from_cache) { struct obd_device *obd = exp->exp_obd; - struct ofd_device *ofd = ofd_exp(exp); - struct obd_statfs *osfs = &ofd_info(env)->fti_u.osfs; + struct lu_target *lut = obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; + struct tgt_thread_info *tti; + struct obd_statfs *osfs; __u64 max_age; int rc; @@ -301,7 +393,9 @@ static void ofd_grant_statfs(const struct lu_env *env, struct obd_export *exp, else max_age = cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS); - rc = ofd_statfs_internal(env, ofd, osfs, max_age, from_cache); + tti = tgt_th_info(env); + osfs = &tti->tti_u.osfs; + rc = tgt_statfs_internal(env, lut, osfs, max_age, from_cache); if (unlikely(rc)) { if (from_cache) *from_cache = 0; @@ -310,8 +404,8 @@ static void ofd_grant_statfs(const struct lu_env *env, struct obd_export *exp, CDEBUG(D_CACHE, "%s: cli %s/%p free: %llu avail: %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - osfs->os_bfree << ofd->ofd_blockbits, - osfs->os_bavail << ofd->ofd_blockbits); + osfs->os_bfree << tgd->tgd_blockbits, + osfs->os_bavail << tgd->tgd_blockbits); } /** @@ -319,37 +413,38 @@ static void ofd_grant_statfs(const struct lu_env *env, struct obd_export *exp, * removing grant space already booked by clients. * * This is done by accessing cached statfs data previously populated by - * ofd_grant_statfs(), from which we withdraw the space already granted to + * tgt_grant_statfs(), from which we withdraw the space already granted to * clients and the reserved space. - * Caller must hold ofd_grant_lock spinlock. + * Caller must hold tgd_grant_lock spinlock. 
* * \param[in] exp export associated with the device for which the amount * of available space is requested * \retval amount of non-allocated space, in bytes */ -static u64 ofd_grant_space_left(struct obd_export *exp) +static u64 tgt_grant_space_left(struct obd_export *exp) { - struct obd_device *obd = exp->exp_obd; - struct ofd_device *ofd = ofd_exp(exp); - u64 tot_granted; - u64 left; - u64 avail; - u64 unstable; + struct obd_device *obd = exp->exp_obd; + struct lu_target *lut = obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; + u64 tot_granted; + u64 left; + u64 avail; + u64 unstable; ENTRY; - assert_spin_locked(&ofd->ofd_grant_lock); + assert_spin_locked(&tgd->tgd_grant_lock); - spin_lock(&ofd->ofd_osfs_lock); + spin_lock(&tgd->tgd_osfs_lock); /* get available space from cached statfs data */ - left = ofd->ofd_osfs.os_bavail << ofd->ofd_blockbits; - unstable = ofd->ofd_osfs_unstable; /* those might be accounted twice */ - spin_unlock(&ofd->ofd_osfs_lock); + left = tgd->tgd_osfs.os_bavail << tgd->tgd_blockbits; + unstable = tgd->tgd_osfs_unstable; /* those might be accounted twice */ + spin_unlock(&tgd->tgd_osfs_lock); - tot_granted = ofd->ofd_tot_granted; + tot_granted = tgd->tgd_tot_granted; if (left < tot_granted) { int mask = (left + unstable < - tot_granted - ofd->ofd_tot_pending) ? + tot_granted - tgd->tgd_tot_pending) ? 
D_ERROR : D_CACHE; CDEBUG_LIMIT(mask, "%s: cli %s/%p left %llu < tot_grant " @@ -357,7 +452,8 @@ static u64 ofd_grant_space_left(struct obd_export *exp) "dirty %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, left, tot_granted, unstable, - ofd->ofd_tot_pending, ofd->ofd_tot_dirty); + tgd->tgd_tot_pending, + tgd->tgd_tot_dirty); RETURN(0); } @@ -366,12 +462,12 @@ static u64 ofd_grant_space_left(struct obd_export *exp) left -= tot_granted; /* Align left on block size */ - left &= ~((1ULL << ofd->ofd_blockbits) - 1); + left &= ~((1ULL << tgd->tgd_blockbits) - 1); CDEBUG(D_CACHE, "%s: cli %s/%p avail %llu left %llu unstable " "%llu tot_grant %llu pending %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, avail, left, unstable, - tot_granted, ofd->ofd_tot_pending); + tot_granted, tgd->tgd_tot_pending); RETURN(left); } @@ -384,23 +480,23 @@ static u64 ofd_grant_space_left(struct obd_export *exp) * inflate all grant counters passed in the request if the client does not * support the grant parameters. * We will later calculate the client's new grant and return it. - * Caller must hold ofd_grant_lock spinlock. + * Caller must hold tgd_grant_lock spinlock. 
* * \param[in] env LU environment supplying osfs storage * \param[in] exp export for which we received the request * \param[in,out] oa incoming obdo sent by the client */ -static void ofd_grant_incoming(const struct lu_env *env, struct obd_export *exp, +static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp, struct obdo *oa, long chunk) { - struct filter_export_data *fed; - struct ofd_device *ofd = ofd_exp(exp); - struct obd_device *obd = exp->exp_obd; - long dirty; - long dropped; + struct tg_export_data *ted = &exp->exp_target_data; + struct obd_device *obd = exp->exp_obd; + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + long dirty; + long dropped; ENTRY; - assert_spin_locked(&ofd->ofd_grant_lock); + assert_spin_locked(&tgd->tgd_grant_lock); if ((oa->o_valid & (OBD_MD_FLBLOCKS|OBD_MD_FLGRANT)) != (OBD_MD_FLBLOCKS|OBD_MD_FLGRANT)) { @@ -408,59 +504,57 @@ static void ofd_grant_incoming(const struct lu_env *env, struct obd_export *exp, RETURN_EXIT; } - fed = &exp->exp_filter_data; - /* Add some margin, since there is a small race if other RPCs arrive * out-or-order and have already consumed some grant. We want to * leave this here in case there is a large error in accounting. 
*/ CDEBUG(D_CACHE, "%s: cli %s/%p reports grant %llu dropped %u, local %lu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, oa->o_grant, - oa->o_dropped, fed->fed_grant); + oa->o_dropped, ted->ted_grant); if ((long long)oa->o_dirty < 0) oa->o_dirty = 0; /* inflate grant counters if required */ - if (!ofd_grant_param_supp(exp)) { - oa->o_grant = ofd_grant_inflate(ofd, oa->o_grant); - oa->o_dirty = ofd_grant_inflate(ofd, oa->o_dirty); - oa->o_dropped = ofd_grant_inflate(ofd, (u64)oa->o_dropped); - oa->o_undirty = ofd_grant_inflate(ofd, oa->o_undirty); + if (!exp_grant_param_supp(exp)) { + oa->o_grant = tgt_grant_inflate(tgd, oa->o_grant); + oa->o_dirty = tgt_grant_inflate(tgd, oa->o_dirty); + oa->o_dropped = tgt_grant_inflate(tgd, (u64)oa->o_dropped); + oa->o_undirty = tgt_grant_inflate(tgd, oa->o_undirty); } dirty = oa->o_dirty; dropped = oa->o_dropped; /* Update our accounting now so that statfs takes it into account. - * Note that fed_dirty is only approximate and can become incorrect + * Note that ted_dirty is only approximate and can become incorrect * if RPCs arrive out-of-order. No important calculations depend - * on fed_dirty however, but we must check sanity to not assert. */ - if (dirty > fed->fed_grant + 4 * chunk) - dirty = fed->fed_grant + 4 * chunk; - ofd->ofd_tot_dirty += dirty - fed->fed_dirty; - if (fed->fed_grant < dropped) { + * on ted_dirty however, but we must check sanity to not assert. 
*/ + if (dirty > ted->ted_grant + 4 * chunk) + dirty = ted->ted_grant + 4 * chunk; + tgd->tgd_tot_dirty += dirty - ted->ted_dirty; + if (ted->ted_grant < dropped) { CDEBUG(D_CACHE, "%s: cli %s/%p reports %lu dropped > grant %lu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, dropped, - fed->fed_grant); + ted->ted_grant); dropped = 0; } - if (ofd->ofd_tot_granted < dropped) { + if (tgd->tgd_tot_granted < dropped) { CERROR("%s: cli %s/%p reports %lu dropped > tot_grant %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - dropped, ofd->ofd_tot_granted); + dropped, tgd->tgd_tot_granted); dropped = 0; } - ofd->ofd_tot_granted -= dropped; - fed->fed_grant -= dropped; - fed->fed_dirty = dirty; + tgd->tgd_tot_granted -= dropped; + ted->ted_grant -= dropped; + ted->ted_dirty = dirty; - if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) { + if (ted->ted_dirty < 0 || ted->ted_grant < 0 || ted->ted_pending < 0) { CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - fed->fed_dirty, fed->fed_pending, fed->fed_grant); - spin_unlock(&ofd->ofd_grant_lock); + ted->ted_dirty, ted->ted_pending, ted->ted_grant); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } EXIT; @@ -473,36 +567,35 @@ static void ofd_grant_incoming(const struct lu_env *env, struct obd_export *exp, * shrinking). This function proceeds with the shrink request when there is * less ungranted space remaining than the amount all of the connected clients * would consume if they used their full grant. - * Caller must hold ofd_grant_lock spinlock. + * Caller must hold tgd_grant_lock spinlock. 
* * \param[in] exp export releasing grant space * \param[in,out] oa incoming obdo sent by the client * \param[in] left_space remaining free space with space already granted * taken out */ -static void ofd_grant_shrink(struct obd_export *exp, struct obdo *oa, +static void tgt_grant_shrink(struct obd_export *exp, struct obdo *oa, u64 left_space) { - struct filter_export_data *fed; - struct ofd_device *ofd = ofd_exp(exp); - struct obd_device *obd = exp->exp_obd; - long grant_shrink; + struct tg_export_data *ted = &exp->exp_target_data; + struct obd_device *obd = exp->exp_obd; + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + long grant_shrink; - assert_spin_locked(&ofd->ofd_grant_lock); + assert_spin_locked(&tgd->tgd_grant_lock); LASSERT(exp); - if (left_space >= ofd->ofd_tot_granted_clients * - OFD_GRANT_SHRINK_LIMIT(exp)) + if (left_space >= tgd->tgd_tot_granted_clients * + TGT_GRANT_SHRINK_LIMIT(exp)) return; grant_shrink = oa->o_grant; - fed = &exp->exp_filter_data; - fed->fed_grant -= grant_shrink; - ofd->ofd_tot_granted -= grant_shrink; + ted->ted_grant -= grant_shrink; + tgd->tgd_tot_granted -= grant_shrink; - CDEBUG(D_CACHE, "%s: cli %s/%p shrink %ld fed_grant %ld total %llu\n", + CDEBUG(D_CACHE, "%s: cli %s/%p shrink %ld ted_grant %ld total %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, grant_shrink, - fed->fed_grant, ofd->ofd_tot_granted); + ted->ted_grant, tgd->tgd_tot_granted); /* client has just released some grant, don't grant any space back */ oa->o_grant = 0; @@ -520,25 +613,26 @@ static void ofd_grant_shrink(struct obd_export *exp, struct obdo *oa, * \param[in] exp export associated which the write request * if NULL, then size estimate is done for server-side * grant allocation. 
- * \param[in] ofd ofd device handling the request + * \param[in] lut LU target handling the request * \param[in] rnb network buffer to estimate size of * * \retval space (in bytes) that will be consumed to write the * network buffer */ -static inline u64 ofd_grant_rnb_size(struct obd_export *exp, - struct ofd_device *ofd, +static inline u64 tgt_grant_rnb_size(struct obd_export *exp, + struct lu_target *lut, struct niobuf_remote *rnb) { + struct tg_grants_data *tgd = &lut->lut_tgd; u64 blksize; u64 bytes; u64 end; - if (exp && !ofd_grant_param_supp(exp) && - ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT) + if (exp && !exp_grant_param_supp(exp) && + tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) blksize = 1ULL << COMPAT_BSIZE_SHIFT; else - blksize = 1ULL << ofd->ofd_blockbits; + blksize = 1ULL << tgd->tgd_blockbits; /* The network buffer might span several blocks, align it on block * boundaries */ @@ -548,18 +642,18 @@ static inline u64 ofd_grant_rnb_size(struct obd_export *exp, if (end) bytes += blksize - end; - if (exp == NULL || ofd_grant_param_supp(exp)) { + if (exp == NULL || exp_grant_param_supp(exp)) { /* add per-extent insertion cost */ u64 max_ext; int nr_ext; - max_ext = blksize * ofd->ofd_dt_conf.ddp_max_extent_blks; + max_ext = blksize * lut->lut_dt_conf.ddp_max_extent_blks; nr_ext = (bytes + max_ext - 1) / max_ext; - bytes += nr_ext * ofd->ofd_dt_conf.ddp_extent_tax; + bytes += nr_ext * lut->lut_dt_conf.ddp_extent_tax; } else { /* Inflate grant space if client does not support extent-based * grant allocation */ - bytes = ofd_grant_inflate(ofd, (u64)bytes); + bytes = tgt_grant_inflate(tgd, (u64)bytes); } return bytes; @@ -576,8 +670,8 @@ static inline u64 ofd_grant_rnb_size(struct obd_export *exp, * right on through. * The OBD_BRW_GRANTED flag will be set in the rnb_flags of each network * buffer which has been granted enough space to proceed. Buffers without - * this flag will fail to be written with -ENOSPC (see ofd_preprw_write(). 
- * Caller must hold ofd_grant_lock spinlock. + * this flag will fail to be written with -ENOSPC (see tgt_preprw_write(). + * Caller must hold tgd_grant_lock spinlock. * * \param[in] env LU environment passed by the caller * \param[in] exp export identifying the client which sent the RPC @@ -588,22 +682,22 @@ static inline u64 ofd_grant_rnb_size(struct obd_export *exp, * \param[in] left the remaining free space with space already granted * taken out */ -static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, +static void tgt_grant_check(const struct lu_env *env, struct obd_export *exp, struct obdo *oa, struct niobuf_remote *rnb, int niocount, u64 *left) { - struct filter_export_data *fed = &exp->exp_filter_data; - struct obd_device *obd = exp->exp_obd; - struct ofd_device *ofd = ofd_exp(exp); - unsigned long ungranted = 0; - unsigned long granted = 0; - int i; - bool skip = false; - struct ofd_thread_info *info = ofd_info(env); + struct tg_export_data *ted = &exp->exp_target_data; + struct obd_device *obd = exp->exp_obd; + struct lu_target *lut = obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; + unsigned long ungranted = 0; + unsigned long granted = 0; + int i; + bool skip = false; ENTRY; - assert_spin_locked(&ofd->ofd_grant_lock); + assert_spin_locked(&tgd->tgd_grant_lock); if (obd->obd_recovering) { /* Replaying write. Grant info have been processed already so no @@ -620,13 +714,13 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, skip = true; CDEBUG(D_CACHE, "Recoverable resend arrived, skipping " "accounting\n"); - } else if (ofd_grant_param_supp(exp) && oa->o_grant_used > 0) { + } else if (exp_grant_param_supp(exp) && oa->o_grant_used > 0) { /* Client supports the new grant parameters and is telling us * how much grant space it consumed for this bulk write. 
* Although all rnbs are supposed to have the OBD_BRW_FROM_GRANT * flag set, we will scan the rnb list and looks for non-cache * I/O in case it changes in the future */ - if (fed->fed_grant >= oa->o_grant_used) { + if (ted->ted_grant >= oa->o_grant_used) { /* skip grant accounting for rnbs with * OBD_BRW_FROM_GRANT and just used grant consumption * claimed in the request */ @@ -638,13 +732,13 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, CERROR("%s: cli %s claims %lu GRANT, real grant %lu\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, - (unsigned long)oa->o_grant_used, fed->fed_grant); + (unsigned long)oa->o_grant_used, ted->ted_grant); /* check whether we can fill the gap with unallocated * grant */ - if (*left > (oa->o_grant_used - fed->fed_grant)) { + if (*left > (oa->o_grant_used - ted->ted_grant)) { /* ouf .. we are safe for now */ - granted = fed->fed_grant; + granted = ted->ted_grant; ungranted = oa->o_grant_used - granted; *left -= ungranted; skip = true; @@ -666,8 +760,8 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, /* compute how much grant space is actually needed for * this rnb, inflate grant if required */ - bytes = ofd_grant_rnb_size(exp, ofd, &rnb[i]); - if (fed->fed_grant >= granted + bytes) { + bytes = tgt_grant_rnb_size(exp, lut, &rnb[i]); + if (ted->ted_grant >= granted + bytes) { granted += bytes; rnb[i].rnb_flags |= OBD_BRW_GRANTED; continue; @@ -676,7 +770,7 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, CDEBUG(D_CACHE, "%s: cli %s/%p claims %ld+%d GRANT, " "real grant %lu idx %d\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, granted, bytes, - fed->fed_grant, i); + ted->ted_grant, i); } if (obd->obd_recovering) @@ -686,11 +780,11 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, rnb[i].rnb_flags); /* Consume grant space on the server. 
- * Unlike above, ofd_grant_rnb_size() is called with exp = NULL + * Unlike above, tgt_grant_rnb_size() is called with exp = NULL * so that the required grant space isn't inflated. This is * done on purpose since the server can deal with large block * size, unlike some clients */ - bytes = ofd_grant_rnb_size(NULL, ofd, &rnb[i]); + bytes = tgt_grant_rnb_size(NULL, lut, &rnb[i]); if (*left > bytes) { /* if enough space, pretend it was granted */ ungranted += bytes; @@ -707,49 +801,48 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, * BRW_GRANTED are already mapped and we can ignore this error. */ rnb[i].rnb_flags &= ~OBD_BRW_GRANTED; - CDEBUG(D_CACHE,"%s: cli %s/%p idx %d no space for %d\n", + CDEBUG(D_CACHE, "%s: cli %s/%p idx %d no space for %d\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, i, bytes); } /* record in o_grant_used the actual space reserved for the I/O, will be - * used later in ofd_grant_commmit() */ + * used later in tgt_grant_commit() */ oa->o_grant_used = granted + ungranted; - info->fti_used = granted + ungranted; - /* record space used for the I/O, will be used in ofd_grant_commmit() */ + /* record space used for the I/O, will be used in tgt_grant_commit() */ /* Now substract what the clients has used already. We don't subtract * this from the tot_granted yet, so that other client's can't grab * that space before we have actually allocated our blocks. That - * happens in ofd_grant_commit() after the writes are done. */ - fed->fed_grant -= granted; - fed->fed_pending += oa->o_grant_used; - ofd->ofd_tot_granted += ungranted; - ofd->ofd_tot_pending += oa->o_grant_used; + * happens in tgt_grant_commit() after the writes are done.
*/ + ted->ted_grant -= granted; + ted->ted_pending += oa->o_grant_used; + tgd->tgd_tot_granted += ungranted; + tgd->tgd_tot_pending += oa->o_grant_used; CDEBUG(D_CACHE, "%s: cli %s/%p granted: %lu ungranted: %lu grant: %lu dirty: %lu" "\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - granted, ungranted, fed->fed_grant, fed->fed_dirty); + granted, ungranted, ted->ted_grant, ted->ted_dirty); if (obd->obd_recovering || (oa->o_valid & OBD_MD_FLGRANT) == 0) /* don't update dirty accounting during recovery or * if grant information got discarded (e.g. during resend) */ RETURN_EXIT; - if (fed->fed_dirty < granted) { - CWARN("%s: cli %s/%p claims granted %lu > fed_dirty %lu\n", + if (ted->ted_dirty < granted) { + CWARN("%s: cli %s/%p claims granted %lu > ted_dirty %lu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - granted, fed->fed_dirty); - granted = fed->fed_dirty; + granted, ted->ted_dirty); + granted = ted->ted_dirty; } - ofd->ofd_tot_dirty -= granted; - fed->fed_dirty -= granted; + tgd->tgd_tot_dirty -= granted; + ted->ted_dirty -= granted; - if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) { + if (ted->ted_dirty < 0 || ted->ted_grant < 0 || ted->ted_pending < 0) { CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - fed->fed_dirty, fed->fed_pending, fed->fed_grant); - spin_unlock(&ofd->ofd_grant_lock); + ted->ted_dirty, ted->ted_pending, ted->ted_grant); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } EXIT; @@ -760,7 +853,7 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, * * Calculate how much grant space to return to client, based on how much space * is currently free and how much of that is already granted. - * Caller must hold ofd_grant_lock spinlock. + * Caller must hold tgd_grant_lock spinlock. 
* * \param[in] exp export of the client which sent the request * \param[in] curgrant current grant claimed by the client @@ -775,18 +868,23 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp, * * \retval amount of grant space allocated */ -static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, +static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, u64 want, u64 left, long chunk, bool conservative) { - struct obd_device *obd = exp->exp_obd; - struct ofd_device *ofd = ofd_exp(exp); - struct filter_export_data *fed = &exp->exp_filter_data; - u64 grant; + struct obd_device *obd = exp->exp_obd; + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + struct tg_export_data *ted = &exp->exp_target_data; + u64 grant; ENTRY; - if (ofd_grant_prohibit(exp, ofd) || left == 0 || exp->exp_failed) + /* When tgd_grant_compat_disable is set, we don't grant any space to + * clients not supporting OBD_CONNECT_GRANT_PARAM. + * Otherwise, space granted to such a client is inflated since it + * consumes PAGE_SIZE of grant space per block */ + if ((obd->obd_self_export != exp && !exp_grant_param_supp(exp) && + tgd->tgd_grant_compat_disable) || left == 0 || exp->exp_failed) RETURN(0); if (want > 0x7fffffff) { @@ -804,7 +902,7 @@ static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, * has and what we think it has, don't grant very much and let the * client consume its grant first. Either it just has lots of RPCs * in flight, or it was evicted and its grants will soon be used up. 
*/ - if (curgrant >= want || curgrant >= fed->fed_grant + chunk) + if (curgrant >= want || curgrant >= ted->ted_grant + chunk) RETURN(0); if (obd->obd_recovering) @@ -816,8 +914,8 @@ static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, left >>= 3; grant = min(want - curgrant, left); /* round grant up to the next block size */ - grant = (grant + (1 << ofd->ofd_blockbits) - 1) & - ~((1ULL << ofd->ofd_blockbits) - 1); + grant = (grant + (1 << tgd->tgd_blockbits) - 1) & + ~((1ULL << tgd->tgd_blockbits) - 1); if (!grant) RETURN(0); @@ -826,14 +924,14 @@ static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, if ((grant > chunk) && conservative) grant = chunk; - ofd->ofd_tot_granted += grant; - fed->fed_grant += grant; + tgd->tgd_tot_granted += grant; + ted->ted_grant += grant; - if (fed->fed_grant < 0) { + if (ted->ted_grant < 0) { CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, - fed->fed_grant, want, curgrant); - spin_unlock(&ofd->ofd_grant_lock); + ted->ted_grant, want, curgrant); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } @@ -844,7 +942,7 @@ static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, CDEBUG(D_CACHE, "%s: cli %s/%p tot cached:%llu granted:%llu" " num_exports: %d\n", obd->obd_name, exp->exp_client_uuid.uuid, - exp, ofd->ofd_tot_dirty, ofd->ofd_tot_granted, + exp, tgd->tgd_tot_dirty, tgd->tgd_tot_granted, obd->obd_num_exports); RETURN(grant); @@ -854,8 +952,8 @@ static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, * Handle grant space allocation on client connection & reconnection. * * A new non-readonly connection gets an initial grant allocation equals to - * ofd_grant_chunk() (i.e. twice the max BRW size in most of the cases). - * On reconnection, grant counters between client & OST are resynchronized + * tgt_grant_chunk() (i.e. twice the max BRW size in most of the cases). 
+ * On reconnection, grant counters between client & target are resynchronized * and additional space might be granted back if possible. * * \param[in] env LU environment provided by the caller @@ -865,21 +963,22 @@ static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant, * \param[in] new_conn must set to true if this is a new connection and false * for a reconnection */ -void ofd_grant_connect(const struct lu_env *env, struct obd_export *exp, +void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp, struct obd_connect_data *data, bool new_conn) { - struct ofd_device *ofd = ofd_exp(exp); - struct filter_export_data *fed = &exp->exp_filter_data; - u64 left = 0; - u64 want; - long chunk; - int from_cache; - int force = 0; /* can use cached data */ + struct lu_target *lut = exp->exp_obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; + struct tg_export_data *ted = &exp->exp_target_data; + u64 left = 0; + u64 want; + long chunk; + int from_cache; + int force = 0; /* can use cached data */ /* don't grant space to client with read-only access */ if (OCD_HAS_FLAG(data, RDONLY) || (!OCD_HAS_FLAG(data, GRANT_PARAM) && - ofd->ofd_grant_compat_disable)) { + tgd->tgd_grant_compat_disable)) { data->ocd_grant = 0; data->ocd_connect_flags &= ~(OBD_CONNECT_GRANT | OBD_CONNECT_GRANT_PARAM); @@ -889,43 +988,42 @@ void ofd_grant_connect(const struct lu_env *env, struct obd_export *exp, if (OCD_HAS_FLAG(data, GRANT_PARAM)) want = data->ocd_grant; else - want = ofd_grant_inflate(ofd, data->ocd_grant); - chunk = ofd_grant_chunk(exp, ofd, data); + want = tgt_grant_inflate(tgd, data->ocd_grant); + chunk = tgt_grant_chunk(exp, lut, data); refresh: - ofd_grant_statfs(env, exp, force, &from_cache); + tgt_grant_statfs(env, exp, force, &from_cache); - spin_lock(&ofd->ofd_grant_lock); + spin_lock(&tgd->tgd_grant_lock); /* Grab free space from cached info and take out space already granted * to clients as well as reserved space */ - left = 
ofd_grant_space_left(exp); + left = tgt_grant_space_left(exp); /* get fresh statfs data if we are short in ungranted space */ if (from_cache && left < 32 * chunk) { - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); CDEBUG(D_CACHE, "fs has no space left and statfs too old\n"); force = 1; goto refresh; } - ofd_grant_alloc(exp, (u64)fed->fed_grant, want, left, chunk, new_conn); + tgt_grant_alloc(exp, (u64)ted->ted_grant, want, left, chunk, new_conn); /* return to client its current grant */ if (OCD_HAS_FLAG(data, GRANT_PARAM)) - data->ocd_grant = fed->fed_grant; + data->ocd_grant = ted->ted_grant; else /* deflate grant */ - data->ocd_grant = ofd_grant_deflate(ofd, - (u64)fed->fed_grant); + data->ocd_grant = tgt_grant_deflate(tgd, (u64)ted->ted_grant); /* reset dirty accounting */ - ofd->ofd_tot_dirty -= fed->fed_dirty; - fed->fed_dirty = 0; + tgd->tgd_tot_dirty -= ted->ted_dirty; + ted->ted_dirty = 0; if (new_conn && OCD_HAS_FLAG(data, GRANT)) - ofd->ofd_tot_granted_clients++; + tgd->tgd_tot_granted_clients++; - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: %llu left: %llu\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, @@ -933,44 +1031,46 @@ refresh: EXIT; } +EXPORT_SYMBOL(tgt_grant_connect); /** * Release all grant space attached to a given export. * * Remove a client from the grant accounting totals. We also remove * the export from the obd device under the osfs and dev locks to ensure - * that the ofd_grant_sanity_check() calculations are always valid. + * that the tgt_grant_sanity_check() calculations are always valid. * The client should do something similar when it invalidates its import. 
* * \param[in] exp client's export to remove from grant accounting */ -void ofd_grant_discard(struct obd_export *exp) +void tgt_grant_discard(struct obd_export *exp) { - struct obd_device *obd = exp->exp_obd; - struct ofd_device *ofd = ofd_exp(exp); - struct filter_export_data *fed = &exp->exp_filter_data; - - spin_lock(&ofd->ofd_grant_lock); - LASSERTF(ofd->ofd_tot_granted >= fed->fed_grant, - "%s: tot_granted %llu cli %s/%p fed_grant %ld\n", - obd->obd_name, ofd->ofd_tot_granted, - exp->exp_client_uuid.uuid, exp, fed->fed_grant); - ofd->ofd_tot_granted -= fed->fed_grant; - fed->fed_grant = 0; - LASSERTF(ofd->ofd_tot_pending >= fed->fed_pending, - "%s: tot_pending %llu cli %s/%p fed_pending %ld\n", - obd->obd_name, ofd->ofd_tot_pending, - exp->exp_client_uuid.uuid, exp, fed->fed_pending); - /* ofd_tot_pending is handled in ofd_grant_commit as bulk + struct obd_device *obd = exp->exp_obd; + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + struct tg_export_data *ted = &exp->exp_target_data; + + spin_lock(&tgd->tgd_grant_lock); + LASSERTF(tgd->tgd_tot_granted >= ted->ted_grant, + "%s: tot_granted %llu cli %s/%p ted_grant %ld\n", + obd->obd_name, tgd->tgd_tot_granted, + exp->exp_client_uuid.uuid, exp, ted->ted_grant); + tgd->tgd_tot_granted -= ted->ted_grant; + ted->ted_grant = 0; + LASSERTF(tgd->tgd_tot_pending >= ted->ted_pending, + "%s: tot_pending %llu cli %s/%p ted_pending %ld\n", + obd->obd_name, tgd->tgd_tot_pending, + exp->exp_client_uuid.uuid, exp, ted->ted_pending); + /* tgd_tot_pending is handled in tgt_grant_commit as bulk * commmits */ - LASSERTF(ofd->ofd_tot_dirty >= fed->fed_dirty, - "%s: tot_dirty %llu cli %s/%p fed_dirty %ld\n", - obd->obd_name, ofd->ofd_tot_dirty, - exp->exp_client_uuid.uuid, exp, fed->fed_dirty); - ofd->ofd_tot_dirty -= fed->fed_dirty; - fed->fed_dirty = 0; - spin_unlock(&ofd->ofd_grant_lock); + LASSERTF(tgd->tgd_tot_dirty >= ted->ted_dirty, + "%s: tot_dirty %llu cli %s/%p ted_dirty %ld\n", + obd->obd_name, 
tgd->tgd_tot_dirty, + exp->exp_client_uuid.uuid, exp, ted->ted_dirty); + tgd->tgd_tot_dirty -= ted->ted_dirty; + ted->ted_dirty = 0; + spin_unlock(&tgd->tgd_grant_lock); } +EXPORT_SYMBOL(tgt_grant_discard); /** * Process grant information from incoming bulk read request. @@ -985,12 +1085,14 @@ void ofd_grant_discard(struct obd_export *exp) * \param[in] exp is the export of the client which sent the request * \param[in,out] oa is the incoming obdo sent by the client */ -void ofd_grant_prepare_read(const struct lu_env *env, +void tgt_grant_prepare_read(const struct lu_env *env, struct obd_export *exp, struct obdo *oa) { - struct ofd_device *ofd = ofd_exp(exp); + struct lu_target *lut = exp->exp_obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; int do_shrink; u64 left = 0; + ENTRY; if (!oa) @@ -1007,14 +1109,14 @@ void ofd_grant_prepare_read(const struct lu_env *env, * available space remains on the backend filesystem. * Shrink requests are not so common, we always get fresh * statfs information. */ - ofd_grant_statfs(env, exp, 1, NULL); + tgt_grant_statfs(env, exp, 1, NULL); /* protect all grant counters */ - spin_lock(&ofd->ofd_grant_lock); + spin_lock(&tgd->tgd_grant_lock); /* Grab free space from cached statfs data and take out space * already granted to clients as well as reserved space */ - left = ofd_grant_space_left(exp); + left = tgt_grant_space_left(exp); /* all set now to proceed with shrinking */ do_shrink = 1; @@ -1023,26 +1125,27 @@ void ofd_grant_prepare_read(const struct lu_env *env, * since we don't grant space back on reads, no point * in running statfs, so just skip it and process * incoming grant data directly. 
*/ - spin_lock(&ofd->ofd_grant_lock); + spin_lock(&tgd->tgd_grant_lock); do_shrink = 0; } /* extract incoming grant information provided by the client and * inflate grant counters if required */ - ofd_grant_incoming(env, exp, oa, ofd_grant_chunk(exp, ofd, NULL)); + tgt_grant_incoming(env, exp, oa, tgt_grant_chunk(exp, lut, NULL)); /* unlike writes, we don't return grants back on reads unless a grant * shrink request was packed and we decided to turn it down. */ if (do_shrink) - ofd_grant_shrink(exp, oa, left); + tgt_grant_shrink(exp, oa, left); else oa->o_grant = 0; - if (!ofd_grant_param_supp(exp)) - oa->o_grant = ofd_grant_deflate(ofd, oa->o_grant); - spin_unlock(&ofd->ofd_grant_lock); + if (!exp_grant_param_supp(exp)) + oa->o_grant = tgt_grant_deflate(tgd, oa->o_grant); + spin_unlock(&tgd->tgd_grant_lock); EXIT; } +EXPORT_SYMBOL(tgt_grant_prepare_read); /** * Process grant information from incoming bulk write request. @@ -1054,8 +1157,8 @@ void ofd_grant_prepare_read(const struct lu_env *env, * proceed further and should fail with -ENOSPC. * Whenever possible, additional grant space will be returned to the client * in the bulk write reply. - * ofd_grant_prepare_write() must be called before writting any buffers to - * the backend storage. This function works in pair with ofd_grant_commit() + * tgt_grant_prepare_write() must be called before writing any buffers to + * the backend storage. This function works in pair with tgt_grant_commit() * which must be invoked once all buffers have been written to disk in order * to release space from the pending grant counter.
* @@ -1065,32 +1168,33 @@ void ofd_grant_prepare_read(const struct lu_env *env, * \param[in] rnb list of network buffers * \param[in] niocount number of network buffers in the list */ -void ofd_grant_prepare_write(const struct lu_env *env, +void tgt_grant_prepare_write(const struct lu_env *env, struct obd_export *exp, struct obdo *oa, struct niobuf_remote *rnb, int niocount) { struct obd_device *obd = exp->exp_obd; - struct ofd_device *ofd = ofd_exp(exp); + struct lu_target *lut = obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; u64 left; int from_cache; int force = 0; /* can use cached data intially */ - long chunk = ofd_grant_chunk(exp, ofd, NULL); + long chunk = tgt_grant_chunk(exp, lut, NULL); ENTRY; refresh: /* get statfs information from OSD layer */ - ofd_grant_statfs(env, exp, force, &from_cache); + tgt_grant_statfs(env, exp, force, &from_cache); - spin_lock(&ofd->ofd_grant_lock); /* protect all grant counters */ + spin_lock(&tgd->tgd_grant_lock); /* protect all grant counters */ /* Grab free space from cached statfs data and take out space already * granted to clients as well as reserved space */ - left = ofd_grant_space_left(exp); + left = tgt_grant_space_left(exp); /* Get fresh statfs data if we are short in ungranted space */ if (from_cache && left < 32 * chunk) { - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); CDEBUG(D_CACHE, "%s: fs has no space left and statfs too old\n", obd->obd_name); force = 1; @@ -1106,7 +1210,7 @@ refresh: /* That said, it is worth running a sync only if some pages did * not consume grant space on the client and could thus fail - * with ENOSPC later in ofd_grant_check() */ + * with ENOSPC later in tgt_grant_check() */ for (i = 0; i < niocount; i++) if (!(rnb[i].rnb_flags & OBD_BRW_FROM_GRANT)) from_grant = false; @@ -1114,9 +1218,9 @@ refresh: if (!from_grant) { /* at least one network buffer requires acquiring grant * space on the server */ - spin_unlock(&ofd->ofd_grant_lock); + 
spin_unlock(&tgd->tgd_grant_lock); /* discard errors, at least we tried ... */ - dt_sync(env, ofd->ofd_osd); + dt_sync(env, lut->lut_bottom); force = 2; goto refresh; } @@ -1124,13 +1228,13 @@ refresh: /* extract incoming grant information provided by the client, * and inflate grant counters if required */ - ofd_grant_incoming(env, exp, oa, chunk); + tgt_grant_incoming(env, exp, oa, chunk); /* check limit */ - ofd_grant_check(env, exp, oa, rnb, niocount, &left); + tgt_grant_check(env, exp, oa, rnb, niocount, &left); if (!(oa->o_valid & OBD_MD_FLGRANT)) { - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); RETURN_EXIT; } @@ -1138,17 +1242,18 @@ refresh: * grant space. */ if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_SHRINK_GRANT)) - ofd_grant_shrink(exp, oa, left); + tgt_grant_shrink(exp, oa, left); else /* grant more space back to the client if possible */ - oa->o_grant = ofd_grant_alloc(exp, oa->o_grant, oa->o_undirty, + oa->o_grant = tgt_grant_alloc(exp, oa->o_grant, oa->o_undirty, left, chunk, true); - if (!ofd_grant_param_supp(exp)) - oa->o_grant = ofd_grant_deflate(ofd, oa->o_grant); - spin_unlock(&ofd->ofd_grant_lock); + if (!exp_grant_param_supp(exp)) + oa->o_grant = tgt_grant_deflate(tgd, oa->o_grant); + spin_unlock(&tgd->tgd_grant_lock); EXIT; } +EXPORT_SYMBOL(tgt_grant_prepare_write); /** * Consume grant space reserved for object creation. 
@@ -1168,91 +1273,93 @@ refresh: * \retval >= 0 amount of grant space allocated to the precreate request * \retval -ENOSPC on failure */ -long ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) +long tgt_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) { - struct ofd_device *ofd = ofd_exp(exp); - struct filter_export_data *fed = &exp->exp_filter_data; - u64 left = 0; - unsigned long wanted; - unsigned long granted; + struct lu_target *lut = exp->exp_obd->u.obt.obt_lut; + struct tg_grants_data *tgd = &lut->lut_tgd; + struct tg_export_data *ted = &exp->exp_target_data; + u64 left = 0; + unsigned long wanted; + unsigned long granted; ENTRY; if (exp->exp_obd->obd_recovering || - ofd->ofd_dt_conf.ddp_inodespace == 0) + lut->lut_dt_conf.ddp_inodespace == 0) /* don't enforce grant during recovery */ RETURN(0); /* Update statfs data if required */ - ofd_grant_statfs(env, exp, 1, NULL); + tgt_grant_statfs(env, exp, 1, NULL); /* protect all grant counters */ - spin_lock(&ofd->ofd_grant_lock); + spin_lock(&tgd->tgd_grant_lock); /* fail precreate request if there is not enough blocks available for * writing */ - if (ofd->ofd_osfs.os_bavail - (fed->fed_grant >> ofd->ofd_blockbits) < - (ofd->ofd_osfs.os_blocks >> 10)) { - spin_unlock(&ofd->ofd_grant_lock); + if (tgd->tgd_osfs.os_bavail - (ted->ted_grant >> tgd->tgd_blockbits) < + (tgd->tgd_osfs.os_blocks >> 10)) { + spin_unlock(&tgd->tgd_grant_lock); CDEBUG(D_RPCTRACE, "%s: not enough space for create %llu\n", - ofd_name(ofd), - ofd->ofd_osfs.os_bavail * ofd->ofd_osfs.os_blocks); + exp->exp_obd->obd_name, + tgd->tgd_osfs.os_bavail * tgd->tgd_osfs.os_blocks); RETURN(-ENOSPC); } /* Grab free space from cached statfs data and take out space * already granted to clients as well as reserved space */ - left = ofd_grant_space_left(exp); + left = tgt_grant_space_left(exp); /* compute how much space is required to handle the precreation * request */ - wanted = *nr * 
ofd->ofd_dt_conf.ddp_inodespace; - if (wanted > fed->fed_grant + left) { + wanted = *nr * lut->lut_dt_conf.ddp_inodespace; + if (wanted > ted->ted_grant + left) { /* that's beyond what remains, adjust the number of objects that * can be safely precreated */ - wanted = fed->fed_grant + left; - *nr = wanted / ofd->ofd_dt_conf.ddp_inodespace; + wanted = ted->ted_grant + left; + *nr = wanted / lut->lut_dt_conf.ddp_inodespace; if (*nr == 0) { /* we really have no space any more for precreation, * fail the precreate request with ENOSPC */ - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); RETURN(-ENOSPC); } /* compute space needed for the new number of creations */ - wanted = *nr * ofd->ofd_dt_conf.ddp_inodespace; + wanted = *nr * lut->lut_dt_conf.ddp_inodespace; } - LASSERT(wanted <= fed->fed_grant + left); + LASSERT(wanted <= ted->ted_grant + left); - if (wanted <= fed->fed_grant) { + if (wanted <= ted->ted_grant) { /* we've enough grant space to handle this precreate request */ - fed->fed_grant -= wanted; + ted->ted_grant -= wanted; } else { /* we need to take some space from the ungranted pool */ - ofd->ofd_tot_granted += wanted - fed->fed_grant; - left -= wanted - fed->fed_grant; - fed->fed_grant = 0; + tgd->tgd_tot_granted += wanted - ted->ted_grant; + left -= wanted - ted->ted_grant; + ted->ted_grant = 0; } granted = wanted; - fed->fed_pending += granted; - ofd->ofd_tot_pending += granted; + ted->ted_pending += granted; + tgd->tgd_tot_pending += granted; /* grant more space for precreate purpose if possible. 
*/ - wanted = OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace / 2; - if (wanted > fed->fed_grant) { + wanted = OST_MAX_PRECREATE * lut->lut_dt_conf.ddp_inodespace / 2; + if (wanted > ted->ted_grant) { long chunk; /* always try to book enough space to handle a large precreate * request */ - chunk = ofd_grant_chunk(exp, ofd, NULL); - wanted -= fed->fed_grant; - ofd_grant_alloc(exp, fed->fed_grant, wanted, left, chunk, + chunk = tgt_grant_chunk(exp, lut, NULL); + wanted -= ted->ted_grant; + tgt_grant_alloc(exp, ted->ted_grant, wanted, left, chunk, false); } - spin_unlock(&ofd->ofd_grant_lock); + spin_unlock(&tgd->tgd_grant_lock); RETURN(granted); } +EXPORT_SYMBOL(tgt_grant_create); /** - * Release grant space added to the pending counter by ofd_grant_prepare_write() + * Release grant space added to the pending counter by tgt_grant_prepare_write() * * Update pending grant counter once buffers have been written to the disk. * @@ -1260,18 +1367,19 @@ long ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr) * \param[in] pending amount of reserved space to be released * \param[in] rc return code of pre-commit operations */ -void ofd_grant_commit(struct obd_export *exp, unsigned long pending, +void tgt_grant_commit(struct obd_export *exp, unsigned long pending, int rc) { - struct ofd_device *ofd = ofd_exp(exp); + struct tg_grants_data *tgd = &exp->exp_obd->u.obt.obt_lut->lut_tgd; + ENTRY; /* get space accounted in tot_pending for the I/O, set in - * ofd_grant_check() */ + * tgt_grant_check() */ if (pending == 0) RETURN_EXIT; - spin_lock(&ofd->ofd_grant_lock); + spin_lock(&tgd->tgd_grant_lock); /* Don't update statfs data for errors raised before commit (e.g. * bulk transfer failed, ...) since we know those writes have not been * processed. 
For other errors hit during commit, we cannot really tell @@ -1279,55 +1387,56 @@ void ofd_grant_commit(struct obd_export *exp, unsigned long pending, * In any case, this should not be fatal since we always get fresh * statfs data before failing a request with ENOSPC */ if (rc == 0) { - spin_lock(&ofd->ofd_osfs_lock); + spin_lock(&tgd->tgd_osfs_lock); /* Take pending out of cached statfs data */ - ofd->ofd_osfs.os_bavail -= min_t(u64, - ofd->ofd_osfs.os_bavail, - pending >> ofd->ofd_blockbits); - if (ofd->ofd_statfs_inflight) + tgd->tgd_osfs.os_bavail -= min_t(u64, + tgd->tgd_osfs.os_bavail, + pending >> tgd->tgd_blockbits); + if (tgd->tgd_statfs_inflight) /* someone is running statfs and want to be notified of * writes happening meanwhile */ - ofd->ofd_osfs_inflight += pending; - spin_unlock(&ofd->ofd_osfs_lock); + tgd->tgd_osfs_inflight += pending; + spin_unlock(&tgd->tgd_osfs_lock); } - if (exp->exp_filter_data.fed_pending < pending) { - CERROR("%s: cli %s/%p fed_pending(%lu) < grant_used(%lu)\n", + if (exp->exp_target_data.ted_pending < pending) { + CERROR("%s: cli %s/%p ted_pending(%lu) < grant_used(%lu)\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, - exp->exp_filter_data.fed_pending, pending); - spin_unlock(&ofd->ofd_grant_lock); + exp->exp_target_data.ted_pending, pending); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } - exp->exp_filter_data.fed_pending -= pending; + exp->exp_target_data.ted_pending -= pending; - if (ofd->ofd_tot_granted < pending) { + if (tgd->tgd_tot_granted < pending) { CERROR("%s: cli %s/%p tot_granted(%llu) < grant_used(%lu)\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, - ofd->ofd_tot_granted, pending); - spin_unlock(&ofd->ofd_grant_lock); + tgd->tgd_tot_granted, pending); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } - ofd->ofd_tot_granted -= pending; + tgd->tgd_tot_granted -= pending; - if (ofd->ofd_tot_pending < pending) { + if (tgd->tgd_tot_pending < pending) { CERROR("%s: cli %s/%p tot_pending(%llu) < 
grant_used(%lu)\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, - ofd->ofd_tot_pending, pending); - spin_unlock(&ofd->ofd_grant_lock); + tgd->tgd_tot_pending, pending); + spin_unlock(&tgd->tgd_grant_lock); LBUG(); } - ofd->ofd_tot_pending -= pending; - spin_unlock(&ofd->ofd_grant_lock); + tgd->tgd_tot_pending -= pending; + spin_unlock(&tgd->tgd_grant_lock); EXIT; } +EXPORT_SYMBOL(tgt_grant_commit); -struct ofd_grant_cb { +struct tgt_grant_cb { /* commit callback structure */ - struct dt_txn_commit_cb ogc_cb; + struct dt_txn_commit_cb tgc_cb; /* export associated with the bulk write */ - struct obd_export *ogc_exp; + struct obd_export *tgc_exp; /* pending grant to be released */ - unsigned long ogc_granted; + unsigned long tgc_granted; }; /** @@ -1340,16 +1449,16 @@ struct ofd_grant_cb { * \param[in] cb callback data * \param[in] err error code */ -static void ofd_grant_commit_cb(struct lu_env *env, struct thandle *th, +static void tgt_grant_commit_cb(struct lu_env *env, struct thandle *th, struct dt_txn_commit_cb *cb, int err) { - struct ofd_grant_cb *ogc; + struct tgt_grant_cb *tgc; - ogc = container_of(cb, struct ofd_grant_cb, ogc_cb); + tgc = container_of(cb, struct tgt_grant_cb, tgc_cb); - ofd_grant_commit(ogc->ogc_exp, ogc->ogc_granted, err); - class_export_cb_put(ogc->ogc_exp); - OBD_FREE_PTR(ogc); + tgt_grant_commit(tgc->tgc_exp, tgc->tgc_granted, err); + class_export_cb_put(tgc->tgc_exp); + OBD_FREE_PTR(tgc); } /** @@ -1364,31 +1473,32 @@ static void ofd_grant_commit_cb(struct lu_env *env, struct thandle *th, * \retval 0 on successful callback adding * \retval negative value on error */ -int ofd_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, +int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, unsigned long granted) { - struct ofd_grant_cb *ogc; + struct tgt_grant_cb *tgc; struct dt_txn_commit_cb *dcb; int rc; ENTRY; - OBD_ALLOC_PTR(ogc); - if (ogc == NULL) + OBD_ALLOC_PTR(tgc); + if (tgc == NULL) 
RETURN(-ENOMEM); - ogc->ogc_exp = class_export_cb_get(exp); - ogc->ogc_granted = granted; + tgc->tgc_exp = class_export_cb_get(exp); + tgc->tgc_granted = granted; - dcb = &ogc->ogc_cb; - dcb->dcb_func = ofd_grant_commit_cb; + dcb = &tgc->tgc_cb; + dcb->dcb_func = tgt_grant_commit_cb; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, "ofd_grant_commit_cb", sizeof(dcb->dcb_name)); + strlcpy(dcb->dcb_name, "tgt_grant_commit_cb", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) { - class_export_cb_put(ogc->ogc_exp); - OBD_FREE_PTR(ogc); + class_export_cb_put(tgc->tgc_exp); + OBD_FREE_PTR(tgc); } RETURN(rc); } +EXPORT_SYMBOL(tgt_grant_commit_cb_add); diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index c6ff5ef..2fcb794 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include diff --git a/lustre/target/tgt_internal.h b/lustre/target/tgt_internal.h index ea19560..981e2ab 100644 --- a/lustre/target/tgt_internal.h +++ b/lustre/target/tgt_internal.h @@ -85,6 +85,7 @@ struct tgt_thread_info { struct dt_object *tti_dt_object; struct l_wait_info tti_wait_info; } update; + struct obd_statfs osfs; /* for obd_statfs() in OFD/MDT */ } tti_u; struct lfsck_req_local tti_lrl; struct dt_insert_rec tti_rec; -- 1.8.3.1