From: Mikhail Pershin Date: Fri, 1 Feb 2019 12:13:38 +0000 (+0300) Subject: LU-10496 tgt: move FMD handling from OFD to target X-Git-Tag: 2.12.1-RC1~7 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=5456b1bdbf94bdb1769000d4ca5a8131528ddf5d;p=fs%2Flustre-release.git LU-10496 tgt: move FMD handling from OFD to target - move ofd/ofd_fmd.c to target/tgt_fmd.c with corresponding changes - add FMD calls to the MDT for Data-on-MDT files - per-target tunable parameters init/fini - update related tests to be correctly used with DOM - make sanity.sh test_36 to work again - remove target_handle_ping() along with o_ping method in obd operations because it is not used anymore. Ping is fully handled in tgt_obd_ping() Lustre-change: https://review.whamcloud.com/34190 Lustre-commit: 52e33c507b84bcaf3af9df010f5de4a282aa3fca Signed-off-by: Mikhail Pershin Change-Id: I24280a2a9610d05eb9655c73bb067f94ff251980 Reviewed-by: Andreas Dilger Reviewed-by: Wang Shilong Reviewed-by: Alex Zhuravlev Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/34691 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 4ba3fe1..092e42d 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -205,6 +205,9 @@ struct lu_target { /* target grants fields */ struct tg_grants_data lut_tgd; + /* target tunables */ + const struct attribute **lut_attrs; + /* FMD (file modification data) values */ int lut_fmd_max_num; time64_t lut_fmd_max_age; @@ -500,6 +503,8 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, struct thandle *th, bool update_lrd_file); struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted, __u64 xid); +int tgt_tunables_init(struct lu_target *lut); +void tgt_tunables_fini(struct lu_target *lut); /* target/tgt_grant.c */ static inline int exp_grant_param_supp(struct obd_export *exp) @@ -542,6 +547,11 @@ void 
tgt_fmd_update(struct obd_export *exp, const struct lu_fid *fid, __u64 xid); bool tgt_fmd_check(struct obd_export *exp, const struct lu_fid *fid, __u64 xid); +#ifdef DO_FMD_DROP +void tgt_fmd_drop(struct obd_export *exp, const struct lu_fid *fid); +#else +#define tgt_fmd_drop(exp, fid) do {} while (0) +#endif /* target/update_trans.c */ int distribute_txn_init(const struct lu_env *env, diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 4ab0db6..f677912 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -73,7 +73,6 @@ int rev_import_init(struct obd_export *exp); int target_handle_connect(struct ptlrpc_request *req); int target_handle_disconnect(struct ptlrpc_request *req); void target_destroy_export(struct obd_export *exp); -int target_handle_ping(struct ptlrpc_request *req); void target_committed_to_req(struct ptlrpc_request *req); void target_cancel_recovery_timer(struct obd_device *obd); void target_stop_recovery_thread(struct obd_device *obd); diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 200a0f3..e43cd7e 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1013,8 +1013,6 @@ struct obd_ops { int (*o_quotactl)(struct obd_device *, struct obd_export *, struct obd_quotactl *); - int (*o_ping)(const struct lu_env *, struct obd_export *exp); - /* pools methods */ int (*o_pool_new)(struct obd_device *obd, char *poolname); int (*o_pool_del)(struct obd_device *obd, char *poolname); diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index f0fcedb..d2dbcd7 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -911,19 +911,6 @@ static inline int obd_fid_alloc(const struct lu_env *env, RETURN(rc); } -static inline int obd_ping(const struct lu_env *env, struct obd_export *exp) -{ - int rc; - ENTRY; - - if (!exp->exp_obd->obd_type || - !exp->exp_obd->obd_type->typ_dt_ops->o_ping) - RETURN(0); - - rc = OBP(exp->exp_obd, ping)(env, exp); - RETURN(rc); -} - 
static inline int obd_pool_new(struct obd_device *obd, char *poolname) { int rc; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index ba4b19f..06f22d0 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2873,12 +2873,6 @@ added: RETURN(0); } -int target_handle_ping(struct ptlrpc_request *req) -{ - obd_ping(req->rq_svc_thread->t_env, req->rq_export); - return req_capsule_server_pack(&req->rq_pill); -} - void target_committed_to_req(struct ptlrpc_request *req) { struct obd_export *exp = req->rq_export; diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 3124c8c..3999e26 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -702,7 +702,10 @@ int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, * doesn't already exist so we can store the reservation handle * there. */ valid = OBD_MD_FLUID | OBD_MD_FLGID; - valid |= OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME; + if (tgt_fmd_check(exp, mdt_object_fid(mo), + mdt_info_req(info)->rq_xid)) + valid |= OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME; la_from_obdo(la, oa, valid); @@ -890,6 +893,11 @@ int mdt_punch_hdl(struct tgt_session_info *tsi) la->la_size = start; la->la_valid |= LA_SIZE; + /* MDT supports FMD for Data-on-MDT needs */ + if (la->la_valid & (LA_ATIME | LA_MTIME | LA_CTIME)) + tgt_fmd_update(tsi->tsi_exp, &tsi->tsi_fid, + tgt_ses_req(tsi)->rq_xid); + rc = mdt_object_punch(tsi->tsi_env, mdt->mdt_bottom, dob, start, end, la); mdt_dom_write_unlock(mo); diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index da3c529..d9b0d49 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -712,27 +712,6 @@ mdt_enable_dir_migration_seq_write(struct file *file, const char __user *buffer, } LPROC_SEQ_FOPS(mdt_enable_dir_migration); - -/** - * Show MDT policy for handling dirty metadata under a lock being cancelled. 
- * - * \param[in] m seq_file handle - * \param[in] data unused for single entry - * - * \retval 0 on success - * \retval negative value on error - */ -static int mdt_slc_seq_show(struct seq_file *m, void *data) -{ - struct obd_device *obd = m->private; - struct lu_target *tgt = obd->u.obt.obt_lut; - char *slc_states[] = {"never", "blocking", "always" }; - - seq_printf(m, "%s\n", slc_states[tgt->lut_sync_lock_cancel]); - return 0; -} -LPROC_SEQ_FOPS_RO(mdt_slc); - /** * Show MDT async commit count. * @@ -1054,8 +1033,6 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = { .fops = &mdt_recovery_time_hard_fops }, { .name = "recovery_time_soft", .fops = &mdt_recovery_time_soft_fops }, - { .name = "sync_lock_cancel", - .fops = &mdt_slc_fops }, { .name = "async_commit_count", .fops = &mdt_async_commit_count_fops }, { .name = "sync_count", @@ -1188,6 +1165,13 @@ int mdt_procfs_init(struct mdt_device *mdt, const char *name) return rc; } + rc = tgt_tunables_init(&mdt->mdt_lut); + if (rc) { + CERROR("%s: failed to init target tunables: rc = %d\n", + mdt_obd_name(mdt), rc); + return rc; + } + rc = hsm_cdt_procfs_init(mdt); if (rc) { CERROR("%s: cannot create hsm proc entries: rc = %d\n", @@ -1239,6 +1223,7 @@ void mdt_procfs_fini(struct mdt_device *mdt) lprocfs_free_per_client_stats(obd); hsm_cdt_procfs_fini(mdt); + tgt_tunables_fini(&mdt->mdt_lut); lprocfs_obd_cleanup(obd); lprocfs_free_md_stats(obd); lprocfs_free_obd_stats(obd); diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 27913a99..5d5f33e 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -704,6 +704,12 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, if (ma->ma_valid & MA_LOV) GOTO(out_put, rc = -EPROTO); + /* MDT supports FMD for regular files due to Data-on-MDT */ + if (S_ISREG(lu_object_attr(&mo->mot_obj)) && + ma->ma_attr.la_valid & (LA_ATIME | LA_MTIME | LA_CTIME)) + tgt_fmd_update(info->mti_exp, mdt_object_fid(mo), + req->rq_xid); + rc = mdt_attr_set(info, mo, 
ma); if (rc) GOTO(out_put, rc); diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c index f878de0..224ea2c 100644 --- a/lustre/obdclass/lprocfs_status_server.c +++ b/lustre/obdclass/lprocfs_status_server.c @@ -384,6 +384,30 @@ int lprocfs_exp_replydata_seq_show(struct seq_file *m, void *data) } LPROC_SEQ_FOPS_RO(lprocfs_exp_replydata); +int lprocfs_exp_print_fmd_count_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode, void *cb_data) + +{ + struct obd_export *exp = cfs_hash_object(hs, hnode); + struct seq_file *m = cb_data; + struct tg_export_data *ted = &exp->exp_target_data; + + seq_printf(m, "%d\n", ted->ted_fmd_count); + + return 0; +} + +int lprocfs_exp_fmd_count_seq_show(struct seq_file *m, void *data) +{ + struct nid_stat *stats = m->private; + struct obd_device *obd = stats->nid_obd; + + cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid, + lprocfs_exp_print_fmd_count_seq, m); + return 0; +} +LPROC_SEQ_FOPS_RO(lprocfs_exp_fmd_count); + int lprocfs_nid_stats_clear_seq_show(struct seq_file *m, void *data) { seq_puts(m, "Write into this file to clear all nid stats and stale nid entries\n"); @@ -550,6 +574,15 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid) GOTO(destroy_new_ns, rc); } + entry = lprocfs_add_simple(new_stat->nid_proc, "fmd_count", new_stat, + &lprocfs_exp_fmd_count_fops); + if (IS_ERR(entry)) { + rc = PTR_ERR(entry); + CWARN("%s: error adding the fmd_count file: rc = %d\n", + obd->obd_name, rc); + GOTO(destroy_new_ns, rc); + } + spin_lock(&exp->exp_lock); exp->exp_nid_stats = new_stat; spin_unlock(&exp->exp_lock); diff --git a/lustre/ofd/Makefile.in b/lustre/ofd/Makefile.in index 1d10108..f446c1e 100644 --- a/lustre/ofd/Makefile.in +++ b/lustre/ofd/Makefile.in @@ -1,7 +1,7 @@ MODULES := ofd ofd-objs := ofd_dev.o ofd_obd.o ofd_fs.o ofd_trans.o ofd_objects.o ofd_io.o -ofd-objs += lproc_ofd.o ofd_fmd.o ofd_dlm.o ofd_lvb.o +ofd-objs += lproc_ofd.o ofd_dlm.o 
ofd_lvb.o EXTRA_DIST = $(ofd-objs:%.o=%.c) ofd_internal.h diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index 1822137..9de4506 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -183,116 +183,6 @@ static int ofd_last_id_seq_show(struct seq_file *m, void *data) LPROC_SEQ_FOPS_RO(ofd_last_id); /** - * Show maximum number of Filter Modification Data (FMD) maintained by OFD. - * - * \param[in] m seq_file handle - * \param[in] data unused for single entry - * - * \retval 0 on success - * \retval negative value on error - */ -static ssize_t client_cache_count_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct lu_target *lut = obd->u.obt.obt_lut; - - return sprintf(buf, "%u\n", lut->lut_fmd_max_num); -} - -/** - * Change number of FMDs maintained by OFD. - * - * This defines how large the list of FMDs can be. - * - * \param[in] file proc file - * \param[in] buffer string which represents maximum number - * \param[in] count \a buffer length - * \param[in] off unused for single entry - * - * \retval \a count on success - * \retval negative number on error - */ -static ssize_t client_cache_count_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct lu_target *lut = obd->u.obt.obt_lut; - int val; - int rc; - - rc = kstrtoint(buffer, 0, &val); - if (rc) - return rc; - - if (val < 1 || val > 65536) - return -EINVAL; - - lut->lut_fmd_max_num = val; - return count; -} -LUSTRE_RW_ATTR(client_cache_count); - -/** - * Show the maximum age of FMD data in seconds. 
- * - * \param[in] m seq_file handle - * \param[in] data unused for single entry - * - * \retval 0 on success - * \retval negative value on error - */ -static ssize_t client_cache_seconds_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct lu_target *lut = obd->u.obt.obt_lut; - - return sprintf(buf, "%lld\n", lut->lut_fmd_max_age); -} - -/** - * Set the maximum age of FMD data in seconds. - * - * This defines how long FMD data stays in the FMD list. - * - * \param[in] file proc file - * \param[in] buffer string which represents maximum number - * \param[in] count \a buffer length - * \param[in] off unused for single entry - * - * \retval \a count on success - * \retval negative number on error - */ -static ssize_t client_cache_seconds_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct lu_target *lut = obd->u.obt.obt_lut; - time64_t val; - int rc; - - rc = kstrtoll(buffer, 0, &val); - if (rc) - return rc; - - if (val < 1 || val > 65536) /* ~ 18 hour max */ - return -EINVAL; - - lut->lut_fmd_max_age = val; - return count; -} -LUSTRE_RW_ATTR(client_cache_seconds); - -/** * Show if the OFD is in degraded mode. * * Degraded means OFD has a failed drive or is undergoing RAID rebuild. 
@@ -440,9 +330,6 @@ static ssize_t sync_journal_store(struct kobject *kobj, struct attribute *attr, } LUSTRE_RW_ATTR(sync_journal); -/* This must be longer than the longest string below */ -#define SYNC_STATES_MAXLEN 16 - static int ofd_brw_size_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = m->private; @@ -482,88 +369,6 @@ ofd_brw_size_seq_write(struct file *file, const char __user *buffer, LPROC_SEQ_FOPS(ofd_brw_size); -static char *sync_on_cancel_states[] = {"never", - "blocking", - "always" }; - -/** - * Show OFD policy for handling dirty data under a lock being cancelled. - * - * \param[in] m seq_file handle - * \param[in] data unused for single entry - * - * \retval 0 on success - * \retval negative value on error - */ -static ssize_t sync_lock_cancel_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct lu_target *tgt = obd->u.obt.obt_lut; - - return sprintf(buf, "%s\n", - sync_on_cancel_states[tgt->lut_sync_lock_cancel]); -} - -/** - * Change OFD policy for handling dirty data under a lock being cancelled. - * - * This variable defines what action OFD takes upon lock cancel - * There are three possible modes: - * 1) never - never do sync upon lock cancel. This can lead to data - * inconsistencies if both the OST and client crash while writing a file - * that is also concurrently being read by another client. In these cases, - * this may allow the file data to "rewind" to an earlier state. - * 2) blocking - do sync only if there is blocking lock, e.g. 
if another - * client is trying to access this same object - * 3) always - do sync always - * - * \param[in] file proc file - * \param[in] buffer string which represents policy - * \param[in] count \a buffer length - * \param[in] off unused for single entry - * - * \retval \a count on success - * \retval negative number on error - */ -static ssize_t sync_lock_cancel_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct lu_target *tgt = obd->u.obt.obt_lut; - int val = -1; - int i; - - if (count == 0 || count >= SYNC_STATES_MAXLEN) - return -EINVAL; - - for (i = 0 ; i < NUM_SYNC_ON_CANCEL_STATES; i++) { - if (strcmp(buffer, sync_on_cancel_states[i]) == 0) { - val = i; - break; - } - } - - /* Legacy numeric codes */ - if (val == -1) { - int rc = kstrtoint(buffer, 0, &val); - if (rc) - return rc; - } - - if (val < 0 || val > 2) - return -EINVAL; - - spin_lock(&tgt->lut_flags_lock); - tgt->lut_sync_lock_cancel = val; - spin_unlock(&tgt->lut_flags_lock); - return count; -} -LUSTRE_RW_ATTR(sync_lock_cancel); - /** * Show the limit of soft sync RPCs. 
* @@ -923,12 +728,9 @@ static struct attribute *ofd_attrs[] = { &lustre_attr_seqs_allocated.attr, &lustre_attr_grant_precreate.attr, &lustre_attr_precreate_batch.attr, - &lustre_attr_client_cache_count.attr, - &lustre_attr_client_cache_seconds.attr, &lustre_attr_degraded.attr, &lustre_attr_fstype.attr, &lustre_attr_sync_journal.attr, - &lustre_attr_sync_lock_cancel.attr, &lustre_attr_soft_sync_limit.attr, &lustre_attr_lfsck_speed_limit.attr, &lustre_attr_checksum_t10pi_enforce.attr, @@ -962,11 +764,18 @@ int ofd_tunables_init(struct ofd_device *ofd) RETURN(rc); } + rc = tgt_tunables_init(&ofd->ofd_lut); + if (rc) { + CERROR("%s: tgt_tunables_init failed: rc = %d\n", + obd->obd_name, rc); + GOTO(obd_cleanup, rc); + } + rc = lprocfs_alloc_obd_stats(obd, LPROC_OFD_STATS_LAST); if (rc) { CERROR("%s: lprocfs_alloc_obd_stats failed: %d.\n", obd->obd_name, rc); - GOTO(obd_cleanup, rc); + GOTO(tgt_cleanup, rc); } entry = lprocfs_register("exports", obd->obd_proc_entry, NULL, NULL); @@ -998,6 +807,8 @@ int ofd_tunables_init(struct ofd_device *ofd) obd_free_stats: lprocfs_free_obd_stats(obd); +tgt_cleanup: + tgt_tunables_fini(&ofd->ofd_lut); obd_cleanup: lprocfs_obd_cleanup(obd); diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index fe18b39..6e2d7f0 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -82,8 +82,6 @@ /* Slab for OFD object allocation */ static struct kmem_cache *ofd_object_kmem; -struct kmem_cache *tgt_fmd_kmem; - static struct lu_kmem_descr ofd_caches[] = { { .ckd_cache = &ofd_object_kmem, @@ -91,11 +89,6 @@ static struct lu_kmem_descr ofd_caches[] = { .ckd_size = sizeof(struct ofd_object) }, { - .ckd_cache = &tgt_fmd_kmem, - .ckd_name = "ll_fmd_cache", - .ckd_size = sizeof(struct tgt_fmd_data) - }, - { .ckd_cache = NULL } }; @@ -756,6 +749,7 @@ static void ofd_procfs_fini(struct ofd_device *ofd) { struct obd_device *obd = ofd_obd(ofd); + tgt_tunables_fini(&ofd->ofd_lut); lprocfs_free_per_client_stats(obd); lprocfs_obd_cleanup(obd); 
lprocfs_free_obd_stats(obd); @@ -2871,12 +2865,6 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, /* set this lu_device to obd, because error handling need it */ obd->obd_lu_dev = &m->ofd_dt_dev.dd_lu_dev; - rc = ofd_tunables_init(m); - if (rc) { - CERROR("Can't init ofd lprocfs, rc %d\n", rc); - RETURN(rc); - } - /* No connection accepted until configurations will finish */ spin_lock(&obd->obd_dev_lock); obd->obd_no_conn = 1; @@ -2893,12 +2881,13 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, info = ofd_info_init(env, NULL); if (info == NULL) - GOTO(err_fini_proc, rc = -EFAULT); + RETURN(-EFAULT); rc = ofd_stack_init(env, m, cfg); if (rc) { - CERROR("Can't init device stack, rc %d\n", rc); - GOTO(err_fini_proc, rc); + CERROR("%s: can't init device stack, rc %d\n", + obd->obd_name, rc); + RETURN(rc); } #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 14, 53, 0) @@ -2928,6 +2917,10 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, if (rc) GOTO(err_free_ns, rc); + rc = ofd_tunables_init(m); + if (rc) + GOTO(err_fini_lut, rc); + tgd->tgd_reserved_pcnt = 0; m->ofd_brw_size = m->ofd_lut.lut_dt_conf.ddp_brw_size; @@ -2940,7 +2933,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, rc = ofd_fs_setup(env, m, obd); if (rc) - GOTO(err_fini_lut, rc); + GOTO(err_fini_proc, rc); fid.f_seq = FID_SEQ_LOCAL_NAME; fid.f_oid = 1; @@ -2976,6 +2969,8 @@ err_fini_los: m->ofd_los = NULL; err_fini_fs: ofd_fs_cleanup(env, m); +err_fini_proc: + ofd_procfs_fini(m); err_fini_lut: tgt_fini(env, &m->ofd_lut); err_free_ns: @@ -2983,8 +2978,6 @@ err_free_ns: obd->obd_namespace = m->ofd_namespace = NULL; err_fini_stack: ofd_stack_fini(env, m, &m->ofd_osd->dd_lu_dev); -err_fini_proc: - ofd_procfs_fini(m); return rc; } @@ -3015,6 +3008,7 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m) obd_exports_barrier(obd); obd_zombie_barrier(); + ofd_procfs_fini(m); tgt_fini(env, &m->ofd_lut); 
ofd_stop_inconsistency_verification_thread(m); lfsck_degister(env, m->ofd_osd); @@ -3028,7 +3022,7 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m) } ofd_stack_fini(env, m, &m->ofd_dt_dev.dd_lu_dev); - ofd_procfs_fini(m); + LASSERT(atomic_read(&d->ld_ref) == 0); server_put_mount(obd->obd_name, true); EXIT; diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 6da99fb..3b3291c 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -51,15 +51,6 @@ #define OFD_VALID_FLAGS (LA_TYPE | LA_MODE | LA_SIZE | LA_BLOCKS | \ LA_BLKSIZE | LA_ATIME | LA_MTIME | LA_CTIME) -/* FMD tracking data */ -struct tgt_fmd_data { - struct list_head fmd_list; /* linked to tgt_fmd_list */ - struct lu_fid fmd_fid; /* FID being written to */ - __u64 fmd_mactime_xid; /* xid highest {m,a,c}time setattr */ - time64_t fmd_expire; /* time when the fmd should expire */ - int fmd_refcount; /* reference counter - list holds 1 */ -}; - #define OFD_SOFT_SYNC_LIMIT_DEFAULT 16 /* request stats */ @@ -391,16 +382,6 @@ struct ofd_object *ofd_object_find_exists(const struct lu_env *env, return fo; } -/* ofd_fmd.c */ -extern struct kmem_cache *tgt_fmd_kmem; -void ofd_fmd_expire(struct obd_export *exp); -void ofd_fmd_cleanup(struct obd_export *exp); -#ifdef DO_FMD_DROP -void ofd_fmd_drop(struct obd_export *exp, const struct lu_fid *fid); -#else -#define ofd_fmd_drop(exp, fid) do {} while (0) -#endif - /* ofd_dev.c */ int ofd_fid_set_index(const struct lu_env *env, struct ofd_device *ofd, int index); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index b717ad4..eff9138 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -546,8 +546,6 @@ static int ofd_init_export(struct obd_export *exp) { int rc; - spin_lock_init(&exp->exp_target_data.ted_fmd_lock); - INIT_LIST_HEAD(&exp->exp_target_data.ted_fmd_list); atomic_set(&exp->exp_filter_data.fed_soft_sync_count, 0); spin_lock(&exp->exp_lock); exp->exp_connecting = 1; @@ -596,8 +594,6 @@ 
static int ofd_destroy_export(struct obd_export *exp) ldlm_destroy_export(exp); tgt_client_free(exp); - ofd_fmd_cleanup(exp); - /* * discard grants once we're sure no more * interaction with the client is possible @@ -1405,26 +1401,6 @@ static int ofd_precleanup(struct obd_device *obd) } /** - * Implementation of obd_ops::o_ping. - * - * This is OFD-specific part of OBD_PING request handling. - * It controls Filter Modification Data (FMD) expiration each time PING is - * received. - * - * \see ofd_fmd_expire() and ofd_fmd.c for details - * - * \param[in] env execution environment - * \param[in] exp OBD export of client - * - * \retval 0 - */ -static int ofd_ping(const struct lu_env *env, struct obd_export *exp) -{ - ofd_fmd_expire(exp); - return 0; -} - -/** * Implementation of obd_ops::o_health_check. * * This function checks the OFD device health - ability to respond on @@ -1516,7 +1492,6 @@ struct obd_ops ofd_obd_ops = { .o_getattr = ofd_echo_getattr, .o_iocontrol = ofd_iocontrol, .o_precleanup = ofd_precleanup, - .o_ping = ofd_ping, .o_health_check = ofd_health_check, .o_set_info_async = ofd_set_info_async, .o_get_info = ofd_get_info, diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index f0092c5..3e71d4b 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -854,7 +854,7 @@ int ofd_destroy(const struct lu_env *env, struct ofd_object *fo, if (rc) GOTO(stop, rc); - ofd_fmd_drop(ofd_info(env)->fti_exp, &fo->ofo_header.loh_fid); + tgt_fmd_drop(ofd_info(env)->fti_exp, &fo->ofo_header.loh_fid); dt_ref_del(env, ofd_object_child(fo), th); dt_destroy(env, ofd_object_child(fo), th); diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in index 1847e22..a02ecb5 100644 --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -14,7 +14,7 @@ target_objs := $(TARGET)tgt_main.o $(TARGET)tgt_lastrcvd.o target_objs += $(TARGET)tgt_handler.o $(TARGET)out_handler.o target_objs += $(TARGET)out_lib.o $(TARGET)update_trans.o 
target_objs += $(TARGET)update_records.o $(TARGET)update_recovery.o -target_objs += $(TARGET)tgt_grant.o +target_objs += $(TARGET)tgt_grant.o $(TARGET)tgt_fmd.o ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o diff --git a/lustre/target/Makefile.am b/lustre/target/Makefile.am index aeaa588..a8165a9 100644 --- a/lustre/target/Makefile.am +++ b/lustre/target/Makefile.am @@ -32,7 +32,7 @@ MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ EXTRA_DIST = tgt_main.c tgt_lastrcvd.c tgt_handler.c tgt_internal.h \ - tgt_grant.c out_handler.c out_lib.c barrier.c + tgt_grant.c out_handler.c out_lib.c barrier.c tgt_fmd.c EXTRA_DIST += update_trans.c EXTRA_DIST += update_records.c EXTRA_DIST += update_recovery.c diff --git a/lustre/ofd/ofd_fmd.c b/lustre/target/tgt_fmd.c similarity index 83% rename from lustre/ofd/ofd_fmd.c rename to lustre/target/tgt_fmd.c index 8c8e263..afbf668 100644 --- a/lustre/ofd/ofd_fmd.c +++ b/lustre/target/tgt_fmd.c @@ -24,12 +24,13 @@ * Use is subject to license terms. * * Copyright (c) 2012, 2014, Intel Corporation. + * + * Copyright (c) 2019, DDN Storage Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * - * lustre/ofd/ofd_fmd.c + * lustre/target/tgt_fmd.c * * This file provides functions to handle Filter Modification Data (FMD). * The FMD is responsible for file attributes to be applied in @@ -43,12 +44,16 @@ * FMD can expire if there are no updates for a long time to keep the list * reasonably small. * - * Author: Andreas Dilger + * Author: Andreas Dilger + * Author: Mike Pershin */ -#define DEBUG_SUBSYSTEM S_FILTER +#define DEBUG_SUBSYSTEM S_CLASS + +#include +#include -#include "ofd_internal.h" +#include "tgt_internal.h" /** * Drop FMD reference and free it if reference drops to zero. 
@@ -58,7 +63,7 @@ * \param[in] exp OBD export * \param[in] fmd FMD to put */ -static inline void ofd_fmd_put_nolock(struct obd_export *exp, +static inline void tgt_fmd_put_nolock(struct obd_export *exp, struct tgt_fmd_data *fmd) { struct tg_export_data *ted = &exp->exp_target_data; @@ -77,12 +82,12 @@ static inline void ofd_fmd_put_nolock(struct obd_export *exp, * \param[in] exp OBD export * \param[in] fmd FMD to put */ -void ofd_fmd_put(struct obd_export *exp, struct tgt_fmd_data *fmd) +void tgt_fmd_put(struct obd_export *exp, struct tgt_fmd_data *fmd) { struct tg_export_data *ted = &exp->exp_target_data; spin_lock(&ted->ted_fmd_lock); - ofd_fmd_put_nolock(exp, fmd); /* caller reference */ + tgt_fmd_put_nolock(exp, fmd); /* caller reference */ spin_unlock(&ted->ted_fmd_lock); } @@ -101,7 +106,7 @@ void ofd_fmd_put(struct obd_export *exp, struct tgt_fmd_data *fmd) * \param[in] exp OBD export * \param[in] keep FMD to keep always */ -static void ofd_fmd_expire_nolock(struct obd_export *exp, +static void tgt_fmd_expire_nolock(struct obd_export *exp, struct tgt_fmd_data *keep) { struct tg_export_data *ted = &exp->exp_target_data; @@ -118,7 +123,7 @@ static void ofd_fmd_expire_nolock(struct obd_export *exp, break; list_del_init(&fmd->fmd_list); - ofd_fmd_put_nolock(exp, fmd); /* list reference */ + tgt_fmd_put_nolock(exp, fmd); /* list reference */ } } @@ -129,12 +134,12 @@ static void ofd_fmd_expire_nolock(struct obd_export *exp, * * \param[in] exp OBD export */ -void ofd_fmd_expire(struct obd_export *exp) +void tgt_fmd_expire(struct obd_export *exp) { struct tg_export_data *ted = &exp->exp_target_data; spin_lock(&ted->ted_fmd_lock); - ofd_fmd_expire_nolock(exp, NULL); + tgt_fmd_expire_nolock(exp, NULL); spin_unlock(&ted->ted_fmd_lock); } @@ -151,12 +156,13 @@ void ofd_fmd_expire(struct obd_export *exp) * \retval struct tgt_fmd_data found by FID * \retval NULL is FMD is not found */ -static struct tgt_fmd_data *ofd_fmd_find_nolock(struct obd_export *exp, +static struct 
tgt_fmd_data *tgt_fmd_find_nolock(struct obd_export *exp, const struct lu_fid *fid) { struct tg_export_data *ted = &exp->exp_target_data; struct tgt_fmd_data *found = NULL, *fmd; struct lu_target *lut = exp->exp_obd->u.obt.obt_lut; + time64_t now = ktime_get_seconds(); assert_spin_locked(&ted->ted_fmd_lock); @@ -164,13 +170,12 @@ static struct tgt_fmd_data *ofd_fmd_find_nolock(struct obd_export *exp, if (lu_fid_eq(&fmd->fmd_fid, fid)) { found = fmd; list_move_tail(&fmd->fmd_list, &ted->ted_fmd_list); - fmd->fmd_expire = ktime_get_seconds() + - lut->lut_fmd_max_age; + fmd->fmd_expire = now + lut->lut_fmd_max_age; break; } } - ofd_fmd_expire_nolock(exp, found); + tgt_fmd_expire_nolock(exp, found); return found; } @@ -186,14 +191,14 @@ static struct tgt_fmd_data *ofd_fmd_find_nolock(struct obd_export *exp, * \retval struct tgt_fmd_data found by FID * \retval NULL indicates FMD is not found */ -struct tgt_fmd_data *ofd_fmd_find(struct obd_export *exp, +struct tgt_fmd_data *tgt_fmd_find(struct obd_export *exp, const struct lu_fid *fid) { struct tg_export_data *ted = &exp->exp_target_data; struct tgt_fmd_data *fmd; spin_lock(&ted->ted_fmd_lock); - fmd = ofd_fmd_find_nolock(exp, fid); + fmd = tgt_fmd_find_nolock(exp, fid); if (fmd) fmd->fmd_refcount++; /* caller reference */ spin_unlock(&ted->ted_fmd_lock); @@ -215,7 +220,7 @@ struct tgt_fmd_data *ofd_fmd_find(struct obd_export *exp, * \retval struct tgt_fmd_data found by FID * \retval NULL indicates FMD is not found */ -struct tgt_fmd_data *ofd_fmd_get(struct obd_export *exp, +struct tgt_fmd_data *tgt_fmd_get(struct obd_export *exp, const struct lu_fid *fid) { struct tg_export_data *ted = &exp->exp_target_data; @@ -224,9 +229,9 @@ struct tgt_fmd_data *ofd_fmd_get(struct obd_export *exp, OBD_SLAB_ALLOC_PTR(fmd_new, tgt_fmd_kmem); spin_lock(&ted->ted_fmd_lock); - found = ofd_fmd_find_nolock(exp, fid); + found = tgt_fmd_find_nolock(exp, fid); if (fmd_new) { - if (found == NULL) { + if (!found) { 
list_add_tail(&fmd_new->fmd_list, &ted->ted_fmd_list); fmd_new->fmd_fid = *fid; fmd_new->fmd_refcount++; /* list reference */ @@ -266,19 +271,20 @@ struct tgt_fmd_data *ofd_fmd_get(struct obd_export *exp, * \param[in] exp OBD export * \param[in] fid FID of FMD to drop */ -void ofd_fmd_drop(struct obd_export *exp, const struct lu_fid *fid) +void tgt_fmd_drop(struct obd_export *exp, const struct lu_fid *fid) { struct tg_export_data *ted = &exp->exp_target_data; - struct tgt_fmd_data *found = NULL; + struct tgt_fmd_data *fmd = NULL; spin_lock(&ted->ted_fmd_lock); - found = ofd_fmd_find_nolock(exp, fid); - if (found) { - list_del_init(&found->fmd_list); - ofd_fmd_put_nolock(exp, found); + fmd = tgt_fmd_find_nolock(exp, fid); + if (fmd) { + list_del_init(&fmd->fmd_list); + tgt_fmd_put_nolock(exp, fmd); } spin_unlock(&ted->ted_fmd_lock); } +EXPORT_SYMBOL(tgt_fmd_drop); #endif /** @@ -288,7 +294,7 @@ void ofd_fmd_drop(struct obd_export *exp, const struct lu_fid *fid) * * \param[in] exp OBD export */ -void ofd_fmd_cleanup(struct obd_export *exp) +void tgt_fmd_cleanup(struct obd_export *exp) { struct tg_export_data *ted = &exp->exp_target_data; struct tgt_fmd_data *fmd = NULL, *tmp; @@ -298,12 +304,13 @@ void ofd_fmd_cleanup(struct obd_export *exp) list_del_init(&fmd->fmd_list); if (fmd->fmd_refcount > 1) { CDEBUG(D_INFO, - "fmd %p is still referenced (refcount = %d)\n", + "fmd %p still referenced (refcount = %d)\n", fmd, fmd->fmd_refcount); } - ofd_fmd_put_nolock(exp, fmd); + tgt_fmd_put_nolock(exp, fmd); } spin_unlock(&ted->ted_fmd_lock); + LASSERT(list_empty(&exp->exp_target_data.ted_fmd_list)); } /** @@ -319,13 +326,14 @@ void tgt_fmd_update(struct obd_export *exp, const struct lu_fid *fid, __u64 xid) { struct tgt_fmd_data *fmd; - fmd = ofd_fmd_get(exp, fid); + fmd = tgt_fmd_get(exp, fid); if (fmd) { if (fmd->fmd_mactime_xid < xid) fmd->fmd_mactime_xid = xid; - ofd_fmd_put(exp, fmd); + tgt_fmd_put(exp, fmd); } } +EXPORT_SYMBOL(tgt_fmd_update); /** * Chech that time can 
be updated by the request with given XID. @@ -343,12 +351,13 @@ bool tgt_fmd_check(struct obd_export *exp, const struct lu_fid *fid, __u64 xid) struct tgt_fmd_data *fmd; bool can_update = true; - fmd = ofd_fmd_find(exp, fid); + fmd = tgt_fmd_find(exp, fid); if (fmd) { can_update = fmd->fmd_mactime_xid < xid; - ofd_fmd_put(exp, fmd); + tgt_fmd_put(exp, fmd); } return can_update; } +EXPORT_SYMBOL(tgt_fmd_check); diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 518388c..f6ee8d1 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -71,7 +71,7 @@ * Author: Johann Lombardi */ -#define DEBUG_SUBSYSTEM S_FILTER +#define DEBUG_SUBSYSTEM S_CLASS #include #include diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index cc5352e..6510f4b 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -1036,7 +1036,16 @@ int tgt_obd_ping(struct tgt_session_info *tsi) ENTRY; - rc = target_handle_ping(tgt_ses_req(tsi)); + /* The target-specific part of OBD_PING request handling. + * It controls Filter Modification Data (FMD) expiration each time + * PING is received. + * + * Valid only for replayable targets, e.g. 
MDT and OFD + */ + if (tsi->tsi_exp->exp_obd->obd_replayable) + tgt_fmd_expire(tsi->tsi_exp); + + rc = req_capsule_server_pack(tsi->tsi_pill); if (rc) RETURN(err_serious(rc)); diff --git a/lustre/target/tgt_internal.h b/lustre/target/tgt_internal.h index 9539da0..ac7c3c1 100644 --- a/lustre/target/tgt_internal.h +++ b/lustre/target/tgt_internal.h @@ -288,4 +288,18 @@ void tgt_cancel_slc_locks(struct lu_target *tgt, __u64 transno); void barrier_init(void); void barrier_fini(void); +/* FMD tracking data */ +struct tgt_fmd_data { + struct list_head fmd_list; /* linked to tgt_fmd_list */ + struct lu_fid fmd_fid; /* FID being written to */ + __u64 fmd_mactime_xid; /* xid highest {m,a,c}time setattr */ + time64_t fmd_expire; /* time when the fmd should expire */ + int fmd_refcount; /* reference counter - list holds 1 */ +}; + +/* tgt_fmd.c */ +extern struct kmem_cache *tgt_fmd_kmem; +void tgt_fmd_expire(struct obd_export *exp); +void tgt_fmd_cleanup(struct obd_export *exp); + #endif /* _TG_INTERNAL_H */ diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 9432c31..bcb4ff9 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -395,6 +395,8 @@ int tgt_client_alloc(struct obd_export *exp) spin_lock_init(&exp->exp_target_data.ted_nodemap_lock); INIT_LIST_HEAD(&exp->exp_target_data.ted_nodemap_member); + spin_lock_init(&exp->exp_target_data.ted_fmd_lock); + INIT_LIST_HEAD(&exp->exp_target_data.ted_fmd_list); OBD_ALLOC_PTR(exp->exp_target_data.ted_lcd); if (exp->exp_target_data.ted_lcd == NULL) @@ -418,6 +420,8 @@ void tgt_client_free(struct obd_export *exp) LASSERT(exp != exp->exp_obd->obd_self_export); + tgt_fmd_cleanup(exp); + /* free reply data */ mutex_lock(&ted->ted_lcd_lock); list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { diff --git a/lustre/target/tgt_main.c b/lustre/target/tgt_main.c index cdc1a53..2f9f381 100644 --- a/lustre/target/tgt_main.c +++ b/lustre/target/tgt_main.c @@ -37,6 +37,240 @@ 
#include "tgt_internal.h" #include "../ptlrpc/ptlrpc_internal.h"
+/* Upper bound on input length; must exceed the longest name below */
+#define SYNC_STATES_MAXLEN 16
+static const char *const sync_on_cancel_states[] = {
+	"never", "blocking", "always"
+};
+
+/**
+ * Show policy for handling dirty data under a lock being cancelled.
+ *
+ * \param[in] kobj	sysfs kobject
+ * \param[in] attr	sysfs attribute
+ * \param[in] buf	buffer for data
+ *
+ * \retval		number of bytes written to \a buf on success
+ * \retval		negative value on error
+ */
+static ssize_t sync_lock_cancel_show(struct kobject *kobj,
+				     struct attribute *attr, char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *tgt = obd->u.obt.obt_lut;
+
+	return sprintf(buf, "%s\n",
+		       sync_on_cancel_states[tgt->lut_sync_lock_cancel]);
+}
+
+/**
+ * Change policy for handling dirty data under a lock being cancelled.
+ *
+ * The policy is given by name or by its legacy numeric code (0, 1 or 2);
+ * a single trailing newline is tolerated.  There are three possible modes:
+ * 1) never - never do sync upon lock cancel. This can lead to data
+ *    inconsistencies if both the OST and client crash while writing a file
+ *    that is also concurrently being read by another client. In these cases,
+ *    this may allow the file data to "rewind" to an earlier state.
+ * 2) blocking - do sync only if there is blocking lock, e.g. if another
+ *    client is trying to access this same object
+ * 3) always - do sync always
+ *
+ * \param[in] kobj	sysfs kobject
+ * \param[in] attr	sysfs attribute being stored
+ * \param[in] buffer	NUL-terminated data written by the user
+ * \param[in] count	size of \a buffer
+ *
+ * \retval		\a count on success
+ * \retval		negative value on error
+ */
+static ssize_t sync_lock_cancel_store(struct kobject *kobj,
+				      struct attribute *attr,
+				      const char *buffer, size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *tgt = obd->u.obt.obt_lut;
+	int val = -1;
+	int i;
+
+	if (count == 0 || count >= SYNC_STATES_MAXLEN)
+		return -EINVAL;
+
+	for (i = 0; i < NUM_SYNC_ON_CANCEL_STATES; i++) {
+		if (sysfs_streq(buffer, sync_on_cancel_states[i])) {
+			val = i;
+			break;
+		}
+	}
+
+	/* Not a known name (sysfs_streq() ignores "\n"): try legacy number */
+	if (val == -1) {
+		int rc = kstrtoint(buffer, 0, &val);
+		if (rc)
+			return rc;
+	}
+
+	if (val < 0 || val > 2)
+		return -EINVAL;
+
+	spin_lock(&tgt->lut_flags_lock);
+	tgt->lut_sync_lock_cancel = val;
+	spin_unlock(&tgt->lut_flags_lock);
+	return count;
+}
+LUSTRE_RW_ATTR(sync_lock_cancel);
+
+/**
+ * Show maximum number of File Modification Data (FMD) maintained.
+ *
+ * \param[in] kobj	sysfs kobject
+ * \param[in] attr	sysfs attribute to show
+ * \param[in] buf	buffer for data
+ *
+ * \retval		number of bytes written to \a buf on success
+ * \retval		negative value on error
+ */
+ssize_t tgt_fmd_count_show(struct kobject *kobj, struct attribute *attr,
+			   char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *lut = obd->u.obt.obt_lut;
+
+	return sprintf(buf, "%d\n", lut->lut_fmd_max_num);
+}
+
+/**
+ * Change number of FMDs maintained by target.
+ *
+ * This defines how large the list of FMDs can be; valid range is 1..65536.
+ *
+ * \param[in] kobj	sysfs kobject
+ * \param[in] attr	sysfs attribute being stored
+ * \param[in] buffer	NUL-terminated data written by the user
+ * \param[in] count	size of \a buffer
+ *
+ * \retval		\a count on success
+ * \retval		negative value on error
+ */
+ssize_t tgt_fmd_count_store(struct kobject *kobj, struct attribute *attr,
+			    const char *buffer, size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *lut = obd->u.obt.obt_lut;
+	int val, rc;
+
+	rc = kstrtoint(buffer, 0, &val);
+	if (rc)
+		return rc;
+
+	if (val < 1 || val > 65536)
+		return -EINVAL;
+
+	lut->lut_fmd_max_num = val;
+
+	return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_count);
+
+/**
+ * Show the maximum age of FMD data in seconds.
+ *
+ * \param[in] kobj	sysfs kobject
+ * \param[in] attr	sysfs attribute to show
+ * \param[in] buf	buffer for data
+ *
+ * \retval		number of bytes written to \a buf on success
+ * \retval		negative value on error
+ */
+ssize_t tgt_fmd_seconds_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *lut = obd->u.obt.obt_lut;
+
+	return sprintf(buf, "%lld\n", lut->lut_fmd_max_age);
+}
+
+/**
+ * Set the maximum age of FMD data in seconds.
+ *
+ * This defines how long FMD data stays in the FMD list (1..65536 seconds).
+ *
+ * \param[in] kobj	sysfs kobject
+ * \param[in] attr	sysfs attribute being stored
+ * \param[in] buffer	NUL-terminated data written by the user
+ * \param[in] count	size of \a buffer
+ *
+ * \retval		\a count on success
+ * \retval		negative number on error
+ */
+ssize_t tgt_fmd_seconds_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buffer, size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *lut = obd->u.obt.obt_lut;
+	time64_t val;
+	int rc;
+
+	rc = kstrtoll(buffer, 0, &val);
+	if (rc)
+		return rc;
+
+	if (val < 1 || val > 65536) /* ~ 18 hour max */
+		return -EINVAL;
+
+	lut->lut_fmd_max_age = val;
+
+	return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_seconds);
+
+/* These two aliases are the old names, kept for compatibility; they were
+ * renamed to 'tgt_fmd_count' and 'tgt_fmd_seconds'.
+ * This change was made in Lustre 2.13, so these aliases can be removed
+ * when back compatibility is not needed with any Lustre version prior to 2.13
+ */
+static struct lustre_attr tgt_fmd_count_compat = __ATTR(client_cache_count,
+	0644, tgt_fmd_count_show, tgt_fmd_count_store);
+static struct lustre_attr tgt_fmd_seconds_compat = __ATTR(client_cache_seconds,
+	0644, tgt_fmd_seconds_show, tgt_fmd_seconds_store);
+
+static const struct attribute *tgt_attrs[] = {
+	&lustre_attr_sync_lock_cancel.attr,
+	&lustre_attr_tgt_fmd_count.attr,
+	&lustre_attr_tgt_fmd_seconds.attr,
+	&tgt_fmd_count_compat.attr,
+	&tgt_fmd_seconds_compat.attr,
+	NULL,
+};
+
+/* Create the target sysfs tunables; lut_attrs is set only on success. */
+int tgt_tunables_init(struct lu_target *lut)
+{
+	int rc = sysfs_create_files(&lut->lut_obd->obd_kset.kobj, tgt_attrs);
+
+	if (!rc)
+		lut->lut_attrs = tgt_attrs;
+	return rc;
+}
+EXPORT_SYMBOL(tgt_tunables_init);
+
+/* Remove the tunables recorded in lut_attrs; no-op when none are recorded. */
+void tgt_tunables_fini(struct lu_target *lut)
+{
+	if (!lut->lut_attrs)
+		return;
+	sysfs_remove_files(&lut->lut_obd->obd_kset.kobj, lut->lut_attrs);
+	lut->lut_attrs = NULL;
+}
+EXPORT_SYMBOL(tgt_tunables_fini);
+
/* * Save cross-MDT lock in lut_slc_locks.
* @@ -376,6 +610,8 @@ EXPORT_SYMBOL(tgt_fini); static struct kmem_cache *tgt_thread_kmem; static struct kmem_cache *tgt_session_kmem; +struct kmem_cache *tgt_fmd_kmem; + static struct lu_kmem_descr tgt_caches[] = { { .ckd_cache = &tgt_thread_kmem, @@ -388,6 +624,11 @@ static struct lu_kmem_descr tgt_caches[] = { .ckd_size = sizeof(struct tgt_session_info) }, { + .ckd_cache = &tgt_fmd_kmem, + .ckd_name = "tgt_fmd_cache", + .ckd_size = sizeof(struct tgt_fmd_data) + }, + { .ckd_cache = NULL } }; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 5d56ee3..dba9220 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -3300,7 +3300,7 @@ subr_36fh() { sleep 1 touch --date="$DATESTR" $DIR/$tdir/$tfile # setattr timestamp in past LS_BEFORE="`ls -l $DIR/$tdir/$tfile`" # old timestamp from client cache - cancel_lru_locks osc + cancel_lru_locks $OSC LS_AFTER="`ls -l $DIR/$tdir/$tfile`" # timestamp from OST object date; date +%s [ "$LS_BEFORE" != "$LS_AFTER" ] && \ @@ -3323,30 +3323,36 @@ run_test 36f "utime on file racing with OST BRW write ==========" test_36g() { remote_ost_nodsh && skip "remote OST with nodsh" [ $PARALLEL == "yes" ] && skip "skip parallel run" + [ $MDS1_VERSION -lt $(version_code 2.12.51) ] && + skip "Need MDS version at least 2.12.51" local fmd_max_age - local fmd_before - local fmd_after + local fmd + local facet="ost1" + local tgt="obdfilter" + + [[ $OSC == "mdc" ]] && tgt="mdt" && facet="mds1" test_mkdir $DIR/$tdir - fmd_max_age=$(do_facet ost1 \ - "lctl get_param -n obdfilter.*.client_cache_seconds 2> /dev/null | \ + fmd_max_age=$(do_facet $facet \ + "lctl get_param -n $tgt.*.tgt_fmd_seconds 2> /dev/null | \ head -n 1") - fmd_before=$(do_facet ost1 \ - "awk '/ll_fmd_cache/ {print \\\$2}' /proc/slabinfo") + echo "FMD max age: ${fmd_max_age}s" touch $DIR/$tdir/$tfile + fmd=$(do_facet $facet "lctl get_param -n $tgt.*.exports.*.fmd_count" | + gawk '{cnt=cnt+$1} END{print cnt}') + echo "FMD before: $fmd" + [[ $fmd == 0 ]] && + 
error "FMD wasn't create by touch" sleep $((fmd_max_age + 12)) - fmd_after=$(do_facet ost1 \ - "awk '/ll_fmd_cache/ {print \\\$2}' /proc/slabinfo") - - echo "fmd_before: $fmd_before" - echo "fmd_after: $fmd_after" - [[ $fmd_after -gt $fmd_before ]] && - echo "AFTER: $fmd_after > BEFORE: $fmd_before" && - error "fmd didn't expire after ping" || true + fmd=$(do_facet $facet "lctl get_param -n $tgt.*.exports.*.fmd_count" | + gawk '{cnt=cnt+$1} END{print cnt}') + echo "FMD after: $fmd" + [[ $fmd == 0 ]] || + error "FMD wasn't expired by ping" } -run_test 36g "filter mod data cache expiry =====================" +run_test 36g "FMD cache expiry =====================" test_36h() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -3443,7 +3449,7 @@ test_39b() { [ $unlink_new2 -eq $unlink_new ] || error "unlink file reverses mtime" [ $rename_new2 -eq $rename_new ] || error "rename file reverses mtime" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3477,7 +3483,7 @@ test_39c() { [ "$mtime2" = "$mtime3" ] || \ error "mtime ($mtime2) changed (to $mtime3) on rename" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3495,7 +3501,7 @@ test_39d() { [ $mtime = $TEST_39_MTIME ] || \ error "mtime($mtime) is not set to $TEST_39_MTIME" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3515,7 +3521,7 @@ test_39e() { [ $mtime2 = $TEST_39_MTIME ] || \ error "mtime($mtime2) is not set to $TEST_39_MTIME" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3536,7 +3542,7 @@ test_39f() { [ $mtime2 = $TEST_39_MTIME ] || \ error "mtime($mtime2) is not set to $TEST_39_MTIME" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3557,7 +3563,7 @@ 
test_39g() { [ "$mtime1" = "$mtime2" ] || \ error "lost mtime: $mtime2, should be $mtime1" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3584,7 +3590,7 @@ test_39h() { [ "$mtime2" = $TEST_39_MTIME ] || \ error "lost mtime: $mtime2, should be $TEST_39_MTIME" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done fi @@ -3608,7 +3614,7 @@ test_39i() { [ "$mtime1" = "$mtime2" ] || \ error "lost mtime: $mtime2, should be $mtime1" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } @@ -3639,7 +3645,7 @@ test_39j() { error "mtime is lost on close: $mtime2, " \ "should be $mtime1" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done lctl set_param fail_loc=0 @@ -3743,7 +3749,7 @@ test_39m() { [ "$timestamps" = "$far_past_atime $far_past_mtime" ] || \ error "atime or mtime set incorrectly" - cancel_lru_locks osc + cancel_lru_locks $OSC if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done }