/* target grants fields */
struct tg_grants_data lut_tgd;
+ /* target tunables */
+ const struct attribute **lut_attrs;
+
/* FMD (file modification data) values */
int lut_fmd_max_num;
time64_t lut_fmd_max_age;
struct thandle *th, bool update_lrd_file);
struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted,
__u64 xid);
+int tgt_tunables_init(struct lu_target *lut);
+void tgt_tunables_fini(struct lu_target *lut);
/* target/tgt_grant.c */
static inline int exp_grant_param_supp(struct obd_export *exp)
__u64 xid);
bool tgt_fmd_check(struct obd_export *exp, const struct lu_fid *fid,
__u64 xid);
+/* tgt_fmd_drop() is a real function only when DO_FMD_DROP is defined;
+ * otherwise calls to it expand to an empty statement.
+ */
+#ifdef DO_FMD_DROP
+void tgt_fmd_drop(struct obd_export *exp, const struct lu_fid *fid);
+#else
+#define tgt_fmd_drop(exp, fid) do {} while (0)
+#endif
/* target/update_trans.c */
int distribute_txn_init(const struct lu_env *env,
int target_handle_connect(struct ptlrpc_request *req);
int target_handle_disconnect(struct ptlrpc_request *req);
void target_destroy_export(struct obd_export *exp);
-int target_handle_ping(struct ptlrpc_request *req);
void target_committed_to_req(struct ptlrpc_request *req);
void target_cancel_recovery_timer(struct obd_device *obd);
void target_stop_recovery_thread(struct obd_device *obd);
int (*o_quotactl)(struct obd_device *, struct obd_export *,
struct obd_quotactl *);
- int (*o_ping)(const struct lu_env *, struct obd_export *exp);
-
/* pools methods */
int (*o_pool_new)(struct obd_device *obd, char *poolname);
int (*o_pool_del)(struct obd_device *obd, char *poolname);
RETURN(rc);
}
-static inline int obd_ping(const struct lu_env *env, struct obd_export *exp)
-{
- int rc;
- ENTRY;
-
- if (!exp->exp_obd->obd_type ||
- !exp->exp_obd->obd_type->typ_dt_ops->o_ping)
- RETURN(0);
-
- rc = OBP(exp->exp_obd, ping)(env, exp);
- RETURN(rc);
-}
-
static inline int obd_pool_new(struct obd_device *obd, char *poolname)
{
int rc;
RETURN(0);
}
-int target_handle_ping(struct ptlrpc_request *req)
-{
- obd_ping(req->rq_svc_thread->t_env, req->rq_export);
- return req_capsule_server_pack(&req->rq_pill);
-}
-
void target_committed_to_req(struct ptlrpc_request *req)
{
struct obd_export *exp = req->rq_export;
* doesn't already exist so we can store the reservation handle
* there. */
valid = OBD_MD_FLUID | OBD_MD_FLGID;
- valid |= OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+ if (tgt_fmd_check(exp, mdt_object_fid(mo),
+ mdt_info_req(info)->rq_xid))
+ valid |= OBD_MD_FLATIME | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME;
la_from_obdo(la, oa, valid);
la->la_size = start;
la->la_valid |= LA_SIZE;
+ /* MDT supports FMD for Data-on-MDT needs */
+ if (la->la_valid & (LA_ATIME | LA_MTIME | LA_CTIME))
+ tgt_fmd_update(tsi->tsi_exp, &tsi->tsi_fid,
+ tgt_ses_req(tsi)->rq_xid);
+
rc = mdt_object_punch(tsi->tsi_env, mdt->mdt_bottom, dob,
start, end, la);
mdt_dom_write_unlock(mo);
}
LPROC_SEQ_FOPS(mdt_enable_dir_migration);
-
-/**
- * Show MDT policy for handling dirty metadata under a lock being cancelled.
- *
- * \param[in] m seq_file handle
- * \param[in] data unused for single entry
- *
- * \retval 0 on success
- * \retval negative value on error
- */
-static int mdt_slc_seq_show(struct seq_file *m, void *data)
-{
- struct obd_device *obd = m->private;
- struct lu_target *tgt = obd->u.obt.obt_lut;
- char *slc_states[] = {"never", "blocking", "always" };
-
- seq_printf(m, "%s\n", slc_states[tgt->lut_sync_lock_cancel]);
- return 0;
-}
-LPROC_SEQ_FOPS_RO(mdt_slc);
-
/**
* Show MDT async commit count.
*
.fops = &mdt_recovery_time_hard_fops },
{ .name = "recovery_time_soft",
.fops = &mdt_recovery_time_soft_fops },
- { .name = "sync_lock_cancel",
- .fops = &mdt_slc_fops },
{ .name = "async_commit_count",
.fops = &mdt_async_commit_count_fops },
{ .name = "sync_count",
return rc;
}
+ rc = tgt_tunables_init(&mdt->mdt_lut);
+ if (rc) {
+ CERROR("%s: failed to init target tunables: rc = %d\n",
+ mdt_obd_name(mdt), rc);
+ return rc;
+ }
+
rc = hsm_cdt_procfs_init(mdt);
if (rc) {
CERROR("%s: cannot create hsm proc entries: rc = %d\n",
lprocfs_free_per_client_stats(obd);
hsm_cdt_procfs_fini(mdt);
+ tgt_tunables_fini(&mdt->mdt_lut);
lprocfs_obd_cleanup(obd);
lprocfs_free_md_stats(obd);
lprocfs_free_obd_stats(obd);
if (ma->ma_valid & MA_LOV)
GOTO(out_put, rc = -EPROTO);
+ /* MDT supports FMD for regular files due to Data-on-MDT */
+ if (S_ISREG(lu_object_attr(&mo->mot_obj)) &&
+ ma->ma_attr.la_valid & (LA_ATIME | LA_MTIME | LA_CTIME))
+ tgt_fmd_update(info->mti_exp, mdt_object_fid(mo),
+ req->rq_xid);
+
rc = mdt_attr_set(info, mo, ma);
if (rc)
GOTO(out_put, rc);
}
LPROC_SEQ_FOPS_RO(lprocfs_exp_replydata);
+/**
+ * Hash iteration callback printing the FMD count of one export.
+ *
+ * \param[in] hs	hash table being iterated
+ * \param[in] bd	hash bucket descriptor (unused)
+ * \param[in] hnode	hash node of the export to report
+ * \param[in] cb_data	seq_file handle to print into
+ *
+ * \retval		0 always
+ */
+int lprocfs_exp_print_fmd_count_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+				    struct hlist_node *hnode, void *cb_data)
+{
+	struct obd_export *export = cfs_hash_object(hs, hnode);
+	struct seq_file *seq = cb_data;
+
+	seq_printf(seq, "%d\n", export->exp_target_data.ted_fmd_count);
+	return 0;
+}
+
+/**
+ * Show the FMD count of every export hashed under this NID.
+ *
+ * \param[in] m		seq_file handle; m->private is the NID stats entry
+ * \param[in] data	unused
+ *
+ * \retval		0 always
+ */
+int lprocfs_exp_fmd_count_seq_show(struct seq_file *m, void *data)
+{
+	struct nid_stat *nidstat = m->private;
+
+	/* one output line per export found for this NID */
+	cfs_hash_for_each_key(nidstat->nid_obd->obd_nid_hash, &nidstat->nid,
+			      lprocfs_exp_print_fmd_count_seq, m);
+	return 0;
+}
+LPROC_SEQ_FOPS_RO(lprocfs_exp_fmd_count);
+
int lprocfs_nid_stats_clear_seq_show(struct seq_file *m, void *data)
{
seq_puts(m, "Write into this file to clear all nid stats and stale nid entries\n");
GOTO(destroy_new_ns, rc);
}
+ entry = lprocfs_add_simple(new_stat->nid_proc, "fmd_count", new_stat,
+ &lprocfs_exp_fmd_count_fops);
+ if (IS_ERR(entry)) {
+ rc = PTR_ERR(entry);
+ CWARN("%s: error adding the fmd_count file: rc = %d\n",
+ obd->obd_name, rc);
+ GOTO(destroy_new_ns, rc);
+ }
+
spin_lock(&exp->exp_lock);
exp->exp_nid_stats = new_stat;
spin_unlock(&exp->exp_lock);
MODULES := ofd
ofd-objs := ofd_dev.o ofd_obd.o ofd_fs.o ofd_trans.o ofd_objects.o ofd_io.o
-ofd-objs += lproc_ofd.o ofd_fmd.o ofd_dlm.o ofd_lvb.o
+ofd-objs += lproc_ofd.o ofd_dlm.o ofd_lvb.o
EXTRA_DIST = $(ofd-objs:%.o=%.c) ofd_internal.h
LPROC_SEQ_FOPS_RO(ofd_last_id);
/**
- * Show maximum number of Filter Modification Data (FMD) maintained by OFD.
- *
- * \param[in] m seq_file handle
- * \param[in] data unused for single entry
- *
- * \retval 0 on success
- * \retval negative value on error
- */
-static ssize_t client_cache_count_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct lu_target *lut = obd->u.obt.obt_lut;
-
- return sprintf(buf, "%u\n", lut->lut_fmd_max_num);
-}
-
-/**
- * Change number of FMDs maintained by OFD.
- *
- * This defines how large the list of FMDs can be.
- *
- * \param[in] file proc file
- * \param[in] buffer string which represents maximum number
- * \param[in] count \a buffer length
- * \param[in] off unused for single entry
- *
- * \retval \a count on success
- * \retval negative number on error
- */
-static ssize_t client_cache_count_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct lu_target *lut = obd->u.obt.obt_lut;
- int val;
- int rc;
-
- rc = kstrtoint(buffer, 0, &val);
- if (rc)
- return rc;
-
- if (val < 1 || val > 65536)
- return -EINVAL;
-
- lut->lut_fmd_max_num = val;
- return count;
-}
-LUSTRE_RW_ATTR(client_cache_count);
-
-/**
- * Show the maximum age of FMD data in seconds.
- *
- * \param[in] m seq_file handle
- * \param[in] data unused for single entry
- *
- * \retval 0 on success
- * \retval negative value on error
- */
-static ssize_t client_cache_seconds_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct lu_target *lut = obd->u.obt.obt_lut;
-
- return sprintf(buf, "%lld\n", lut->lut_fmd_max_age);
-}
-
-/**
- * Set the maximum age of FMD data in seconds.
- *
- * This defines how long FMD data stays in the FMD list.
- *
- * \param[in] file proc file
- * \param[in] buffer string which represents maximum number
- * \param[in] count \a buffer length
- * \param[in] off unused for single entry
- *
- * \retval \a count on success
- * \retval negative number on error
- */
-static ssize_t client_cache_seconds_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct lu_target *lut = obd->u.obt.obt_lut;
- time64_t val;
- int rc;
-
- rc = kstrtoll(buffer, 0, &val);
- if (rc)
- return rc;
-
- if (val < 1 || val > 65536) /* ~ 18 hour max */
- return -EINVAL;
-
- lut->lut_fmd_max_age = val;
- return count;
-}
-LUSTRE_RW_ATTR(client_cache_seconds);
-
-/**
* Show if the OFD is in degraded mode.
*
* Degraded means OFD has a failed drive or is undergoing RAID rebuild.
}
LUSTRE_RW_ATTR(sync_journal);
-/* This must be longer than the longest string below */
-#define SYNC_STATES_MAXLEN 16
-
static int ofd_brw_size_seq_show(struct seq_file *m, void *data)
{
struct obd_device *obd = m->private;
LPROC_SEQ_FOPS(ofd_brw_size);
-static char *sync_on_cancel_states[] = {"never",
- "blocking",
- "always" };
-
-/**
- * Show OFD policy for handling dirty data under a lock being cancelled.
- *
- * \param[in] m seq_file handle
- * \param[in] data unused for single entry
- *
- * \retval 0 on success
- * \retval negative value on error
- */
-static ssize_t sync_lock_cancel_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct lu_target *tgt = obd->u.obt.obt_lut;
-
- return sprintf(buf, "%s\n",
- sync_on_cancel_states[tgt->lut_sync_lock_cancel]);
-}
-
-/**
- * Change OFD policy for handling dirty data under a lock being cancelled.
- *
- * This variable defines what action OFD takes upon lock cancel
- * There are three possible modes:
- * 1) never - never do sync upon lock cancel. This can lead to data
- * inconsistencies if both the OST and client crash while writing a file
- * that is also concurrently being read by another client. In these cases,
- * this may allow the file data to "rewind" to an earlier state.
- * 2) blocking - do sync only if there is blocking lock, e.g. if another
- * client is trying to access this same object
- * 3) always - do sync always
- *
- * \param[in] file proc file
- * \param[in] buffer string which represents policy
- * \param[in] count \a buffer length
- * \param[in] off unused for single entry
- *
- * \retval \a count on success
- * \retval negative number on error
- */
-static ssize_t sync_lock_cancel_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct lu_target *tgt = obd->u.obt.obt_lut;
- int val = -1;
- int i;
-
- if (count == 0 || count >= SYNC_STATES_MAXLEN)
- return -EINVAL;
-
- for (i = 0 ; i < NUM_SYNC_ON_CANCEL_STATES; i++) {
- if (strcmp(buffer, sync_on_cancel_states[i]) == 0) {
- val = i;
- break;
- }
- }
-
- /* Legacy numeric codes */
- if (val == -1) {
- int rc = kstrtoint(buffer, 0, &val);
- if (rc)
- return rc;
- }
-
- if (val < 0 || val > 2)
- return -EINVAL;
-
- spin_lock(&tgt->lut_flags_lock);
- tgt->lut_sync_lock_cancel = val;
- spin_unlock(&tgt->lut_flags_lock);
- return count;
-}
-LUSTRE_RW_ATTR(sync_lock_cancel);
-
/**
* Show the limit of soft sync RPCs.
*
&lustre_attr_seqs_allocated.attr,
&lustre_attr_grant_precreate.attr,
&lustre_attr_precreate_batch.attr,
- &lustre_attr_client_cache_count.attr,
- &lustre_attr_client_cache_seconds.attr,
&lustre_attr_degraded.attr,
&lustre_attr_fstype.attr,
&lustre_attr_sync_journal.attr,
- &lustre_attr_sync_lock_cancel.attr,
&lustre_attr_soft_sync_limit.attr,
&lustre_attr_lfsck_speed_limit.attr,
&lustre_attr_checksum_t10pi_enforce.attr,
RETURN(rc);
}
+ rc = tgt_tunables_init(&ofd->ofd_lut);
+ if (rc) {
+ CERROR("%s: tgt_tunables_init failed: rc = %d\n",
+ obd->obd_name, rc);
+ GOTO(obd_cleanup, rc);
+ }
+
rc = lprocfs_alloc_obd_stats(obd, LPROC_OFD_STATS_LAST);
if (rc) {
CERROR("%s: lprocfs_alloc_obd_stats failed: %d.\n",
obd->obd_name, rc);
- GOTO(obd_cleanup, rc);
+ GOTO(tgt_cleanup, rc);
}
entry = lprocfs_register("exports", obd->obd_proc_entry, NULL, NULL);
obd_free_stats:
lprocfs_free_obd_stats(obd);
+tgt_cleanup:
+ tgt_tunables_fini(&ofd->ofd_lut);
obd_cleanup:
lprocfs_obd_cleanup(obd);
/* Slab for OFD object allocation */
static struct kmem_cache *ofd_object_kmem;
-struct kmem_cache *tgt_fmd_kmem;
-
static struct lu_kmem_descr ofd_caches[] = {
{
.ckd_cache = &ofd_object_kmem,
.ckd_size = sizeof(struct ofd_object)
},
{
- .ckd_cache = &tgt_fmd_kmem,
- .ckd_name = "ll_fmd_cache",
- .ckd_size = sizeof(struct tgt_fmd_data)
- },
- {
.ckd_cache = NULL
}
};
{
struct obd_device *obd = ofd_obd(ofd);
+ tgt_tunables_fini(&ofd->ofd_lut);
lprocfs_free_per_client_stats(obd);
lprocfs_obd_cleanup(obd);
lprocfs_free_obd_stats(obd);
/* set this lu_device to obd, because error handling need it */
obd->obd_lu_dev = &m->ofd_dt_dev.dd_lu_dev;
- rc = ofd_tunables_init(m);
- if (rc) {
- CERROR("Can't init ofd lprocfs, rc %d\n", rc);
- RETURN(rc);
- }
-
/* No connection accepted until configurations will finish */
spin_lock(&obd->obd_dev_lock);
obd->obd_no_conn = 1;
info = ofd_info_init(env, NULL);
if (info == NULL)
- GOTO(err_fini_proc, rc = -EFAULT);
+ RETURN(-EFAULT);
rc = ofd_stack_init(env, m, cfg);
if (rc) {
- CERROR("Can't init device stack, rc %d\n", rc);
- GOTO(err_fini_proc, rc);
+ CERROR("%s: can't init device stack, rc %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
}
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 14, 53, 0)
if (rc)
GOTO(err_free_ns, rc);
+ rc = ofd_tunables_init(m);
+ if (rc)
+ GOTO(err_fini_lut, rc);
+
tgd->tgd_reserved_pcnt = 0;
m->ofd_brw_size = m->ofd_lut.lut_dt_conf.ddp_brw_size;
rc = ofd_fs_setup(env, m, obd);
if (rc)
- GOTO(err_fini_lut, rc);
+ GOTO(err_fini_proc, rc);
fid.f_seq = FID_SEQ_LOCAL_NAME;
fid.f_oid = 1;
m->ofd_los = NULL;
err_fini_fs:
ofd_fs_cleanup(env, m);
+err_fini_proc:
+ ofd_procfs_fini(m);
err_fini_lut:
tgt_fini(env, &m->ofd_lut);
err_free_ns:
obd->obd_namespace = m->ofd_namespace = NULL;
err_fini_stack:
ofd_stack_fini(env, m, &m->ofd_osd->dd_lu_dev);
-err_fini_proc:
- ofd_procfs_fini(m);
return rc;
}
obd_exports_barrier(obd);
obd_zombie_barrier();
+ ofd_procfs_fini(m);
tgt_fini(env, &m->ofd_lut);
ofd_stop_inconsistency_verification_thread(m);
lfsck_degister(env, m->ofd_osd);
}
ofd_stack_fini(env, m, &m->ofd_dt_dev.dd_lu_dev);
- ofd_procfs_fini(m);
+
LASSERT(atomic_read(&d->ld_ref) == 0);
server_put_mount(obd->obd_name, true);
EXIT;
#define OFD_VALID_FLAGS (LA_TYPE | LA_MODE | LA_SIZE | LA_BLOCKS | \
LA_BLKSIZE | LA_ATIME | LA_MTIME | LA_CTIME)
-/* FMD tracking data */
-struct tgt_fmd_data {
- struct list_head fmd_list; /* linked to tgt_fmd_list */
- struct lu_fid fmd_fid; /* FID being written to */
- __u64 fmd_mactime_xid; /* xid highest {m,a,c}time setattr */
- time64_t fmd_expire; /* time when the fmd should expire */
- int fmd_refcount; /* reference counter - list holds 1 */
-};
-
#define OFD_SOFT_SYNC_LIMIT_DEFAULT 16
/* request stats */
return fo;
}
-/* ofd_fmd.c */
-extern struct kmem_cache *tgt_fmd_kmem;
-void ofd_fmd_expire(struct obd_export *exp);
-void ofd_fmd_cleanup(struct obd_export *exp);
-#ifdef DO_FMD_DROP
-void ofd_fmd_drop(struct obd_export *exp, const struct lu_fid *fid);
-#else
-#define ofd_fmd_drop(exp, fid) do {} while (0)
-#endif
-
/* ofd_dev.c */
int ofd_fid_set_index(const struct lu_env *env, struct ofd_device *ofd,
int index);
{
int rc;
- spin_lock_init(&exp->exp_target_data.ted_fmd_lock);
- INIT_LIST_HEAD(&exp->exp_target_data.ted_fmd_list);
atomic_set(&exp->exp_filter_data.fed_soft_sync_count, 0);
spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
ldlm_destroy_export(exp);
tgt_client_free(exp);
- ofd_fmd_cleanup(exp);
-
/*
* discard grants once we're sure no more
* interaction with the client is possible
}
/**
- * Implementation of obd_ops::o_ping.
- *
- * This is OFD-specific part of OBD_PING request handling.
- * It controls Filter Modification Data (FMD) expiration each time PING is
- * received.
- *
- * \see ofd_fmd_expire() and ofd_fmd.c for details
- *
- * \param[in] env execution environment
- * \param[in] exp OBD export of client
- *
- * \retval 0
- */
-static int ofd_ping(const struct lu_env *env, struct obd_export *exp)
-{
- ofd_fmd_expire(exp);
- return 0;
-}
-
-/**
* Implementation of obd_ops::o_health_check.
*
* This function checks the OFD device health - ability to respond on
.o_getattr = ofd_echo_getattr,
.o_iocontrol = ofd_iocontrol,
.o_precleanup = ofd_precleanup,
- .o_ping = ofd_ping,
.o_health_check = ofd_health_check,
.o_set_info_async = ofd_set_info_async,
.o_get_info = ofd_get_info,
if (rc)
GOTO(stop, rc);
- ofd_fmd_drop(ofd_info(env)->fti_exp, &fo->ofo_header.loh_fid);
+ tgt_fmd_drop(ofd_info(env)->fti_exp, &fo->ofo_header.loh_fid);
dt_ref_del(env, ofd_object_child(fo), th);
dt_destroy(env, ofd_object_child(fo), th);
target_objs += $(TARGET)tgt_handler.o $(TARGET)out_handler.o
target_objs += $(TARGET)out_lib.o $(TARGET)update_trans.o
target_objs += $(TARGET)update_records.o $(TARGET)update_recovery.o
-target_objs += $(TARGET)tgt_grant.o
+target_objs += $(TARGET)tgt_grant.o $(TARGET)tgt_fmd.o
ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
EXTRA_DIST = tgt_main.c tgt_lastrcvd.c tgt_handler.c tgt_internal.h \
- tgt_grant.c out_handler.c out_lib.c barrier.c
+ tgt_grant.c out_handler.c out_lib.c barrier.c tgt_fmd.c
EXTRA_DIST += update_trans.c
EXTRA_DIST += update_records.c
EXTRA_DIST += update_recovery.c
* Use is subject to license terms.
*
* Copyright (c) 2012, 2014, Intel Corporation.
+ *
+ * Copyright (c) 2019, DDN Storage Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
- * lustre/ofd/ofd_fmd.c
+ * lustre/target/tgt_fmd.c
*
* This file provides functions to handle Filter Modification Data (FMD).
* The FMD is responsible for file attributes to be applied in
* FMD can expire if there are no updates for a long time to keep the list
* reasonably small.
*
- * Author: Andreas Dilger <andreas.dilger@intel.com>
+ * Author: Andreas Dilger <adilger@whamcloud.com>
+ * Author: Mike Pershin <mpershin@whamcloud.com>
*/
-#define DEBUG_SUBSYSTEM S_FILTER
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd.h>
+#include <obd_class.h>
-#include "ofd_internal.h"
+#include "tgt_internal.h"
/**
* Drop FMD reference and free it if reference drops to zero.
* \param[in] exp OBD export
* \param[in] fmd FMD to put
*/
-static inline void ofd_fmd_put_nolock(struct obd_export *exp,
+static inline void tgt_fmd_put_nolock(struct obd_export *exp,
struct tgt_fmd_data *fmd)
{
struct tg_export_data *ted = &exp->exp_target_data;
* \param[in] exp OBD export
* \param[in] fmd FMD to put
*/
-void ofd_fmd_put(struct obd_export *exp, struct tgt_fmd_data *fmd)
+void tgt_fmd_put(struct obd_export *exp, struct tgt_fmd_data *fmd)
{
struct tg_export_data *ted = &exp->exp_target_data;
spin_lock(&ted->ted_fmd_lock);
- ofd_fmd_put_nolock(exp, fmd); /* caller reference */
+ tgt_fmd_put_nolock(exp, fmd); /* caller reference */
spin_unlock(&ted->ted_fmd_lock);
}
* \param[in] exp OBD export
* \param[in] keep FMD to keep always
*/
-static void ofd_fmd_expire_nolock(struct obd_export *exp,
+static void tgt_fmd_expire_nolock(struct obd_export *exp,
struct tgt_fmd_data *keep)
{
struct tg_export_data *ted = &exp->exp_target_data;
break;
list_del_init(&fmd->fmd_list);
- ofd_fmd_put_nolock(exp, fmd); /* list reference */
+ tgt_fmd_put_nolock(exp, fmd); /* list reference */
}
}
*
* \param[in] exp OBD export
*/
-void ofd_fmd_expire(struct obd_export *exp)
+void tgt_fmd_expire(struct obd_export *exp)
{
struct tg_export_data *ted = &exp->exp_target_data;
spin_lock(&ted->ted_fmd_lock);
- ofd_fmd_expire_nolock(exp, NULL);
+ tgt_fmd_expire_nolock(exp, NULL);
spin_unlock(&ted->ted_fmd_lock);
}
* \retval struct tgt_fmd_data found by FID
* \retval NULL is FMD is not found
*/
-static struct tgt_fmd_data *ofd_fmd_find_nolock(struct obd_export *exp,
+static struct tgt_fmd_data *tgt_fmd_find_nolock(struct obd_export *exp,
const struct lu_fid *fid)
{
struct tg_export_data *ted = &exp->exp_target_data;
struct tgt_fmd_data *found = NULL, *fmd;
struct lu_target *lut = exp->exp_obd->u.obt.obt_lut;
+ time64_t now = ktime_get_seconds();
assert_spin_locked(&ted->ted_fmd_lock);
if (lu_fid_eq(&fmd->fmd_fid, fid)) {
found = fmd;
list_move_tail(&fmd->fmd_list, &ted->ted_fmd_list);
- fmd->fmd_expire = ktime_get_seconds() +
- lut->lut_fmd_max_age;
+ fmd->fmd_expire = now + lut->lut_fmd_max_age;
break;
}
}
- ofd_fmd_expire_nolock(exp, found);
+ tgt_fmd_expire_nolock(exp, found);
return found;
}
* \retval struct tgt_fmd_data found by FID
* \retval NULL indicates FMD is not found
*/
-struct tgt_fmd_data *ofd_fmd_find(struct obd_export *exp,
+struct tgt_fmd_data *tgt_fmd_find(struct obd_export *exp,
const struct lu_fid *fid)
{
struct tg_export_data *ted = &exp->exp_target_data;
struct tgt_fmd_data *fmd;
spin_lock(&ted->ted_fmd_lock);
- fmd = ofd_fmd_find_nolock(exp, fid);
+ fmd = tgt_fmd_find_nolock(exp, fid);
if (fmd)
fmd->fmd_refcount++; /* caller reference */
spin_unlock(&ted->ted_fmd_lock);
* \retval struct tgt_fmd_data found by FID
* \retval NULL indicates FMD is not found
*/
-struct tgt_fmd_data *ofd_fmd_get(struct obd_export *exp,
+struct tgt_fmd_data *tgt_fmd_get(struct obd_export *exp,
const struct lu_fid *fid)
{
struct tg_export_data *ted = &exp->exp_target_data;
OBD_SLAB_ALLOC_PTR(fmd_new, tgt_fmd_kmem);
spin_lock(&ted->ted_fmd_lock);
- found = ofd_fmd_find_nolock(exp, fid);
+ found = tgt_fmd_find_nolock(exp, fid);
if (fmd_new) {
- if (found == NULL) {
+ if (!found) {
list_add_tail(&fmd_new->fmd_list, &ted->ted_fmd_list);
fmd_new->fmd_fid = *fid;
fmd_new->fmd_refcount++; /* list reference */
* \param[in] exp OBD export
* \param[in] fid FID of FMD to drop
*/
-void ofd_fmd_drop(struct obd_export *exp, const struct lu_fid *fid)
+void tgt_fmd_drop(struct obd_export *exp, const struct lu_fid *fid)
{
struct tg_export_data *ted = &exp->exp_target_data;
- struct tgt_fmd_data *found = NULL;
+ struct tgt_fmd_data *fmd = NULL;
spin_lock(&ted->ted_fmd_lock);
- found = ofd_fmd_find_nolock(exp, fid);
- if (found) {
- list_del_init(&found->fmd_list);
- ofd_fmd_put_nolock(exp, found);
+ fmd = tgt_fmd_find_nolock(exp, fid);
+ if (fmd) {
+ list_del_init(&fmd->fmd_list);
+ tgt_fmd_put_nolock(exp, fmd);
}
spin_unlock(&ted->ted_fmd_lock);
}
+EXPORT_SYMBOL(tgt_fmd_drop);
#endif
/**
*
* \param[in] exp OBD export
*/
-void ofd_fmd_cleanup(struct obd_export *exp)
+void tgt_fmd_cleanup(struct obd_export *exp)
{
struct tg_export_data *ted = &exp->exp_target_data;
struct tgt_fmd_data *fmd = NULL, *tmp;
list_del_init(&fmd->fmd_list);
if (fmd->fmd_refcount > 1) {
CDEBUG(D_INFO,
- "fmd %p is still referenced (refcount = %d)\n",
+ "fmd %p still referenced (refcount = %d)\n",
fmd, fmd->fmd_refcount);
}
- ofd_fmd_put_nolock(exp, fmd);
+ tgt_fmd_put_nolock(exp, fmd);
}
spin_unlock(&ted->ted_fmd_lock);
+ LASSERT(list_empty(&exp->exp_target_data.ted_fmd_list));
}
/**
{
struct tgt_fmd_data *fmd;
- fmd = ofd_fmd_get(exp, fid);
+ fmd = tgt_fmd_get(exp, fid);
if (fmd) {
if (fmd->fmd_mactime_xid < xid)
fmd->fmd_mactime_xid = xid;
- ofd_fmd_put(exp, fmd);
+ tgt_fmd_put(exp, fmd);
}
}
+EXPORT_SYMBOL(tgt_fmd_update);
/**
* Chech that time can be updated by the request with given XID.
struct tgt_fmd_data *fmd;
bool can_update = true;
- fmd = ofd_fmd_find(exp, fid);
+ fmd = tgt_fmd_find(exp, fid);
if (fmd) {
can_update = fmd->fmd_mactime_xid < xid;
- ofd_fmd_put(exp, fmd);
+ tgt_fmd_put(exp, fmd);
}
return can_update;
}
+EXPORT_SYMBOL(tgt_fmd_check);
* Author: Johann Lombardi <johann.lombardi@intel.com>
*/
-#define DEBUG_SUBSYSTEM S_FILTER
+#define DEBUG_SUBSYSTEM S_CLASS
#include <obd.h>
#include <obd_class.h>
ENTRY;
- rc = target_handle_ping(tgt_ses_req(tsi));
+ /* The target-specific part of OBD_PING request handling.
+ * It controls Filter Modification Data (FMD) expiration each time
+ * PING is received.
+ *
+ * Valid only for replayable targets, e.g. MDT and OFD
+ */
+ if (tsi->tsi_exp->exp_obd->obd_replayable)
+ tgt_fmd_expire(tsi->tsi_exp);
+
+ rc = req_capsule_server_pack(tsi->tsi_pill);
if (rc)
RETURN(err_serious(rc));
void barrier_init(void);
void barrier_fini(void);
+/* FMD (Filter Modification Data) tracking data kept for one FID */
+struct tgt_fmd_data {
+ struct list_head fmd_list; /* linked to the export's ted_fmd_list */
+ struct lu_fid fmd_fid; /* FID being written to */
+ __u64 fmd_mactime_xid; /* highest xid of a {m,a,c}time setattr */
+ time64_t fmd_expire; /* time when the fmd should expire */
+ int fmd_refcount; /* reference counter - list holds 1 */
+};
+
+/* tgt_fmd.c */
+extern struct kmem_cache *tgt_fmd_kmem;
+void tgt_fmd_expire(struct obd_export *exp);
+void tgt_fmd_cleanup(struct obd_export *exp);
+
#endif /* _TG_INTERNAL_H */
spin_lock_init(&exp->exp_target_data.ted_nodemap_lock);
INIT_LIST_HEAD(&exp->exp_target_data.ted_nodemap_member);
+ spin_lock_init(&exp->exp_target_data.ted_fmd_lock);
+ INIT_LIST_HEAD(&exp->exp_target_data.ted_fmd_list);
OBD_ALLOC_PTR(exp->exp_target_data.ted_lcd);
if (exp->exp_target_data.ted_lcd == NULL)
LASSERT(exp != exp->exp_obd->obd_self_export);
+ tgt_fmd_cleanup(exp);
+
/* free reply data */
mutex_lock(&ted->ted_lcd_lock);
list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) {
#include "tgt_internal.h"
#include "../ptlrpc/ptlrpc_internal.h"
+/* This must be longer than the longest string below; a write of
+ * SYNC_STATES_MAXLEN bytes or more to sync_lock_cancel is rejected.
+ */
+#define SYNC_STATES_MAXLEN 16
+/* Policy names, indexed by the lut_sync_lock_cancel value */
+static char *sync_on_cancel_states[] = {"never",
+ "blocking",
+ "always" };
+
+/**
+ * Report the policy for handling dirty data under a lock being cancelled.
+ *
+ * Prints the policy name selected by the target's lut_sync_lock_cancel
+ * value, followed by a newline.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being read (unused)
+ * \param[out] buf	buffer that receives the policy name
+ *
+ * \retval		number of bytes written to \a buf
+ */
+static ssize_t sync_lock_cancel_show(struct kobject *kobj,
+				     struct attribute *attr, char *buf)
+{
+	struct obd_device *obd;
+	struct lu_target *lut;
+
+	obd = container_of(kobj, struct obd_device, obd_kset.kobj);
+	lut = obd->u.obt.obt_lut;
+
+	return sprintf(buf, "%s\n",
+		       sync_on_cancel_states[lut->lut_sync_lock_cancel]);
+}
+
+/**
+ * Change policy for handling dirty data under a lock being cancelled.
+ *
+ * This variable defines what action target takes upon lock cancel
+ * There are three possible modes:
+ * 1) never - never do sync upon lock cancel. This can lead to data
+ *    inconsistencies if both the OST and client crash while writing a file
+ *    that is also concurrently being read by another client. In these cases,
+ *    this may allow the file data to "rewind" to an earlier state.
+ * 2) blocking - do sync only if there is blocking lock, e.g. if another
+ *    client is trying to access this same object
+ * 3) always - do sync always
+ *
+ * The policy may be given by name or by its legacy numeric code (0-2).
+ * A trailing newline after the name, as appended by "echo", is accepted.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being written (unused)
+ * \param[in] buffer	string which represents the policy
+ * \param[in] count	\a buffer length
+ *
+ * \retval		\a count on success
+ * \retval		negative value on error
+ */
+static ssize_t sync_lock_cancel_store(struct kobject *kobj,
+				      struct attribute *attr,
+				      const char *buffer, size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *tgt = obd->u.obt.obt_lut;
+	int val = -1;
+	int i;
+
+	if (count == 0 || count >= SYNC_STATES_MAXLEN)
+		return -EINVAL;
+
+	for (i = 0 ; i < NUM_SYNC_ON_CANCEL_STATES; i++) {
+		/* sysfs_streq() treats "name" and "name\n" as equal, which a
+		 * plain strcmp() on the raw sysfs buffer does not
+		 */
+		if (sysfs_streq(buffer, sync_on_cancel_states[i])) {
+			val = i;
+			break;
+		}
+	}
+
+	/* Legacy numeric codes */
+	if (val == -1) {
+		int rc = kstrtoint(buffer, 0, &val);
+
+		if (rc)
+			return rc;
+	}
+
+	if (val < 0 || val > 2)
+		return -EINVAL;
+
+	/* lut_sync_lock_cancel is updated under lut_flags_lock */
+	spin_lock(&tgt->lut_flags_lock);
+	tgt->lut_sync_lock_cancel = val;
+	spin_unlock(&tgt->lut_flags_lock);
+	return count;
+}
+LUSTRE_RW_ATTR(sync_lock_cancel);
+
+/**
+ * Show maximum number of Filter Modification Data (FMD) maintained.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being read (unused)
+ * \param[out] buf	buffer that receives the value
+ *
+ * \retval		number of bytes written to \a buf
+ */
+ssize_t tgt_fmd_count_show(struct kobject *kobj, struct attribute *attr,
+			   char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *lut = obd->u.obt.obt_lut;
+
+	/* lut_fmd_max_num is a signed int, so print it with %d */
+	return sprintf(buf, "%d\n", lut->lut_fmd_max_num);
+}
+
+/**
+ * Set the maximum number of FMDs maintained by the target.
+ *
+ * This bounds how large the list of FMDs can grow. Accepted values are
+ * 1 through 65536 inclusive.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being written (unused)
+ * \param[in] buffer	string holding the new maximum
+ * \param[in] count	\a buffer length
+ *
+ * \retval		\a count on success
+ * \retval		negative value on error
+ */
+ssize_t tgt_fmd_count_store(struct kobject *kobj, struct attribute *attr,
+			    const char *buffer, size_t count)
+{
+	struct obd_device *obd;
+	int max_num;
+	int rc;
+
+	rc = kstrtoint(buffer, 0, &max_num);
+	if (rc)
+		return rc;
+
+	if (max_num < 1 || max_num > 65536)
+		return -EINVAL;
+
+	obd = container_of(kobj, struct obd_device, obd_kset.kobj);
+	obd->u.obt.obt_lut->lut_fmd_max_num = max_num;
+
+	return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_count);
+
+/**
+ * Show the maximum age of FMD data in seconds.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being read (unused)
+ * \param[out] buf	buffer that receives the value
+ *
+ * \retval		number of bytes written to \a buf
+ */
+ssize_t tgt_fmd_seconds_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct obd_device *obd;
+	struct lu_target *tgt;
+
+	obd = container_of(kobj, struct obd_device, obd_kset.kobj);
+	tgt = obd->u.obt.obt_lut;
+
+	return sprintf(buf, "%lld\n", tgt->lut_fmd_max_age);
+}
+
+/**
+ * Set the maximum age of FMD data in seconds.
+ *
+ * This bounds how long FMD data stays in the FMD list. Accepted values are
+ * 1 through 65536 inclusive.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being written (unused)
+ * \param[in] buffer	string holding the new maximum age
+ * \param[in] count	\a buffer length
+ *
+ * \retval		\a count on success
+ * \retval		negative number on error
+ */
+ssize_t tgt_fmd_seconds_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buffer, size_t count)
+{
+	struct obd_device *obd;
+	time64_t max_age;
+	int rc;
+
+	rc = kstrtoll(buffer, 0, &max_age);
+	if (rc)
+		return rc;
+
+	if (max_age < 1 || max_age > 65536) /* ~ 18 hour max */
+		return -EINVAL;
+
+	obd = container_of(kobj, struct obd_device, obd_kset.kobj);
+	obd->u.obt.obt_lut->lut_fmd_max_age = max_age;
+
+	return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_seconds);
+
+/* These two aliases keep the old attribute names 'client_cache_count' and
+ * 'client_cache_seconds' available for compatibility; the attributes were
+ * renamed to 'tgt_fmd_count' and 'tgt_fmd_seconds'.
+ * The rename was made in Lustre 2.13, so these aliases can be removed once
+ * backward compatibility with Lustre versions prior to 2.13 is no longer
+ * needed.
+ */
+static struct lustre_attr tgt_fmd_count_compat = __ATTR(client_cache_count,
+ 0644, tgt_fmd_count_show, tgt_fmd_count_store);
+static struct lustre_attr tgt_fmd_seconds_compat = __ATTR(client_cache_seconds,
+ 0644, tgt_fmd_seconds_show, tgt_fmd_seconds_store);
+
+/* NULL-terminated list of target sysfs attributes, created by
+ * tgt_tunables_init() and removed by tgt_tunables_fini().
+ */
+static const struct attribute *tgt_attrs[] = {
+ &lustre_attr_sync_lock_cancel.attr,
+ &lustre_attr_tgt_fmd_count.attr,
+ &lustre_attr_tgt_fmd_seconds.attr,
+ &tgt_fmd_count_compat.attr,
+ &tgt_fmd_seconds_compat.attr,
+ NULL,
+};
+
+/**
+ * Create the target sysfs attributes under the OBD device kobject.
+ *
+ * On success the attribute list is remembered in lut_attrs so that
+ * tgt_tunables_fini() knows there is something to remove.
+ *
+ * \param[in] lut	target whose tunables are created
+ *
+ * \retval		0 on success
+ * \retval		error code returned by sysfs_create_files()
+ */
+int tgt_tunables_init(struct lu_target *lut)
+{
+	int rc = sysfs_create_files(&lut->lut_obd->obd_kset.kobj, tgt_attrs);
+
+	if (rc)
+		return rc;
+
+	lut->lut_attrs = tgt_attrs;
+	return 0;
+}
+EXPORT_SYMBOL(tgt_tunables_init);
+
+/**
+ * Remove the target sysfs attributes, if they were created.
+ *
+ * Does nothing when lut_attrs is NULL, so a repeated call is harmless.
+ *
+ * \param[in] lut	target whose tunables are removed
+ */
+void tgt_tunables_fini(struct lu_target *lut)
+{
+	if (!lut->lut_attrs)
+		return;
+
+	sysfs_remove_files(&lut->lut_obd->obd_kset.kobj, lut->lut_attrs);
+	lut->lut_attrs = NULL;
+}
+EXPORT_SYMBOL(tgt_tunables_fini);
+
/*
* Save cross-MDT lock in lut_slc_locks.
*
static struct kmem_cache *tgt_thread_kmem;
static struct kmem_cache *tgt_session_kmem;
+/*
+ * Cache for struct tgt_fmd_data objects, described by the "tgt_fmd_cache"
+ * entry of tgt_caches[] below.
+ * NOTE(review): not static, presumably because the FMD code lives in another
+ * source file - confirm.
+ */
+struct kmem_cache *tgt_fmd_kmem;
+
static struct lu_kmem_descr tgt_caches[] = {
{
.ckd_cache = &tgt_thread_kmem,
.ckd_size = sizeof(struct tgt_session_info)
},
{
+ .ckd_cache = &tgt_fmd_kmem,
+ .ckd_name = "tgt_fmd_cache",
+ .ckd_size = sizeof(struct tgt_fmd_data)
+ },
+ {
.ckd_cache = NULL
}
};
sleep 1
touch --date="$DATESTR" $DIR/$tdir/$tfile # setattr timestamp in past
LS_BEFORE="`ls -l $DIR/$tdir/$tfile`" # old timestamp from client cache
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
LS_AFTER="`ls -l $DIR/$tdir/$tfile`" # timestamp from OST object
date; date +%s
[ "$LS_BEFORE" != "$LS_AFTER" ] && \
test_36g() {
remote_ost_nodsh && skip "remote OST with nodsh"
[ $PARALLEL == "yes" ] && skip "skip parallel run"
+ # NOTE(review): the gate uses MDS1_VERSION even when the facet checked
+ # below is ost1 - confirm this is sufficient for mixed-version clusters.
+ [ $MDS1_VERSION -lt $(version_code 2.12.51) ] &&
+ skip "Need MDS version at least 2.12.51"
local fmd_max_age
- local fmd_before
- local fmd_after
+ local fmd
+ local facet="ost1"
+ local tgt="obdfilter"
+
+ # With an "mdc" data client the FMD entries are looked up on the MDT.
+ [[ $OSC == "mdc" ]] && tgt="mdt" && facet="mds1"
test_mkdir $DIR/$tdir
- fmd_max_age=$(do_facet ost1 \
- "lctl get_param -n obdfilter.*.client_cache_seconds 2> /dev/null | \
+ fmd_max_age=$(do_facet $facet \
+ "lctl get_param -n $tgt.*.tgt_fmd_seconds 2> /dev/null | \
head -n 1")
- fmd_before=$(do_facet ost1 \
- "awk '/ll_fmd_cache/ {print \\\$2}' /proc/slabinfo")
+ echo "FMD max age: ${fmd_max_age}s"
touch $DIR/$tdir/$tfile
+ # Sum fmd_count over all exports. 'cnt+0' forces a numeric 0 when
+ # get_param prints nothing, so missing output fails the check below
+ # instead of slipping through as an empty string.
+ fmd=$(do_facet $facet "lctl get_param -n $tgt.*.exports.*.fmd_count" |
+ gawk '{cnt=cnt+$1} END{print cnt+0}')
+ echo "FMD before: $fmd"
+ [[ $fmd == 0 ]] &&
+ error "FMD wasn't create by touch"
sleep $((fmd_max_age + 12))
- fmd_after=$(do_facet ost1 \
- "awk '/ll_fmd_cache/ {print \\\$2}' /proc/slabinfo")
-
- echo "fmd_before: $fmd_before"
- echo "fmd_after: $fmd_after"
- [[ $fmd_after -gt $fmd_before ]] &&
- echo "AFTER: $fmd_after > BEFORE: $fmd_before" &&
- error "fmd didn't expire after ping" || true
+ # Same sum after waiting past the max age: every entry must be gone.
+ fmd=$(do_facet $facet "lctl get_param -n $tgt.*.exports.*.fmd_count" |
+ gawk '{cnt=cnt+$1} END{print cnt+0}')
+ echo "FMD after: $fmd"
+ [[ $fmd == 0 ]] ||
+ error "FMD wasn't expired by ping"
}
-run_test 36g "filter mod data cache expiry ====================="
+run_test 36g "FMD cache expiry ====================="
test_36h() {
[ $PARALLEL == "yes" ] && skip "skip parallel run"
[ $unlink_new2 -eq $unlink_new ] || error "unlink file reverses mtime"
[ $rename_new2 -eq $rename_new ] || error "rename file reverses mtime"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
[ "$mtime2" = "$mtime3" ] || \
error "mtime ($mtime2) changed (to $mtime3) on rename"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
[ $mtime = $TEST_39_MTIME ] || \
error "mtime($mtime) is not set to $TEST_39_MTIME"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
[ $mtime2 = $TEST_39_MTIME ] || \
error "mtime($mtime2) is not set to $TEST_39_MTIME"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
[ $mtime2 = $TEST_39_MTIME ] || \
error "mtime($mtime2) is not set to $TEST_39_MTIME"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
[ "$mtime1" = "$mtime2" ] || \
error "lost mtime: $mtime2, should be $mtime1"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
[ "$mtime2" = $TEST_39_MTIME ] || \
error "lost mtime: $mtime2, should be $TEST_39_MTIME"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
fi
[ "$mtime1" = "$mtime2" ] || \
error "lost mtime: $mtime2, should be $mtime1"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}
error "mtime is lost on close: $mtime2, " \
"should be $mtime1"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
lctl set_param fail_loc=0
[ "$timestamps" = "$far_past_atime $far_past_mtime" ] || \
error "atime or mtime set incorrectly"
- cancel_lru_locks osc
+ cancel_lru_locks $OSC
if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi
done
}