#define DEBUG_SUBSYSTEM S_CLASS
#include <obd.h>
+#include <obd_cksum.h>
#include "tgt_internal.h"
#include "../ptlrpc/ptlrpc_internal.h"
+/* This must be longer than the longest string below: writes whose length
+ * is SYNC_STATES_MAXLEN or more are rejected by sync_lock_cancel_store()
+ * before any name comparison is attempted.
+ *
+ * The table maps each enum tgt_sync_lock_cancel value to the name shown
+ * and accepted through the sync_lock_cancel sysfs attribute.
+ */
+#define SYNC_STATES_MAXLEN 16
+static const char * const sync_lock_cancel_states[] = {
+ [SYNC_LOCK_CANCEL_NEVER] = "never",
+ [SYNC_LOCK_CANCEL_BLOCKING] = "blocking",
+ [SYNC_LOCK_CANCEL_ALWAYS] = "always",
+};
+
+/**
+ * Report the policy used for dirty data under a lock being cancelled.
+ *
+ * The policy currently stored in the target is translated to its name
+ * through sync_lock_cancel_states[] and printed, followed by a newline.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being read (not used here)
+ * \param[out] buf	buffer receiving the policy name
+ *
+ * \retval		number of characters written to \a buf
+ */
+ssize_t sync_lock_cancel_show(struct kobject *kobj,
+			      struct attribute *attr, char *buf)
+{
+	struct obd_device *dev;
+	struct lu_target *lut;
+	const char *state;
+
+	dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+	lut = dev->u.obt.obt_lut;
+	state = sync_lock_cancel_states[lut->lut_sync_lock_cancel];
+
+	return sprintf(buf, "%s\n", state);
+}
+EXPORT_SYMBOL(sync_lock_cancel_show);
+
+/**
+ * Change policy for handling dirty data under a lock being cancelled.
+ *
+ * This variable defines what action target takes upon lock cancel
+ * There are three possible modes:
+ * 1) never - never do sync upon lock cancel. This can lead to data
+ *    inconsistencies if both the OST and client crash while writing a file
+ *    that is also concurrently being read by another client. In these cases,
+ *    this may allow the file data to "rewind" to an earlier state.
+ * 2) blocking - do sync only if there is blocking lock, e.g. if another
+ *    client is trying to access this same object
+ * 3) always - do sync always
+ *
+ * The mode may be written either by name or by its legacy numeric code
+ * (0, 1 or 2). A trailing newline, as appended by "echo", is accepted in
+ * both forms.
+ *
+ * \param[in] kobj	kobject
+ * \param[in] attr	attribute being written
+ * \param[in] buffer	string written by the user
+ * \param[in] count	length of \a buffer
+ *
+ * \retval \a count on success
+ * \retval -EINVAL if \a count is 0 or too large, or if the value is
+ *	   neither a known name nor a number in the valid range
+ * \retval other negative value returned by kstrtoint() on parse failure
+ */
+ssize_t sync_lock_cancel_store(struct kobject *kobj, struct attribute *attr,
+			       const char *buffer, size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct lu_target *tgt = obd->u.obt.obt_lut;
+	int val = -1;
+	enum tgt_sync_lock_cancel slc;
+
+	if (count == 0 || count >= SYNC_STATES_MAXLEN)
+		return -EINVAL;
+
+	/* sysfs_streq() ignores a trailing newline, so "echo always" is
+	 * handled the same way as "echo -n always" and as the numeric form
+	 * parsed by kstrtoint() below.
+	 */
+	for (slc = 0; slc < ARRAY_SIZE(sync_lock_cancel_states); slc++) {
+		if (sysfs_streq(buffer, sync_lock_cancel_states[slc])) {
+			val = slc;
+			break;
+		}
+	}
+
+	/* Legacy numeric codes */
+	if (val == -1) {
+		int rc = kstrtoint(buffer, 0, &val);
+
+		if (rc)
+			return rc;
+	}
+
+	/* the valid codes are exactly the indices of the name table */
+	if (val < 0 || (size_t)val >= ARRAY_SIZE(sync_lock_cancel_states))
+		return -EINVAL;
+
+	spin_lock(&tgt->lut_flags_lock);
+	tgt->lut_sync_lock_cancel = val;
+	spin_unlock(&tgt->lut_flags_lock);
+	return count;
+}
+EXPORT_SYMBOL(sync_lock_cancel_store);
+LUSTRE_RW_ATTR(sync_lock_cancel);
+
+/**
+ * Report the maximum number of Filter Modification Data (FMD) entries
+ * the target maintains.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being read (not used here)
+ * \param[out] buf	buffer receiving the decimal value and a newline
+ *
+ * \retval		number of characters written to \a buf
+ */
+ssize_t tgt_fmd_count_show(struct kobject *kobj, struct attribute *attr,
+			   char *buf)
+{
+	struct obd_device *dev;
+	struct lu_target *tgt;
+
+	dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+	tgt = dev->u.obt.obt_lut;
+
+	return sprintf(buf, "%u\n", tgt->lut_fmd_max_num);
+}
+
+/**
+ * Set the maximum number of FMDs maintained by the target.
+ *
+ * This bounds how large the list of FMDs can grow. Accepted values are
+ * in the range [1, 65536].
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being written (not used here)
+ * \param[in] buffer	string holding the new value
+ * \param[in] count	length of \a buffer
+ *
+ * \retval \a count on success
+ * \retval -EINVAL if the value is outside the accepted range
+ * \retval negative value returned by kstrtoint() on parse failure
+ */
+ssize_t tgt_fmd_count_store(struct kobject *kobj, struct attribute *attr,
+			    const char *buffer, size_t count)
+{
+	struct obd_device *dev;
+	int max_num;
+	int err;
+
+	err = kstrtoint(buffer, 0, &max_num);
+	if (err != 0)
+		return err;
+
+	if (max_num <= 0 || max_num > 65536)
+		return -EINVAL;
+
+	dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+	dev->u.obt.obt_lut->lut_fmd_max_num = max_num;
+
+	return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_count);
+
+/**
+ * Report the maximum age of FMD data, in seconds.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being read (not used here)
+ * \param[out] buf	buffer receiving the decimal value and a newline
+ *
+ * \retval		number of characters written to \a buf
+ */
+ssize_t tgt_fmd_seconds_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct obd_device *dev;
+	struct lu_target *tgt;
+
+	dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+	tgt = dev->u.obt.obt_lut;
+
+	return sprintf(buf, "%lld\n", tgt->lut_fmd_max_age);
+}
+
+/**
+ * Set the maximum age of FMD data, in seconds.
+ *
+ * This controls how long FMD data stays in the FMD list. Accepted values
+ * are in the range [1, 65536], i.e. a little over 18 hours at most.
+ *
+ * \param[in] kobj	sysfs kobject embedded in the obd_device
+ * \param[in] attr	sysfs attribute being written (not used here)
+ * \param[in] buffer	string holding the new value
+ * \param[in] count	length of \a buffer
+ *
+ * \retval \a count on success
+ * \retval -EINVAL if the value is outside the accepted range
+ * \retval negative value returned by kstrtoll() on parse failure
+ */
+ssize_t tgt_fmd_seconds_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buffer, size_t count)
+{
+	struct obd_device *dev;
+	time64_t max_age;
+	int err;
+
+	err = kstrtoll(buffer, 0, &max_age);
+	if (err != 0)
+		return err;
+
+	if (max_age <= 0 || max_age > 65536) /* ~ 18 hour max */
+		return -EINVAL;
+
+	dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+	dev->u.obt.obt_lut->lut_fmd_max_age = max_age;
+
+	return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_seconds);
+
+/* These two aliases are the old attribute names, kept for compatibility;
+ * they were renamed to 'tgt_fmd_count' and 'tgt_fmd_seconds'.
+ * This change was made in Lustre 2.13, so these aliases can be removed
+ * when backward compatibility is no longer needed with any Lustre version
+ * prior to 2.13. Both aliases reuse the show/store handlers of the new names.
+ */
+static struct lustre_attr tgt_fmd_count_compat = __ATTR(client_cache_count,
+ 0644, tgt_fmd_count_show, tgt_fmd_count_store);
+static struct lustre_attr tgt_fmd_seconds_compat = __ATTR(client_cache_seconds,
+ 0644, tgt_fmd_seconds_show, tgt_fmd_seconds_store);
+
+static const struct attribute *tgt_attrs[] = { /* created by tgt_tunables_init() */
+ &lustre_attr_sync_lock_cancel.attr,
+ &lustre_attr_tgt_fmd_count.attr,
+ &lustre_attr_tgt_fmd_seconds.attr,
+ &tgt_fmd_count_compat.attr, /* legacy name: client_cache_count */
+ &tgt_fmd_seconds_compat.attr, /* legacy name: client_cache_seconds */
+ NULL, /* end of list */
+};
+
+/**
+ * Decide which RPC checksum types both the client and this target can use,
+ * possibly forcing T10-PI checksums when the hardware supports them.
+ *
+ * On entry \a cksum_types holds the checksum types offered by the client;
+ * on return it holds the types the target agrees to.
+ *
+ * Clients without T10-PI RPC checksum support are handled as before: their
+ * offer is masked with the non-T10-PI types supported by the target and
+ * nothing else is done.
+ *
+ * For clients with T10-PI RPC checksum support there are four cases:
+ *
+ * - The target has the T10-PI feature and T10-PI is enforced: if the client
+ *   offers the target's T10-PI type, it becomes the only checksum type.
+ *   This is useful for enforcing end-to-end integrity in the whole system.
+ *
+ * - The target has no T10-PI feature and T10-PI is enforced: every T10-PI
+ *   type understood by the client (t10ip512, t10ip4K, t10crc512, t10crc4K)
+ *   is added next to the other checksum types (e.g. crc32, crc32c, adler),
+ *   regardless of the speed of the T10-PI checksums. This is useful for
+ *   testing T10-PI checksums of RPCs.
+ *
+ * - The target has the T10-PI feature and T10-PI is NOT enforced: if the
+ *   client offers the target's T10-PI type, it is added to the list,
+ *   regardless of its speed, so that clients can choose whether to enable
+ *   end-to-end integrity or not.
+ *
+ * - The target has no T10-PI feature and T10-PI is NOT enforced: only the
+ *   T10-PI types present in lut_cksum_types_supported are added. That set
+ *   is meant to hold the types with good speed (a T10-PI type slower than
+ *   half of Adler is not expected to be in it), so here T10-PI types behave
+ *   like any other checksum type.
+ *
+ * \param[in] lut		target whose checksum settings are consulted
+ * \param[in,out] cksum_types	client offer on entry, agreed types on return
+ */
+void tgt_mask_cksum_types(struct lu_target *lut, enum cksum_types *cksum_types)
+{
+	enum cksum_types offered_t10 = *cksum_types & OBD_CKSUM_T10_ALL;
+	enum cksum_types supported = lut->lut_cksum_types_supported;
+	enum cksum_types hw_t10 = lut->lut_dt_conf.ddp_t10_cksum_type;
+	bool forced = lut->lut_cksum_t10pi_enforce;
+
+	/*
+	 * Keep only the non-T10-PI algorithms that the client offered and
+	 * that we support as well. T10-PI types are put back below.
+	 */
+	*cksum_types &= supported & ~OBD_CKSUM_T10_ALL;
+
+	/* Nothing more to do for a client without T10-PI support */
+	if (offered_t10 == 0)
+		return;
+
+	if (hw_t10 == 0) {
+		/*
+		 * No T10-PI feature on this target: when enforced, accept
+		 * every T10-PI type of the client; otherwise only those
+		 * that are also in our supported set.
+		 */
+		if (!forced)
+			offered_t10 &= supported & OBD_CKSUM_T10_ALL;
+		*cksum_types |= offered_t10;
+		return;
+	}
+
+	/* The client does not know the T10-PI type of this target */
+	if ((offered_t10 & hw_t10) == 0)
+		return;
+
+	/*
+	 * T10-PI feature present: make it the sole type when enforced,
+	 * otherwise just one more option.
+	 */
+	if (forced)
+		*cksum_types = hw_t10;
+	else
+		*cksum_types |= hw_t10;
+}
+EXPORT_SYMBOL(tgt_mask_cksum_types);
+
+/**
+ * Create the sysfs files listed in tgt_attrs[] under the target's obd
+ * kobject.
+ *
+ * On success the attribute list is remembered in lut_attrs so that
+ * tgt_tunables_fini() can remove the files again.
+ *
+ * \param[in] lut	target to create the tunables for
+ *
+ * \retval 0 on success
+ * \retval non-zero value returned by sysfs_create_files() on failure
+ */
+int tgt_tunables_init(struct lu_target *lut)
+{
+	int rc = sysfs_create_files(&lut->lut_obd->obd_kset.kobj, tgt_attrs);
+
+	if (rc != 0)
+		return rc;
+
+	lut->lut_attrs = tgt_attrs;
+	return 0;
+}
+EXPORT_SYMBOL(tgt_tunables_init);
+
+/**
+ * Remove the sysfs files recorded in lut_attrs, if any.
+ *
+ * Does nothing when lut_attrs is NULL; clears lut_attrs after removal.
+ *
+ * \param[in] lut	target whose tunables are removed
+ */
+void tgt_tunables_fini(struct lu_target *lut)
+{
+	if (lut->lut_attrs == NULL)
+		return;
+
+	sysfs_remove_files(&lut->lut_obd->obd_kset.kobj, lut->lut_attrs);
+	lut->lut_attrs = NULL;
+}
+EXPORT_SYMBOL(tgt_tunables_fini);
+
/*
* Save cross-MDT lock in lut_slc_locks.
*
sptlrpc_rule_set_init(&lut->lut_sptlrpc_rset);
spin_lock_init(&lut->lut_flags_lock);
- lut->lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL;
+ lut->lut_sync_lock_cancel = SYNC_LOCK_CANCEL_NEVER;
+ lut->lut_cksum_t10pi_enforce = 0;
+ lut->lut_cksum_types_supported =
+ obd_cksum_types_supported_server(obd->obd_name);
spin_lock_init(&lut->lut_slc_locks_guard);
INIT_LIST_HEAD(&lut->lut_slc_locks);
/* statfs data */
spin_lock_init(&tgd->tgd_osfs_lock);
- tgd->tgd_osfs_age = cfs_time_shift_64(-1000);
+ tgd->tgd_osfs_age = ktime_get_seconds() - 1000;
tgd->tgd_osfs_unstable = 0;
tgd->tgd_statfs_inflight = 0;
tgd->tgd_osfs_inflight = 0;
/* prepare transactions callbacks */
lut->lut_txn_cb.dtc_txn_start = tgt_txn_start_cb;
lut->lut_txn_cb.dtc_txn_stop = tgt_txn_stop_cb;
- lut->lut_txn_cb.dtc_txn_commit = NULL;
lut->lut_txn_cb.dtc_cookie = lut;
lut->lut_txn_cb.dtc_tag = LCT_DT_THREAD | LCT_MD_THREAD;
INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage);
dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb);
lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut;
+ lut->lut_fmd_max_num = LUT_FMD_MAX_NUM_DEFAULT;
+ lut->lut_fmd_max_age = LUT_FMD_MAX_AGE_DEFAULT;
+
+ atomic_set(&lut->lut_sync_count, 0);
+
/* reply_data is supported by MDT targets only for now */
if (strncmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) != 0)
RETURN(0);
if (rc < 0)
GOTO(out, rc);
- atomic_set(&lut->lut_sync_count, 0);
-
RETURN(0);
out:
static struct kmem_cache *tgt_thread_kmem;
static struct kmem_cache *tgt_session_kmem;
+struct kmem_cache *tgt_fmd_kmem;
+
static struct lu_kmem_descr tgt_caches[] = {
{
.ckd_cache = &tgt_thread_kmem,
.ckd_size = sizeof(struct tgt_session_info)
},
{
+ .ckd_cache = &tgt_fmd_kmem,
+ .ckd_name = "tgt_fmd_cache",
+ .ckd_size = sizeof(struct tgt_fmd_data)
+ },
+ {
.ckd_cache = NULL
}
};
}
if (args->ta_args != NULL)
- OBD_FREE(args->ta_args, sizeof(args->ta_args[0]) *
- args->ta_alloc_args);
+ OBD_FREE_PTR_ARRAY(args->ta_args, args->ta_alloc_args);
OBD_SLAB_FREE_PTR(info, tgt_thread_kmem);
}