* GPL HEADER END
*/
/*
- * Copyright (c) 2012, 2015, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
*/
/*
* lustre/target/tgt_main.c
#include "tgt_internal.h"
#include "../ptlrpc/ptlrpc_internal.h"
-static spinlock_t uncommitted_slc_locks_guard;
-static struct list_head uncommitted_slc_locks;
+/* This must be longer than the longest string below */
+#define SYNC_STATES_MAXLEN 16
+static const char * const sync_lock_cancel_states[] = {
+ [SYNC_LOCK_CANCEL_NEVER] = "never",
+ [SYNC_LOCK_CANCEL_BLOCKING] = "blocking",
+ [SYNC_LOCK_CANCEL_ALWAYS] = "always",
+};
+
+/**
+ * Show policy for handling dirty data under a lock being cancelled.
+ *
+ * \param[in] kobj sysfs kobject
+ * \param[in] attr sysfs attribute
+ * \param[in] buf buffer for data
+ *
+ * \retval 0 and buffer filled with data on success
+ * \retval negative value on error
+ */
+ssize_t sync_lock_cancel_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct lu_target *tgt = obd->u.obt.obt_lut;
+
+ return sprintf(buf, "%s\n",
+ sync_lock_cancel_states[tgt->lut_sync_lock_cancel]);
+}
+EXPORT_SYMBOL(sync_lock_cancel_show);
+
+/**
+ * Change policy for handling dirty data under a lock being cancelled.
+ *
+ * This variable defines what action target takes upon lock cancel
+ * There are three possible modes:
+ * 1) never - never do sync upon lock cancel. This can lead to data
+ * inconsistencies if both the OST and client crash while writing a file
+ * that is also concurrently being read by another client. In these cases,
+ * this may allow the file data to "rewind" to an earlier state.
+ * 2) blocking - do sync only if there is blocking lock, e.g. if another
+ * client is trying to access this same object
+ * 3) always - do sync always
+ *
+ * \param[in] kobj kobject
+ * \param[in] attr attribute to show
+ * \param[in] buf buffer for data
+ * \param[in] count buffer size
+ *
+ * \retval \a count on success
+ * \retval negative value on error
+ */
+ssize_t sync_lock_cancel_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct lu_target *tgt = obd->u.obt.obt_lut;
+ int val = -1;
+ enum tgt_sync_lock_cancel slc;
+
+ if (count == 0 || count >= SYNC_STATES_MAXLEN)
+ return -EINVAL;
+
+ for (slc = 0; slc < ARRAY_SIZE(sync_lock_cancel_states); slc++) {
+ if (strcmp(buffer, sync_lock_cancel_states[slc]) == 0) {
+ val = slc;
+ break;
+ }
+ }
+
+ /* Legacy numeric codes */
+ if (val == -1) {
+ int rc = kstrtoint(buffer, 0, &val);
+ if (rc)
+ return rc;
+ }
+
+ if (val < 0 || val > 2)
+ return -EINVAL;
+
+ spin_lock(&tgt->lut_flags_lock);
+ tgt->lut_sync_lock_cancel = val;
+ spin_unlock(&tgt->lut_flags_lock);
+ return count;
+}
+EXPORT_SYMBOL(sync_lock_cancel_store);
+LUSTRE_RW_ATTR(sync_lock_cancel);
+
+/**
+ * Show maximum number of Filter Modification Data (FMD) maintained.
+ *
+ * \param[in] kobj kobject
+ * \param[in] attr attribute to show
+ * \param[in] buf buffer for data
+ *
+ * \retval 0 and buffer filled with data on success
+ * \retval negative value on error
+ */
+ssize_t tgt_fmd_count_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct lu_target *lut = obd->u.obt.obt_lut;
+
+ return sprintf(buf, "%u\n", lut->lut_fmd_max_num);
+}
+
+/**
+ * Change number of FMDs maintained by target.
+ *
+ * This defines how large the list of FMDs can be.
+ *
+ * \param[in] kobj kobject
+ * \param[in] attr attribute to show
+ * \param[in] buf buffer for data
+ * \param[in] count buffer size
+ *
+ * \retval \a count on success
+ * \retval negative value on error
+ */
+ssize_t tgt_fmd_count_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct lu_target *lut = obd->u.obt.obt_lut;
+ int val, rc;
+
+ rc = kstrtoint(buffer, 0, &val);
+ if (rc)
+ return rc;
+
+ if (val < 1 || val > 65536)
+ return -EINVAL;
+
+ lut->lut_fmd_max_num = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_count);
+
+/**
+ * Show the maximum age of FMD data in seconds.
+ *
+ * \param[in] kobj kobject
+ * \param[in] attr attribute to show
+ * \param[in] buf buffer for data
+ *
+ * \retval 0 and buffer filled with data on success
+ * \retval negative value on error
+ */
+ssize_t tgt_fmd_seconds_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct lu_target *lut = obd->u.obt.obt_lut;
+
+ return sprintf(buf, "%lld\n", lut->lut_fmd_max_age);
+}
+
+/**
+ * Set the maximum age of FMD data in seconds.
+ *
+ * This defines how long FMD data stays in the FMD list.
+ *
+ * \param[in] kobj kobject
+ * \param[in] attr attribute to show
+ * \param[in] buf buffer for data
+ * \param[in] count buffer size
+ *
+ * \retval \a count on success
+ * \retval negative number on error
+ */
+ssize_t tgt_fmd_seconds_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct lu_target *lut = obd->u.obt.obt_lut;
+ time64_t val;
+ int rc;
+
+ rc = kstrtoll(buffer, 0, &val);
+ if (rc)
+ return rc;
+
+ if (val < 1 || val > 65536) /* ~ 18 hour max */
+ return -EINVAL;
+
+ lut->lut_fmd_max_age = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(tgt_fmd_seconds);
+
+/* These two aliases are old names and kept for compatibility, they were
+ * changed to 'tgt_fmd_count' and 'tgt_fmd_seconds'.
+ * This change was made in Lustre 2.13, so these aliases can be removed
+ * when back compatibility is not needed with any Lustre version prior 2.13
+ */
+static struct lustre_attr tgt_fmd_count_compat = __ATTR(client_cache_count,
+ 0644, tgt_fmd_count_show, tgt_fmd_count_store);
+static struct lustre_attr tgt_fmd_seconds_compat = __ATTR(client_cache_seconds,
+ 0644, tgt_fmd_seconds_show, tgt_fmd_seconds_store);
+
+static const struct attribute *tgt_attrs[] = {
+ &lustre_attr_sync_lock_cancel.attr,
+ &lustre_attr_tgt_fmd_count.attr,
+ &lustre_attr_tgt_fmd_seconds.attr,
+ &tgt_fmd_count_compat.attr,
+ &tgt_fmd_seconds_compat.attr,
+ NULL,
+};
+
+int tgt_tunables_init(struct lu_target *lut)
+{
+ int rc;
+
+ rc = sysfs_create_files(&lut->lut_obd->obd_kset.kobj, tgt_attrs);
+ if (!rc)
+ lut->lut_attrs = tgt_attrs;
+ return rc;
+}
+EXPORT_SYMBOL(tgt_tunables_init);
+
+void tgt_tunables_fini(struct lu_target *lut)
+{
+ if (lut->lut_attrs) {
+ sysfs_remove_files(&lut->lut_obd->obd_kset.kobj,
+ lut->lut_attrs);
+ lut->lut_attrs = NULL;
+ }
+}
+EXPORT_SYMBOL(tgt_tunables_fini);
/*
- * Save cross-MDT lock in uncommitted_slc_locks.
+ * Save cross-MDT lock in lut_slc_locks.
*
* Lock R/W count is not saved, but released in unlock (not canceled remotely),
* instead only a refcount is taken, so that the remote MDT where the object
* resides can detect conflict with this lock there.
*
+ * \param lut target
* \param lock cross-MDT lock to save
* \param transno when the transaction with this transno is committed, this lock
* can be canceled.
*/
-void tgt_save_slc_lock(struct ldlm_lock *lock, __u64 transno)
+void tgt_save_slc_lock(struct lu_target *lut, struct ldlm_lock *lock,
+ __u64 transno)
{
- spin_lock(&uncommitted_slc_locks_guard);
+ spin_lock(&lut->lut_slc_locks_guard);
lock_res_and_lock(lock);
if (ldlm_is_cbpending(lock)) {
/* if it was canceld by server, don't save, because remote MDT
* both use this lock, and save it after use, so for the second
* one, just put the refcount. */
if (list_empty(&lock->l_slc_link))
- list_add_tail(&lock->l_slc_link,
- &uncommitted_slc_locks);
+ list_add_tail(&lock->l_slc_link, &lut->lut_slc_locks);
else
LDLM_LOCK_PUT(lock);
}
unlock_res_and_lock(lock);
- spin_unlock(&uncommitted_slc_locks_guard);
+ spin_unlock(&lut->lut_slc_locks_guard);
}
EXPORT_SYMBOL(tgt_save_slc_lock);
/*
- * Discard cross-MDT lock from uncommitted_slc_locks.
+ * Discard cross-MDT lock from lut_slc_locks.
*
- * This is called upon BAST, just remove lock from uncommitted_slc_locks and put
- * lock refcount. The BAST will cancel this lock.
+ * This is called upon BAST, just remove lock from lut_slc_locks and put lock
+ * refcount. The BAST will cancel this lock.
*
+ * \param lut target
* \param lock cross-MDT lock to discard
*/
-void tgt_discard_slc_lock(struct ldlm_lock *lock)
+void tgt_discard_slc_lock(struct lu_target *lut, struct ldlm_lock *lock)
{
- spin_lock(&uncommitted_slc_locks_guard);
+ spin_lock(&lut->lut_slc_locks_guard);
lock_res_and_lock(lock);
/* may race with tgt_cancel_slc_locks() */
if (lock->l_transno != 0) {
LDLM_LOCK_PUT(lock);
}
unlock_res_and_lock(lock);
- spin_unlock(&uncommitted_slc_locks_guard);
+ spin_unlock(&lut->lut_slc_locks_guard);
}
EXPORT_SYMBOL(tgt_discard_slc_lock);
/*
* Cancel cross-MDT locks upon transaction commit.
*
- * Remove cross-MDT locks from uncommitted_slc_locks, cancel them and put lock
- * refcount.
+ * Remove cross-MDT locks from lut_slc_locks, cancel them and put lock refcount.
*
+ * \param lut target
* \param transno transaction with this number was committed.
*/
-void tgt_cancel_slc_locks(__u64 transno)
+void tgt_cancel_slc_locks(struct lu_target *lut, __u64 transno)
{
struct ldlm_lock *lock, *next;
LIST_HEAD(list);
struct lustre_handle lockh;
- spin_lock(&uncommitted_slc_locks_guard);
- list_for_each_entry_safe(lock, next, &uncommitted_slc_locks,
+ spin_lock(&lut->lut_slc_locks_guard);
+ list_for_each_entry_safe(lock, next, &lut->lut_slc_locks,
l_slc_link) {
lock_res_and_lock(lock);
LASSERT(lock->l_transno != 0);
list_move(&lock->l_slc_link, &list);
unlock_res_and_lock(lock);
}
- spin_unlock(&uncommitted_slc_locks_guard);
+ spin_unlock(&lut->lut_slc_locks_guard);
list_for_each_entry_safe(lock, next, &list, l_slc_link) {
list_del_init(&lock->l_slc_link);
struct lu_attr attr;
struct lu_fid fid;
struct dt_object *o;
- int rc = 0;
+ struct tg_grants_data *tgd = &lut->lut_tgd;
+ struct obd_statfs *osfs;
+ int i, rc = 0;
ENTRY;
sptlrpc_rule_set_init(&lut->lut_sptlrpc_rset);
spin_lock_init(&lut->lut_flags_lock);
- lut->lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL;
+ lut->lut_sync_lock_cancel = SYNC_LOCK_CANCEL_NEVER;
+
+ spin_lock_init(&lut->lut_slc_locks_guard);
+ INIT_LIST_HEAD(&lut->lut_slc_locks);
/* last_rcvd initialization is needed by replayable targets only */
if (!obd->obd_replayable)
RETURN(0);
+ /* initialize grant and statfs data in target */
+ dt_conf_get(env, lut->lut_bottom, &lut->lut_dt_conf);
+
+ /* statfs data */
+ spin_lock_init(&tgd->tgd_osfs_lock);
+ tgd->tgd_osfs_age = ktime_get_seconds() - 1000;
+ tgd->tgd_osfs_unstable = 0;
+ tgd->tgd_statfs_inflight = 0;
+ tgd->tgd_osfs_inflight = 0;
+
+ /* grant data */
+ spin_lock_init(&tgd->tgd_grant_lock);
+ tgd->tgd_tot_dirty = 0;
+ tgd->tgd_tot_granted = 0;
+ tgd->tgd_tot_pending = 0;
+ tgd->tgd_grant_compat_disable = 0;
+
+ /* populate cached statfs data */
+ osfs = &tgt_th_info(env)->tti_u.osfs;
+ rc = tgt_statfs_internal(env, lut, osfs, 0, NULL);
+ if (rc != 0) {
+ CERROR("%s: can't get statfs data, rc %d\n", tgt_name(lut),
+ rc);
+ GOTO(out, rc);
+ }
+ if (!is_power_of_2(osfs->os_bsize)) {
+ CERROR("%s: blocksize (%d) is not a power of 2\n",
+ tgt_name(lut), osfs->os_bsize);
+ GOTO(out, rc = -EPROTO);
+ }
+ tgd->tgd_blockbits = fls(osfs->os_bsize) - 1;
+
spin_lock_init(&lut->lut_translock);
spin_lock_init(&lut->lut_client_bitmap_lock);
/* prepare transactions callbacks */
lut->lut_txn_cb.dtc_txn_start = tgt_txn_start_cb;
lut->lut_txn_cb.dtc_txn_stop = tgt_txn_stop_cb;
- lut->lut_txn_cb.dtc_txn_commit = NULL;
lut->lut_txn_cb.dtc_cookie = lut;
lut->lut_txn_cb.dtc_tag = LCT_DT_THREAD | LCT_MD_THREAD;
INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage);
dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb);
lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut;
+ lut->lut_fmd_max_num = LUT_FMD_MAX_NUM_DEFAULT;
+ lut->lut_fmd_max_age = LUT_FMD_MAX_AGE_DEFAULT;
+
+ atomic_set(&lut->lut_sync_count, 0);
+
/* reply_data is supported by MDT targets only for now */
if (strncmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) != 0)
RETURN(0);
OBD_ALLOC(lut->lut_reply_bitmap,
LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
if (lut->lut_reply_bitmap == NULL)
- GOTO(out, rc);
+ GOTO(out, rc = -ENOMEM);
memset(&attr, 0, sizeof(attr));
attr.la_valid = LA_MODE;
if (rc < 0)
GOTO(out, rc);
- atomic_set(&lut->lut_sync_count, 0);
-
RETURN(0);
out:
obd->u.obt.obt_magic = 0;
obd->u.obt.obt_lut = NULL;
if (lut->lut_last_rcvd != NULL) {
- lu_object_put(env, &lut->lut_last_rcvd->do_lu);
+ dt_object_put(env, lut->lut_last_rcvd);
lut->lut_last_rcvd = NULL;
}
if (lut->lut_client_bitmap != NULL)
OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
lut->lut_client_bitmap = NULL;
if (lut->lut_reply_data != NULL)
- lu_object_put(env, &lut->lut_reply_data->do_lu);
+ dt_object_put(env, lut->lut_reply_data);
lut->lut_reply_data = NULL;
- if (lut->lut_reply_bitmap != NULL)
+ if (lut->lut_reply_bitmap != NULL) {
+ for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) {
+ if (lut->lut_reply_bitmap[i] != NULL)
+ OBD_FREE_LARGE(lut->lut_reply_bitmap[i],
+ BITS_TO_LONGS(LUT_REPLY_SLOTS_PER_CHUNK) *
+ sizeof(long));
+ lut->lut_reply_bitmap[i] = NULL;
+ }
OBD_FREE(lut->lut_reply_bitmap,
LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
+ }
lut->lut_reply_bitmap = NULL;
return rc;
}
sptlrpc_rule_set_free(&lut->lut_sptlrpc_rset);
if (lut->lut_reply_data != NULL)
- lu_object_put(env, &lut->lut_reply_data->do_lu);
+ dt_object_put(env, lut->lut_reply_data);
lut->lut_reply_data = NULL;
if (lut->lut_reply_bitmap != NULL) {
for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) {
if (lut->lut_reply_bitmap[i] != NULL)
- OBD_FREE(lut->lut_reply_bitmap[i],
+ OBD_FREE_LARGE(lut->lut_reply_bitmap[i],
BITS_TO_LONGS(LUT_REPLY_SLOTS_PER_CHUNK) *
sizeof(long));
lut->lut_reply_bitmap[i] = NULL;
}
if (lut->lut_last_rcvd) {
dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb);
- lu_object_put(env, &lut->lut_last_rcvd->do_lu);
+ dt_object_put(env, lut->lut_last_rcvd);
lut->lut_last_rcvd = NULL;
}
EXIT;
}
EXPORT_SYMBOL(tgt_fini);
+static struct kmem_cache *tgt_thread_kmem;
+static struct kmem_cache *tgt_session_kmem;
+struct kmem_cache *tgt_fmd_kmem;
+
+static struct lu_kmem_descr tgt_caches[] = {
+ {
+ .ckd_cache = &tgt_thread_kmem,
+ .ckd_name = "tgt_thread_kmem",
+ .ckd_size = sizeof(struct tgt_thread_info),
+ },
+ {
+ .ckd_cache = &tgt_session_kmem,
+ .ckd_name = "tgt_session_kmem",
+ .ckd_size = sizeof(struct tgt_session_info)
+ },
+ {
+ .ckd_cache = &tgt_fmd_kmem,
+ .ckd_name = "tgt_fmd_cache",
+ .ckd_size = sizeof(struct tgt_fmd_data)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+
/* context key constructor/destructor: tg_key_init, tg_key_fini */
-LU_KEY_INIT(tgt, struct tgt_thread_info);
+static void *tgt_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct tgt_thread_info *thread;
+
+ OBD_SLAB_ALLOC_PTR_GFP(thread, tgt_thread_kmem, GFP_NOFS);
+ if (thread == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ return thread;
+}
static void tgt_key_fini(const struct lu_context *ctx,
struct lu_context_key *key, void *data)
}
if (args->ta_args != NULL)
- OBD_FREE(args->ta_args, sizeof(args->ta_args[0]) *
- args->ta_alloc_args);
- OBD_FREE_PTR(info);
+ OBD_FREE_PTR_ARRAY(args->ta_args, args->ta_alloc_args);
+ OBD_SLAB_FREE_PTR(info, tgt_thread_kmem);
}
static void tgt_key_exit(const struct lu_context *ctx,
LU_KEY_INIT_GENERIC(tgt);
-/* context key constructor/destructor: tgt_ses_key_init, tgt_ses_key_fini */
-LU_KEY_INIT_FINI(tgt_ses, struct tgt_session_info);
+static void *tgt_ses_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct tgt_session_info *session;
+
+ OBD_SLAB_ALLOC_PTR_GFP(session, tgt_session_kmem, GFP_NOFS);
+ if (session == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ return session;
+}
+
+static void tgt_ses_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct tgt_session_info *session = data;
+
+ OBD_SLAB_FREE_PTR(session, tgt_session_kmem);
+}
/* context key: tgt_session_key */
struct lu_context_key tgt_session_key = {
int tgt_mod_init(void)
{
+ int result;
ENTRY;
+ result = lu_kmem_init(tgt_caches);
+ if (result != 0)
+ RETURN(result);
+
tgt_page_to_corrupt = alloc_page(GFP_KERNEL);
tgt_key_init_generic(&tgt_thread_key, NULL);
tgt_ses_key_init_generic(&tgt_session_key, NULL);
lu_context_key_register_many(&tgt_session_key, NULL);
+ barrier_init();
update_info_init();
- spin_lock_init(&uncommitted_slc_locks_guard);
- INIT_LIST_HEAD(&uncommitted_slc_locks);
-
RETURN(0);
}
void tgt_mod_exit(void)
{
+ barrier_fini();
if (tgt_page_to_corrupt != NULL)
- page_cache_release(tgt_page_to_corrupt);
+ put_page(tgt_page_to_corrupt);
lu_context_key_degister(&tgt_thread_key);
lu_context_key_degister(&tgt_session_key);
update_info_fini();
+
+ lu_kmem_fini(tgt_caches);
}