X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftarget%2Ftgt_main.c;h=2f9f3811349f3b64dcc708158d7258a2fbdae6b7;hb=e2ac6e1eaa108eef3493837e9bd881629582ea1d;hp=e32e558d90117955ae1e55e87d18ab28d26866ba;hpb=3792aa13dab69c1a8de50ea23c5a4352f9bd0f45;p=fs%2Flustre-release.git diff --git a/lustre/target/tgt_main.c b/lustre/target/tgt_main.c index e32e558..2f9f381 100644 --- a/lustre/target/tgt_main.c +++ b/lustre/target/tgt_main.c @@ -21,7 +21,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, 2015, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * lustre/target/tgt_main.c @@ -37,23 +37,256 @@ #include "tgt_internal.h" #include "../ptlrpc/ptlrpc_internal.h" -static spinlock_t uncommitted_slc_locks_guard; -static struct list_head uncommitted_slc_locks; +/* This must be longer than the longest string below */ +#define SYNC_STATES_MAXLEN 16 +static char *sync_on_cancel_states[] = {"never", + "blocking", + "always" }; + +/** + * Show policy for handling dirty data under a lock being cancelled. + * + * \param[in] kobj sysfs kobject + * \param[in] attr sysfs attribute + * \param[in] buf buffer for data + * + * \retval 0 and buffer filled with data on success + * \retval negative value on error + */ +static ssize_t sync_lock_cancel_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct lu_target *tgt = obd->u.obt.obt_lut; + + return sprintf(buf, "%s\n", + sync_on_cancel_states[tgt->lut_sync_lock_cancel]); +} + +/** + * Change policy for handling dirty data under a lock being cancelled. + * + * This variable defines what action target takes upon lock cancel + * There are three possible modes: + * 1) never - never do sync upon lock cancel. This can lead to data + * inconsistencies if both the OST and client crash while writing a file + * that is also concurrently being read by another client. In these cases, + * this may allow the file data to "rewind" to an earlier state. + * 2) blocking - do sync only if there is blocking lock, e.g. if another + * client is trying to access this same object + * 3) always - do sync always + * + * \param[in] kobj kobject + * \param[in] attr attribute to show + * \param[in] buf buffer for data + * \param[in] count buffer size + * + * \retval \a count on success + * \retval negative value on error + */ +static ssize_t sync_lock_cancel_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct lu_target *tgt = obd->u.obt.obt_lut; + int val = -1; + int i; + + if (count == 0 || count >= SYNC_STATES_MAXLEN) + return -EINVAL; + + for (i = 0 ; i < NUM_SYNC_ON_CANCEL_STATES; i++) { + if (strcmp(buffer, sync_on_cancel_states[i]) == 0) { + val = i; + break; + } + } + + /* Legacy numeric codes */ + if (val == -1) { + int rc = kstrtoint(buffer, 0, &val); + if (rc) + return rc; + } + + if (val < 0 || val > 2) + return -EINVAL; + + spin_lock(&tgt->lut_flags_lock); + tgt->lut_sync_lock_cancel = val; + spin_unlock(&tgt->lut_flags_lock); + return count; +} +LUSTRE_RW_ATTR(sync_lock_cancel); + +/** + * Show maximum number of Filter Modification Data (FMD) maintained. + * + * \param[in] kobj kobject + * \param[in] attr attribute to show + * \param[in] buf buffer for data + * + * \retval 0 and buffer filled with data on success + * \retval negative value on error + */ +ssize_t tgt_fmd_count_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct lu_target *lut = obd->u.obt.obt_lut; + + return sprintf(buf, "%u\n", lut->lut_fmd_max_num); +} + +/** + * Change number of FMDs maintained by target. + * + * This defines how large the list of FMDs can be. + * + * \param[in] kobj kobject + * \param[in] attr attribute to show + * \param[in] buf buffer for data + * \param[in] count buffer size + * + * \retval \a count on success + * \retval negative value on error + */ +ssize_t tgt_fmd_count_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct lu_target *lut = obd->u.obt.obt_lut; + int val, rc; + + rc = kstrtoint(buffer, 0, &val); + if (rc) + return rc; + + if (val < 1 || val > 65536) + return -EINVAL; + + lut->lut_fmd_max_num = val; + + return count; +} +LUSTRE_RW_ATTR(tgt_fmd_count); + +/** + * Show the maximum age of FMD data in seconds. + * + * \param[in] kobj kobject + * \param[in] attr attribute to show + * \param[in] buf buffer for data + * + * \retval 0 and buffer filled with data on success + * \retval negative value on error + */ +ssize_t tgt_fmd_seconds_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct lu_target *lut = obd->u.obt.obt_lut; + + return sprintf(buf, "%lld\n", lut->lut_fmd_max_age); +} + +/** + * Set the maximum age of FMD data in seconds. + * + * This defines how long FMD data stays in the FMD list. + * + * \param[in] kobj kobject + * \param[in] attr attribute to show + * \param[in] buf buffer for data + * \param[in] count buffer size + * + * \retval \a count on success + * \retval negative number on error + */ +ssize_t tgt_fmd_seconds_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct lu_target *lut = obd->u.obt.obt_lut; + time64_t val; + int rc; + + rc = kstrtoll(buffer, 0, &val); + if (rc) + return rc; + + if (val < 1 || val > 65536) /* ~ 18 hour max */ + return -EINVAL; + + lut->lut_fmd_max_age = val; + + return count; +} +LUSTRE_RW_ATTR(tgt_fmd_seconds); + +/* These two aliases are old names and kept for compatibility, they were + * changed to 'tgt_fmd_count' and 'tgt_fmd_seconds'. + * This change was made in Lustre 2.13, so these aliases can be removed + * when back compatibility is not needed with any Lustre version prior 2.13 + */ +static struct lustre_attr tgt_fmd_count_compat = __ATTR(client_cache_count, + 0644, tgt_fmd_count_show, tgt_fmd_count_store); +static struct lustre_attr tgt_fmd_seconds_compat = __ATTR(client_cache_seconds, + 0644, tgt_fmd_seconds_show, tgt_fmd_seconds_store); + +static const struct attribute *tgt_attrs[] = { + &lustre_attr_sync_lock_cancel.attr, + &lustre_attr_tgt_fmd_count.attr, + &lustre_attr_tgt_fmd_seconds.attr, + &tgt_fmd_count_compat.attr, + &tgt_fmd_seconds_compat.attr, + NULL, +}; + +int tgt_tunables_init(struct lu_target *lut) +{ + int rc; + + rc = sysfs_create_files(&lut->lut_obd->obd_kset.kobj, tgt_attrs); + if (!rc) + lut->lut_attrs = tgt_attrs; + return rc; +} +EXPORT_SYMBOL(tgt_tunables_init); + +void tgt_tunables_fini(struct lu_target *lut) +{ + if (lut->lut_attrs) { + sysfs_remove_files(&lut->lut_obd->obd_kset.kobj, + lut->lut_attrs); + lut->lut_attrs = NULL; + } +} +EXPORT_SYMBOL(tgt_tunables_fini); /* - * Save cross-MDT lock in uncommitted_slc_locks. + * Save cross-MDT lock in lut_slc_locks. * * Lock R/W count is not saved, but released in unlock (not canceled remotely), * instead only a refcount is taken, so that the remote MDT where the object * resides can detect conflict with this lock there. * + * \param lut target * \param lock cross-MDT lock to save * \param transno when the transaction with this transno is committed, this lock * can be canceled. */ -void tgt_save_slc_lock(struct ldlm_lock *lock, __u64 transno) +void tgt_save_slc_lock(struct lu_target *lut, struct ldlm_lock *lock, + __u64 transno) { - spin_lock(&uncommitted_slc_locks_guard); + spin_lock(&lut->lut_slc_locks_guard); lock_res_and_lock(lock); if (ldlm_is_cbpending(lock)) { /* if it was canceld by server, don't save, because remote MDT @@ -65,27 +298,27 @@ void tgt_save_slc_lock(struct ldlm_lock *lock, __u64 transno) * both use this lock, and save it after use, so for the second * one, just put the refcount. */ if (list_empty(&lock->l_slc_link)) - list_add_tail(&lock->l_slc_link, - &uncommitted_slc_locks); + list_add_tail(&lock->l_slc_link, &lut->lut_slc_locks); else LDLM_LOCK_PUT(lock); } unlock_res_and_lock(lock); - spin_unlock(&uncommitted_slc_locks_guard); + spin_unlock(&lut->lut_slc_locks_guard); } EXPORT_SYMBOL(tgt_save_slc_lock); /* - * Discard cross-MDT lock from uncommitted_slc_locks. + * Discard cross-MDT lock from lut_slc_locks. * - * This is called upon BAST, just remove lock from uncommitted_slc_locks and put - * lock refcount. The BAST will cancel this lock. + * This is called upon BAST, just remove lock from lut_slc_locks and put lock + * refcount. The BAST will cancel this lock. * + * \param lut target * \param lock cross-MDT lock to discard */ -void tgt_discard_slc_lock(struct ldlm_lock *lock) +void tgt_discard_slc_lock(struct lu_target *lut, struct ldlm_lock *lock) { - spin_lock(&uncommitted_slc_locks_guard); + spin_lock(&lut->lut_slc_locks_guard); lock_res_and_lock(lock); /* may race with tgt_cancel_slc_locks() */ if (lock->l_transno != 0) { @@ -96,26 +329,26 @@ void tgt_discard_slc_lock(struct ldlm_lock *lock) LDLM_LOCK_PUT(lock); } unlock_res_and_lock(lock); - spin_unlock(&uncommitted_slc_locks_guard); + spin_unlock(&lut->lut_slc_locks_guard); } EXPORT_SYMBOL(tgt_discard_slc_lock); /* * Cancel cross-MDT locks upon transaction commit. * - * Remove cross-MDT locks from uncommitted_slc_locks, cancel them and put lock - * refcount. + * Remove cross-MDT locks from lut_slc_locks, cancel them and put lock refcount. * + * \param lut target * \param transno transaction with this number was committed. */ -void tgt_cancel_slc_locks(__u64 transno) +void tgt_cancel_slc_locks(struct lu_target *lut, __u64 transno) { struct ldlm_lock *lock, *next; LIST_HEAD(list); struct lustre_handle lockh; - spin_lock(&uncommitted_slc_locks_guard); - list_for_each_entry_safe(lock, next, &uncommitted_slc_locks, + spin_lock(&lut->lut_slc_locks_guard); + list_for_each_entry_safe(lock, next, &lut->lut_slc_locks, l_slc_link) { lock_res_and_lock(lock); LASSERT(lock->l_transno != 0); @@ -134,7 +367,7 @@ void tgt_cancel_slc_locks(__u64 transno) list_move(&lock->l_slc_link, &list); unlock_res_and_lock(lock); } - spin_unlock(&uncommitted_slc_locks_guard); + spin_unlock(&lut->lut_slc_locks_guard); list_for_each_entry_safe(lock, next, &list, l_slc_link) { list_del_init(&lock->l_slc_link); @@ -153,6 +386,8 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, struct lu_attr attr; struct lu_fid fid; struct dt_object *o; + struct tg_grants_data *tgd = &lut->lut_tgd; + struct obd_statfs *osfs; int i, rc = 0; ENTRY; @@ -182,10 +417,45 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, spin_lock_init(&lut->lut_flags_lock); lut->lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL; + spin_lock_init(&lut->lut_slc_locks_guard); + INIT_LIST_HEAD(&lut->lut_slc_locks); + /* last_rcvd initialization is needed by replayable targets only */ if (!obd->obd_replayable) RETURN(0); + /* initialize grant and statfs data in target */ + dt_conf_get(env, lut->lut_bottom, &lut->lut_dt_conf); + + /* statfs data */ + spin_lock_init(&tgd->tgd_osfs_lock); + tgd->tgd_osfs_age = ktime_get_seconds() - 1000; + tgd->tgd_osfs_unstable = 0; + tgd->tgd_statfs_inflight = 0; + tgd->tgd_osfs_inflight = 0; + + /* grant data */ + spin_lock_init(&tgd->tgd_grant_lock); + tgd->tgd_tot_dirty = 0; + tgd->tgd_tot_granted = 0; + tgd->tgd_tot_pending = 0; + tgd->tgd_grant_compat_disable = 0; + + /* populate cached statfs data */ + osfs = &tgt_th_info(env)->tti_u.osfs; + rc = tgt_statfs_internal(env, lut, osfs, 0, NULL); + if (rc != 0) { + CERROR("%s: can't get statfs data, rc %d\n", tgt_name(lut), + rc); + GOTO(out, rc); + } + if (!is_power_of_2(osfs->os_bsize)) { + CERROR("%s: blocksize (%d) is not a power of 2\n", + tgt_name(lut), osfs->os_bsize); + GOTO(out, rc = -EPROTO); + } + tgd->tgd_blockbits = fls(osfs->os_bsize) - 1; + spin_lock_init(&lut->lut_translock); spin_lock_init(&lut->lut_client_bitmap_lock); @@ -216,7 +486,6 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, /* prepare transactions callbacks */ lut->lut_txn_cb.dtc_txn_start = tgt_txn_start_cb; lut->lut_txn_cb.dtc_txn_stop = tgt_txn_stop_cb; - lut->lut_txn_cb.dtc_txn_commit = NULL; lut->lut_txn_cb.dtc_cookie = lut; lut->lut_txn_cb.dtc_tag = LCT_DT_THREAD | LCT_MD_THREAD; INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage); @@ -224,6 +493,11 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb); lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut; + lut->lut_fmd_max_num = LUT_FMD_MAX_NUM_DEFAULT; + lut->lut_fmd_max_age = LUT_FMD_MAX_AGE_DEFAULT; + + atomic_set(&lut->lut_sync_count, 0); + /* reply_data is supported by MDT targets only for now */ if (strncmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) != 0) RETURN(0); @@ -253,8 +527,6 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, if (rc < 0) GOTO(out, rc); - atomic_set(&lut->lut_sync_count, 0); - RETURN(0); out: @@ -263,14 +535,14 @@ out_put: obd->u.obt.obt_magic = 0; obd->u.obt.obt_lut = NULL; if (lut->lut_last_rcvd != NULL) { - lu_object_put(env, &lut->lut_last_rcvd->do_lu); + dt_object_put(env, lut->lut_last_rcvd); lut->lut_last_rcvd = NULL; } if (lut->lut_client_bitmap != NULL) OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3); lut->lut_client_bitmap = NULL; if (lut->lut_reply_data != NULL) - lu_object_put(env, &lut->lut_reply_data->do_lu); + dt_object_put(env, lut->lut_reply_data); lut->lut_reply_data = NULL; if (lut->lut_reply_bitmap != NULL) { for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) { @@ -309,7 +581,7 @@ void tgt_fini(const struct lu_env *env, struct lu_target *lut) sptlrpc_rule_set_free(&lut->lut_sptlrpc_rset); if (lut->lut_reply_data != NULL) - lu_object_put(env, &lut->lut_reply_data->do_lu); + dt_object_put(env, lut->lut_reply_data); lut->lut_reply_data = NULL; if (lut->lut_reply_bitmap != NULL) { for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) { @@ -329,15 +601,51 @@ void tgt_fini(const struct lu_env *env, struct lu_target *lut) } if (lut->lut_last_rcvd) { dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb); - lu_object_put(env, &lut->lut_last_rcvd->do_lu); + dt_object_put(env, lut->lut_last_rcvd); lut->lut_last_rcvd = NULL; } EXIT; } EXPORT_SYMBOL(tgt_fini); +static struct kmem_cache *tgt_thread_kmem; +static struct kmem_cache *tgt_session_kmem; +struct kmem_cache *tgt_fmd_kmem; + +static struct lu_kmem_descr tgt_caches[] = { + { + .ckd_cache = &tgt_thread_kmem, + .ckd_name = "tgt_thread_kmem", + .ckd_size = sizeof(struct tgt_thread_info), + }, + { + .ckd_cache = &tgt_session_kmem, + .ckd_name = "tgt_session_kmem", + .ckd_size = sizeof(struct tgt_session_info) + }, + { + .ckd_cache = &tgt_fmd_kmem, + .ckd_name = "tgt_fmd_cache", + .ckd_size = sizeof(struct tgt_fmd_data) + }, + { + .ckd_cache = NULL + } +}; + + /* context key constructor/destructor: tg_key_init, tg_key_fini */ -LU_KEY_INIT(tgt, struct tgt_thread_info); +static void *tgt_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct tgt_thread_info *thread; + + OBD_SLAB_ALLOC_PTR_GFP(thread, tgt_thread_kmem, GFP_NOFS); + if (thread == NULL) + return ERR_PTR(-ENOMEM); + + return thread; +} static void tgt_key_fini(const struct lu_context *ctx, struct lu_context_key *key, void *data) @@ -354,7 +662,7 @@ static void tgt_key_fini(const struct lu_context *ctx, if (args->ta_args != NULL) OBD_FREE(args->ta_args, sizeof(args->ta_args[0]) * args->ta_alloc_args); - OBD_FREE_PTR(info); + OBD_SLAB_FREE_PTR(info, tgt_thread_kmem); } static void tgt_key_exit(const struct lu_context *ctx, @@ -376,8 +684,25 @@ struct lu_context_key tgt_thread_key = { LU_KEY_INIT_GENERIC(tgt); -/* context key constructor/destructor: tgt_ses_key_init, tgt_ses_key_fini */ -LU_KEY_INIT_FINI(tgt_ses, struct tgt_session_info); +static void *tgt_ses_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct tgt_session_info *session; + + OBD_SLAB_ALLOC_PTR_GFP(session, tgt_session_kmem, GFP_NOFS); + if (session == NULL) + return ERR_PTR(-ENOMEM); + + return session; +} + +static void tgt_ses_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct tgt_session_info *session = data; + + OBD_SLAB_FREE_PTR(session, tgt_session_kmem); +} /* context key: tgt_session_key */ struct lu_context_key tgt_session_key = { @@ -400,8 +725,13 @@ struct page *tgt_page_to_corrupt; int tgt_mod_init(void) { + int result; ENTRY; + result = lu_kmem_init(tgt_caches); + if (result != 0) + RETURN(result); + tgt_page_to_corrupt = alloc_page(GFP_KERNEL); tgt_key_init_generic(&tgt_thread_key, NULL); @@ -409,22 +739,23 @@ int tgt_mod_init(void) tgt_ses_key_init_generic(&tgt_session_key, NULL); lu_context_key_register_many(&tgt_session_key, NULL); + barrier_init(); update_info_init(); - spin_lock_init(&uncommitted_slc_locks_guard); - INIT_LIST_HEAD(&uncommitted_slc_locks); - RETURN(0); } void tgt_mod_exit(void) { + barrier_fini(); if (tgt_page_to_corrupt != NULL) put_page(tgt_page_to_corrupt); lu_context_key_degister(&tgt_thread_key); lu_context_key_degister(&tgt_session_key); update_info_fini(); + + lu_kmem_fini(tgt_caches); }