4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02110-1301, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
27 * lustre/target/tgt_main.c
29 * Lustre Unified Target main initialization code
31 * Author: Mikhail Pershin <mike.pershin@intel.com>
34 #define DEBUG_SUBSYSTEM S_CLASS
37 #include "tgt_internal.h"
38 #include "../ptlrpc/ptlrpc_internal.h"
41 * Save cross-MDT lock in lut_slc_locks.
43 * Lock R/W count is not saved, but released in unlock (not canceled remotely),
44 * instead only a refcount is taken, so that the remote MDT where the object
45 * resides can detect conflict with this lock there.
48 * \param lock cross-MDT lock to save
49 * \param transno when the transaction with this transno is committed, this lock
52 void tgt_save_slc_lock(struct lu_target *lut, struct ldlm_lock *lock,
55 spin_lock(&lut->lut_slc_locks_guard);
56 lock_res_and_lock(lock);
57 if (ldlm_is_cbpending(lock)) {
58 /* if it was canceld by server, don't save, because remote MDT
59 * will do Sync-on-Cancel. */
62 lock->l_transno = transno;
63 /* if this lock is in the list already, there are two operations
64 * both use this lock, and save it after use, so for the second
65 * one, just put the refcount. */
66 if (list_empty(&lock->l_slc_link))
67 list_add_tail(&lock->l_slc_link, &lut->lut_slc_locks);
71 unlock_res_and_lock(lock);
72 spin_unlock(&lut->lut_slc_locks_guard);
74 EXPORT_SYMBOL(tgt_save_slc_lock);
77 * Discard cross-MDT lock from lut_slc_locks.
79 * This is called upon BAST, just remove lock from lut_slc_locks and put lock
80 * refcount. The BAST will cancel this lock.
83 * \param lock cross-MDT lock to discard
85 void tgt_discard_slc_lock(struct lu_target *lut, struct ldlm_lock *lock)
87 spin_lock(&lut->lut_slc_locks_guard);
88 lock_res_and_lock(lock);
89 /* may race with tgt_cancel_slc_locks() */
90 if (lock->l_transno != 0) {
91 LASSERT(!list_empty(&lock->l_slc_link));
92 LASSERT(ldlm_is_cbpending(lock));
93 list_del_init(&lock->l_slc_link);
97 unlock_res_and_lock(lock);
98 spin_unlock(&lut->lut_slc_locks_guard);
100 EXPORT_SYMBOL(tgt_discard_slc_lock);
103 * Cancel cross-MDT locks upon transaction commit.
105 * Remove cross-MDT locks from lut_slc_locks, cancel them and put lock refcount.
108 * \param transno transaction with this number was committed.
110 void tgt_cancel_slc_locks(struct lu_target *lut, __u64 transno)
112 struct ldlm_lock *lock, *next;
114 struct lustre_handle lockh;
116 spin_lock(&lut->lut_slc_locks_guard);
117 list_for_each_entry_safe(lock, next, &lut->lut_slc_locks,
119 lock_res_and_lock(lock);
120 LASSERT(lock->l_transno != 0);
121 if (lock->l_transno > transno) {
122 unlock_res_and_lock(lock);
125 /* ouch, another operation is using it after it's saved */
126 if (lock->l_readers != 0 || lock->l_writers != 0) {
127 unlock_res_and_lock(lock);
130 /* set CBPENDING so that this lock won't be used again */
131 ldlm_set_cbpending(lock);
133 list_move(&lock->l_slc_link, &list);
134 unlock_res_and_lock(lock);
136 spin_unlock(&lut->lut_slc_locks_guard);
138 list_for_each_entry_safe(lock, next, &list, l_slc_link) {
139 list_del_init(&lock->l_slc_link);
140 ldlm_lock2handle(lock, &lockh);
141 ldlm_cli_cancel(&lockh, LCF_ASYNC);
146 int tgt_init(const struct lu_env *env, struct lu_target *lut,
147 struct obd_device *obd, struct dt_device *dt,
148 struct tgt_opc_slice *slice, int request_fail_id,
151 struct dt_object_format dof;
155 struct tg_grants_data *tgd = &lut->lut_tgd;
156 struct obd_statfs *osfs;
164 lut->lut_bottom = dt;
165 lut->lut_last_rcvd = NULL;
166 lut->lut_client_bitmap = NULL;
167 atomic_set(&lut->lut_num_clients, 0);
168 atomic_set(&lut->lut_client_generation, 0);
169 lut->lut_reply_data = NULL;
170 lut->lut_reply_bitmap = NULL;
171 obd->u.obt.obt_lut = lut;
172 obd->u.obt.obt_magic = OBT_MAGIC;
174 /* set request handler slice and parameters */
175 lut->lut_slice = slice;
176 lut->lut_reply_fail_id = reply_fail_id;
177 lut->lut_request_fail_id = request_fail_id;
179 /* sptlrcp variables init */
180 rwlock_init(&lut->lut_sptlrpc_lock);
181 sptlrpc_rule_set_init(&lut->lut_sptlrpc_rset);
183 spin_lock_init(&lut->lut_flags_lock);
184 lut->lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL;
186 spin_lock_init(&lut->lut_slc_locks_guard);
187 INIT_LIST_HEAD(&lut->lut_slc_locks);
189 /* last_rcvd initialization is needed by replayable targets only */
190 if (!obd->obd_replayable)
193 /* initialize grant and statfs data in target */
194 dt_conf_get(env, lut->lut_bottom, &lut->lut_dt_conf);
197 spin_lock_init(&tgd->tgd_osfs_lock);
198 tgd->tgd_osfs_age = ktime_get_seconds() - 1000;
199 tgd->tgd_osfs_unstable = 0;
200 tgd->tgd_statfs_inflight = 0;
201 tgd->tgd_osfs_inflight = 0;
204 spin_lock_init(&tgd->tgd_grant_lock);
205 tgd->tgd_tot_dirty = 0;
206 tgd->tgd_tot_granted = 0;
207 tgd->tgd_tot_pending = 0;
208 tgd->tgd_grant_compat_disable = 0;
210 /* populate cached statfs data */
211 osfs = &tgt_th_info(env)->tti_u.osfs;
212 rc = tgt_statfs_internal(env, lut, osfs, 0, NULL);
214 CERROR("%s: can't get statfs data, rc %d\n", tgt_name(lut),
218 if (!is_power_of_2(osfs->os_bsize)) {
219 CERROR("%s: blocksize (%d) is not a power of 2\n",
220 tgt_name(lut), osfs->os_bsize);
221 GOTO(out, rc = -EPROTO);
223 tgd->tgd_blockbits = fls(osfs->os_bsize) - 1;
225 spin_lock_init(&lut->lut_translock);
226 spin_lock_init(&lut->lut_client_bitmap_lock);
228 OBD_ALLOC(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
229 if (lut->lut_client_bitmap == NULL)
232 memset(&attr, 0, sizeof(attr));
233 attr.la_valid = LA_MODE;
234 attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
235 dof.dof_type = dt_mode_to_dft(S_IFREG);
237 lu_local_obj_fid(&fid, LAST_RECV_OID);
239 o = dt_find_or_create(env, lut->lut_bottom, &fid, &dof, &attr);
242 CERROR("%s: cannot open LAST_RCVD: rc = %d\n", tgt_name(lut),
247 lut->lut_last_rcvd = o;
248 rc = tgt_server_data_init(env, lut);
252 /* prepare transactions callbacks */
253 lut->lut_txn_cb.dtc_txn_start = tgt_txn_start_cb;
254 lut->lut_txn_cb.dtc_txn_stop = tgt_txn_stop_cb;
255 lut->lut_txn_cb.dtc_cookie = lut;
256 lut->lut_txn_cb.dtc_tag = LCT_DT_THREAD | LCT_MD_THREAD;
257 INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage);
259 dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb);
260 lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut;
262 lut->lut_fmd_max_num = LUT_FMD_MAX_NUM_DEFAULT;
263 lut->lut_fmd_max_age = LUT_FMD_MAX_AGE_DEFAULT;
265 atomic_set(&lut->lut_sync_count, 0);
267 /* reply_data is supported by MDT targets only for now */
268 if (strncmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) != 0)
271 OBD_ALLOC(lut->lut_reply_bitmap,
272 LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
273 if (lut->lut_reply_bitmap == NULL)
274 GOTO(out, rc = -ENOMEM);
276 memset(&attr, 0, sizeof(attr));
277 attr.la_valid = LA_MODE;
278 attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
279 dof.dof_type = dt_mode_to_dft(S_IFREG);
281 lu_local_obj_fid(&fid, REPLY_DATA_OID);
283 o = dt_find_or_create(env, lut->lut_bottom, &fid, &dof, &attr);
286 CERROR("%s: cannot open REPLY_DATA: rc = %d\n", tgt_name(lut),
290 lut->lut_reply_data = o;
292 rc = tgt_reply_data_init(env, lut);
299 dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb);
301 obd->u.obt.obt_magic = 0;
302 obd->u.obt.obt_lut = NULL;
303 if (lut->lut_last_rcvd != NULL) {
304 dt_object_put(env, lut->lut_last_rcvd);
305 lut->lut_last_rcvd = NULL;
307 if (lut->lut_client_bitmap != NULL)
308 OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
309 lut->lut_client_bitmap = NULL;
310 if (lut->lut_reply_data != NULL)
311 dt_object_put(env, lut->lut_reply_data);
312 lut->lut_reply_data = NULL;
313 if (lut->lut_reply_bitmap != NULL) {
314 for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) {
315 if (lut->lut_reply_bitmap[i] != NULL)
316 OBD_FREE_LARGE(lut->lut_reply_bitmap[i],
317 BITS_TO_LONGS(LUT_REPLY_SLOTS_PER_CHUNK) *
319 lut->lut_reply_bitmap[i] = NULL;
321 OBD_FREE(lut->lut_reply_bitmap,
322 LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
324 lut->lut_reply_bitmap = NULL;
327 EXPORT_SYMBOL(tgt_init);
329 void tgt_fini(const struct lu_env *env, struct lu_target *lut)
335 if (lut->lut_lsd.lsd_feature_incompat & OBD_INCOMPAT_MULTI_RPCS &&
336 atomic_read(&lut->lut_num_clients) == 0) {
337 /* Clear MULTI RPCS incompatibility flag that prevents previous
338 * Lustre versions to mount a target with reply_data file */
339 lut->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS;
340 rc = tgt_server_data_update(env, lut, 1);
342 CERROR("%s: unable to clear MULTI RPCS "
343 "incompatibility flag\n",
344 lut->lut_obd->obd_name);
347 sptlrpc_rule_set_free(&lut->lut_sptlrpc_rset);
349 if (lut->lut_reply_data != NULL)
350 dt_object_put(env, lut->lut_reply_data);
351 lut->lut_reply_data = NULL;
352 if (lut->lut_reply_bitmap != NULL) {
353 for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) {
354 if (lut->lut_reply_bitmap[i] != NULL)
355 OBD_FREE_LARGE(lut->lut_reply_bitmap[i],
356 BITS_TO_LONGS(LUT_REPLY_SLOTS_PER_CHUNK) *
358 lut->lut_reply_bitmap[i] = NULL;
360 OBD_FREE(lut->lut_reply_bitmap,
361 LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
363 lut->lut_reply_bitmap = NULL;
364 if (lut->lut_client_bitmap) {
365 OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
366 lut->lut_client_bitmap = NULL;
368 if (lut->lut_last_rcvd) {
369 dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb);
370 dt_object_put(env, lut->lut_last_rcvd);
371 lut->lut_last_rcvd = NULL;
375 EXPORT_SYMBOL(tgt_fini);
377 static struct kmem_cache *tgt_thread_kmem;
378 static struct kmem_cache *tgt_session_kmem;
379 static struct lu_kmem_descr tgt_caches[] = {
381 .ckd_cache = &tgt_thread_kmem,
382 .ckd_name = "tgt_thread_kmem",
383 .ckd_size = sizeof(struct tgt_thread_info),
386 .ckd_cache = &tgt_session_kmem,
387 .ckd_name = "tgt_session_kmem",
388 .ckd_size = sizeof(struct tgt_session_info)
396 /* context key constructor/destructor: tg_key_init, tg_key_fini */
397 static void *tgt_key_init(const struct lu_context *ctx,
398 struct lu_context_key *key)
400 struct tgt_thread_info *thread;
402 OBD_SLAB_ALLOC_PTR_GFP(thread, tgt_thread_kmem, GFP_NOFS);
404 return ERR_PTR(-ENOMEM);
409 static void tgt_key_fini(const struct lu_context *ctx,
410 struct lu_context_key *key, void *data)
412 struct tgt_thread_info *info = data;
413 struct thandle_exec_args *args = &info->tti_tea;
416 for (i = 0; i < args->ta_alloc_args; i++) {
417 if (args->ta_args[i] != NULL)
418 OBD_FREE_PTR(args->ta_args[i]);
421 if (args->ta_args != NULL)
422 OBD_FREE(args->ta_args, sizeof(args->ta_args[0]) *
423 args->ta_alloc_args);
424 OBD_SLAB_FREE_PTR(info, tgt_thread_kmem);
427 static void tgt_key_exit(const struct lu_context *ctx,
428 struct lu_context_key *key, void *data)
430 struct tgt_thread_info *tti = data;
432 tti->tti_has_trans = 0;
433 tti->tti_mult_trans = 0;
436 /* context key: tg_thread_key */
437 struct lu_context_key tgt_thread_key = {
438 .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD,
439 .lct_init = tgt_key_init,
440 .lct_fini = tgt_key_fini,
441 .lct_exit = tgt_key_exit,
444 LU_KEY_INIT_GENERIC(tgt);
446 static void *tgt_ses_key_init(const struct lu_context *ctx,
447 struct lu_context_key *key)
449 struct tgt_session_info *session;
451 OBD_SLAB_ALLOC_PTR_GFP(session, tgt_session_kmem, GFP_NOFS);
453 return ERR_PTR(-ENOMEM);
458 static void tgt_ses_key_fini(const struct lu_context *ctx,
459 struct lu_context_key *key, void *data)
461 struct tgt_session_info *session = data;
463 OBD_SLAB_FREE_PTR(session, tgt_session_kmem);
466 /* context key: tgt_session_key */
467 struct lu_context_key tgt_session_key = {
468 .lct_tags = LCT_SERVER_SESSION,
469 .lct_init = tgt_ses_key_init,
470 .lct_fini = tgt_ses_key_fini,
472 EXPORT_SYMBOL(tgt_session_key);
474 LU_KEY_INIT_GENERIC(tgt_ses);
477 * this page is allocated statically when module is initializing
478 * it is used to simulate data corruptions, see ost_checksum_bulk()
479 * for details. as the original pages provided by the layers below
480 * can be remain in the internal cache, we do not want to modify
483 struct page *tgt_page_to_corrupt;
485 int tgt_mod_init(void)
490 result = lu_kmem_init(tgt_caches);
494 tgt_page_to_corrupt = alloc_page(GFP_KERNEL);
496 tgt_key_init_generic(&tgt_thread_key, NULL);
497 lu_context_key_register_many(&tgt_thread_key, NULL);
499 tgt_ses_key_init_generic(&tgt_session_key, NULL);
500 lu_context_key_register_many(&tgt_session_key, NULL);
508 void tgt_mod_exit(void)
511 if (tgt_page_to_corrupt != NULL)
512 put_page(tgt_page_to_corrupt);
514 lu_context_key_degister(&tgt_thread_key);
515 lu_context_key_degister(&tgt_session_key);
518 lu_kmem_fini(tgt_caches);