4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA
24 * Copyright (c) 2012, 2015, Intel Corporation.
27 * lustre/target/tgt_main.c
29 * Lustre Unified Target main initialization code
31 * Author: Mikhail Pershin <mike.pershin@intel.com>
34 #define DEBUG_SUBSYSTEM S_CLASS
37 #include "tgt_internal.h"
38 #include "../ptlrpc/ptlrpc_internal.h"
/* Guards the uncommitted_slc_locks list below; initialized in
 * tgt_mod_init(). Lock ordering: this spinlock is taken first, then the
 * per-lock resource lock (see tgt_save_slc_lock() et al.). */
40 static spinlock_t uncommitted_slc_locks_guard;
/* Cross-MDT (Sync-on-Lock-Cancel) locks whose transactions have not yet
 * committed; locks are linked through lock->l_slc_link and removed on
 * transaction commit (tgt_cancel_slc_locks) or BAST (tgt_discard_slc_lock). */
41 static struct list_head uncommitted_slc_locks;
44 * Save cross-MDT lock in uncommitted_slc_locks.
46 * Lock R/W count is not saved, but released in unlock (not canceled remotely),
47 * instead only a refcount is taken, so that the remote MDT where the object
48 * resides can detect conflict with this lock there.
50 * \param lock cross-MDT lock to save
51 * \param transno when the transaction with this transno is committed, this lock
54 void tgt_save_slc_lock(struct ldlm_lock *lock, __u64 transno)
/* lock ordering: list guard first, then the resource lock */
56 spin_lock(&uncommitted_slc_locks_guard);
57 lock_res_and_lock(lock);
58 if (ldlm_is_cbpending(lock)) {
59 /* if it was canceled by server, don't save, because remote MDT
60 * will do Sync-on-Cancel. */
/* tag the lock with its transno; tgt_cancel_slc_locks() later cancels
 * all saved locks whose l_transno <= the committed transno */
63 lock->l_transno = transno;
64 /* if this lock is in the list already, there are two operations
65 * both use this lock, and save it after use, so for the second
66 * one, just put the refcount. */
67 if (list_empty(&lock->l_slc_link))
68 list_add_tail(&lock->l_slc_link,
69 &uncommitted_slc_locks);
73 unlock_res_and_lock(lock);
74 spin_unlock(&uncommitted_slc_locks_guard);
76 EXPORT_SYMBOL(tgt_save_slc_lock);
79 * Discard cross-MDT lock from uncommitted_slc_locks.
81 * This is called upon BAST, just remove lock from uncommitted_slc_locks and put
82 * lock refcount. The BAST will cancel this lock.
84 * \param lock cross-MDT lock to discard
86 void tgt_discard_slc_lock(struct ldlm_lock *lock)
88 spin_lock(&uncommitted_slc_locks_guard);
89 lock_res_and_lock(lock);
90 /* may race with tgt_cancel_slc_locks() */
/* NOTE(review): a zero l_transno presumably means the commit path already
 * took this lock off the list — confirm against tgt_cancel_slc_locks() */
91 if (lock->l_transno != 0) {
92 LASSERT(!list_empty(&lock->l_slc_link));
/* discard happens on BAST, so CBPENDING must already be set */
93 LASSERT(ldlm_is_cbpending(lock));
94 list_del_init(&lock->l_slc_link);
98 unlock_res_and_lock(lock);
99 spin_unlock(&uncommitted_slc_locks_guard);
104 * Cancel cross-MDT locks upon transaction commit.
106 * Remove cross-MDT locks from uncommitted_slc_locks, cancel them and put lock
109 * \param transno transaction with this number was committed.
111 void tgt_cancel_slc_locks(__u64 transno)
113 struct ldlm_lock *lock, *next;
115 struct lustre_handle lockh;
117 spin_lock(&uncommitted_slc_locks_guard);
118 list_for_each_entry_safe(lock, next, &uncommitted_slc_locks,
120 lock_res_and_lock(lock);
/* only tagged locks may be on the list (see tgt_save_slc_lock()) */
121 LASSERT(lock->l_transno != 0);
/* this lock's transaction has not committed yet — skip it */
122 if (lock->l_transno > transno) {
123 unlock_res_and_lock(lock);
126 /* ouch, another operation is using it after it's saved */
127 if (lock->l_readers != 0 || lock->l_writers != 0) {
128 unlock_res_and_lock(lock);
131 /* set CBPENDING so that this lock won't be used again */
132 ldlm_set_cbpending(lock);
/* move to a private list so cancellation can run outside the guard */
134 list_move(&lock->l_slc_link, &list);
135 unlock_res_and_lock(lock);
137 spin_unlock(&uncommitted_slc_locks_guard);
/* cancel the collected locks with no spinlocks held */
139 list_for_each_entry_safe(lock, next, &list, l_slc_link) {
140 list_del_init(&lock->l_slc_link);
141 ldlm_lock2handle(lock, &lockh);
142 ldlm_cli_cancel(&lockh, LCF_ASYNC);
/**
 * Initialize a unified target.
 *
 * Wires the lu_target embedded in \a obd: bottom dt device, request-handler
 * slice and fail ids, sptlrpc rule set, and — for replayable targets only —
 * the last_rcvd file with its transaction callbacks, plus (MDT targets only)
 * the reply_data file and reply bitmap used for multi-RPC recovery.
 *
 * \param env             execution environment
 * \param lut             target being initialized
 * \param obd             OBD device owning this target
 * \param dt              underlying dt (storage) device
 * \param slice           request handler opcode slice
 * \param request_fail_id fault-injection id checked on incoming requests
 *
 * NOTE(review): a reply_fail_id parameter is also used below; its
 * declaration and the local rc/attr/fid/o declarations are outside this
 * excerpt. Returns 0 on success, negative errno on failure.
 */
147 int tgt_init(const struct lu_env *env, struct lu_target *lut,
148 struct obd_device *obd, struct dt_device *dt,
149 struct tgt_opc_slice *slice, int request_fail_id,
152 struct dt_object_format dof;
163 lut->lut_bottom = dt;
164 lut->lut_last_rcvd = NULL;
165 lut->lut_client_bitmap = NULL;
166 atomic_set(&lut->lut_num_clients, 0);
167 atomic_set(&lut->lut_client_generation, 0);
168 lut->lut_reply_data = NULL;
169 lut->lut_reply_bitmap = NULL;
170 obd->u.obt.obt_lut = lut;
171 obd->u.obt.obt_magic = OBT_MAGIC;
173 /* set request handler slice and parameters */
174 lut->lut_slice = slice;
175 lut->lut_reply_fail_id = reply_fail_id;
176 lut->lut_request_fail_id = request_fail_id;
178 /* sptlrpc variables init */
179 rwlock_init(&lut->lut_sptlrpc_lock);
180 sptlrpc_rule_set_init(&lut->lut_sptlrpc_rset);
182 spin_lock_init(&lut->lut_flags_lock);
183 lut->lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL;
185 /* last_rcvd initialization is needed by replayable targets only */
186 if (!obd->obd_replayable)
189 spin_lock_init(&lut->lut_translock);
190 spin_lock_init(&lut->lut_client_bitmap_lock);
/* one bit per client slot: LR_MAX_CLIENTS / 8 bytes */
192 OBD_ALLOC(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
193 if (lut->lut_client_bitmap == NULL)
/* find or create the LAST_RCVD file as a plain regular file */
196 memset(&attr, 0, sizeof(attr));
197 attr.la_valid = LA_MODE;
198 attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
199 dof.dof_type = dt_mode_to_dft(S_IFREG);
201 lu_local_obj_fid(&fid, LAST_RECV_OID);
203 o = dt_find_or_create(env, lut->lut_bottom, &fid, &dof, &attr);
206 CERROR("%s: cannot open LAST_RCVD: rc = %d\n", tgt_name(lut),
211 lut->lut_last_rcvd = o;
212 rc = tgt_server_data_init(env, lut);
216 /* prepare transactions callbacks */
217 lut->lut_txn_cb.dtc_txn_start = tgt_txn_start_cb;
218 lut->lut_txn_cb.dtc_txn_stop = tgt_txn_stop_cb;
219 lut->lut_txn_cb.dtc_txn_commit = NULL;
220 lut->lut_txn_cb.dtc_cookie = lut;
221 lut->lut_txn_cb.dtc_tag = LCT_DT_THREAD | LCT_MD_THREAD;
222 INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage);
224 dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb);
225 lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut;
227 /* reply_data is supported by MDT targets only for now */
228 if (strncmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) != 0)
/* array of per-chunk bitmap pointers; chunks themselves are allocated
 * lazily (freed per-chunk in tgt_fini()) */
231 OBD_ALLOC(lut->lut_reply_bitmap,
232 LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
233 if (lut->lut_reply_bitmap == NULL)
/* find or create the REPLY_DATA file, same attributes as LAST_RCVD */
236 memset(&attr, 0, sizeof(attr));
237 attr.la_valid = LA_MODE;
238 attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
239 dof.dof_type = dt_mode_to_dft(S_IFREG);
241 lu_local_obj_fid(&fid, REPLY_DATA_OID);
243 o = dt_find_or_create(env, lut->lut_bottom, &fid, &dof, &attr);
246 CERROR("%s: cannot open REPLY_DATA: rc = %d\n", tgt_name(lut),
250 lut->lut_reply_data = o;
252 rc = tgt_reply_data_init(env, lut);
256 atomic_set(&lut->lut_sync_count, 0);
/* error unwind: release everything acquired above, in reverse order */
261 dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb);
263 obd->u.obt.obt_magic = 0;
264 obd->u.obt.obt_lut = NULL;
265 if (lut->lut_last_rcvd != NULL) {
266 lu_object_put(env, &lut->lut_last_rcvd->do_lu);
267 lut->lut_last_rcvd = NULL;
269 if (lut->lut_client_bitmap != NULL)
270 OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
271 lut->lut_client_bitmap = NULL;
272 if (lut->lut_reply_data != NULL)
273 lu_object_put(env, &lut->lut_reply_data->do_lu);
274 lut->lut_reply_data = NULL;
275 if (lut->lut_reply_bitmap != NULL)
276 OBD_FREE(lut->lut_reply_bitmap,
277 LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
278 lut->lut_reply_bitmap = NULL;
281 EXPORT_SYMBOL(tgt_init);
/**
 * Finalize a unified target: the inverse of tgt_init().
 *
 * Clears the MULTI_RPCS incompatibility flag when no clients remain,
 * frees the sptlrpc rule set, releases the reply_data object and per-chunk
 * reply bitmaps, the client bitmap, and finally the last_rcvd object with
 * its transaction callback.
 *
 * \param env execution environment
 * \param lut target being torn down
 */
283 void tgt_fini(const struct lu_env *env, struct lu_target *lut)
289 if (lut->lut_lsd.lsd_feature_incompat & OBD_INCOMPAT_MULTI_RPCS &&
290 atomic_read(&lut->lut_num_clients) == 0) {
291 /* Clear MULTI RPCS incompatibility flag that prevents previous
292 * Lustre versions to mount a target with reply_data file */
293 lut->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS;
294 rc = tgt_server_data_update(env, lut, 1);
296 CERROR("%s: unable to clear MULTI RPCS "
297 "incompatibility flag\n",
298 lut->lut_obd->obd_name);
301 sptlrpc_rule_set_free(&lut->lut_sptlrpc_rset);
303 if (lut->lut_reply_data != NULL)
304 lu_object_put(env, &lut->lut_reply_data->do_lu);
305 lut->lut_reply_data = NULL;
/* free every lazily-allocated bitmap chunk, then the pointer array */
306 if (lut->lut_reply_bitmap != NULL) {
307 for (i = 0; i < LUT_REPLY_SLOTS_MAX_CHUNKS; i++) {
308 if (lut->lut_reply_bitmap[i] != NULL)
309 OBD_FREE(lut->lut_reply_bitmap[i],
310 BITS_TO_LONGS(LUT_REPLY_SLOTS_PER_CHUNK) *
312 lut->lut_reply_bitmap[i] = NULL;
314 OBD_FREE(lut->lut_reply_bitmap,
315 LUT_REPLY_SLOTS_MAX_CHUNKS * sizeof(unsigned long *));
317 lut->lut_reply_bitmap = NULL;
318 if (lut->lut_client_bitmap) {
319 OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);
320 lut->lut_client_bitmap = NULL;
/* the txn callback was registered only when last_rcvd was set up,
 * so unregister it here alongside the object release */
322 if (lut->lut_last_rcvd) {
323 dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb);
324 lu_object_put(env, &lut->lut_last_rcvd->do_lu);
325 lut->lut_last_rcvd = NULL;
329 EXPORT_SYMBOL(tgt_fini);
331 /* context key constructor/destructor: tg_key_init, tg_key_fini */
332 LU_KEY_INIT(tgt, struct tgt_thread_info);
/* Free per-thread tgt_thread_info: release each allocated thandle exec
 * arg, then the ta_args pointer array itself. NOTE(review): the info
 * struct is presumably freed after this excerpt — confirm. */
334 static void tgt_key_fini(const struct lu_context *ctx,
335 struct lu_context_key *key, void *data)
337 struct tgt_thread_info *info = data;
338 struct thandle_exec_args *args = &info->tti_tea;
341 for (i = 0; i < args->ta_alloc_args; i++) {
342 if (args->ta_args[i] != NULL)
343 OBD_FREE_PTR(args->ta_args[i]);
346 if (args->ta_args != NULL)
347 OBD_FREE(args->ta_args, sizeof(args->ta_args[0]) *
348 args->ta_alloc_args);
/* Reset per-thread transaction flags each time the context is exited, so
 * stale state never leaks into the next use of the thread info. */
352 static void tgt_key_exit(const struct lu_context *ctx,
353 struct lu_context_key *key, void *data)
355 struct tgt_thread_info *tti = data;
357 tti->tti_has_trans = 0;
358 tti->tti_mult_trans = 0;
361 /* context key: tg_thread_key */
/* Per-thread key for tgt_thread_info, valid in both MD and DT service
 * threads; tgt_key_exit clears transaction flags on every context exit. */
362 struct lu_context_key tgt_thread_key = {
363 .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD,
364 .lct_init = tgt_key_init,
365 .lct_fini = tgt_key_fini,
366 .lct_exit = tgt_key_exit,
369 LU_KEY_INIT_GENERIC(tgt);
371 /* context key constructor/destructor: tgt_ses_key_init, tgt_ses_key_fini */
372 LU_KEY_INIT_FINI(tgt_ses, struct tgt_session_info);
374 /* context key: tgt_session_key */
/* Per-session key for tgt_session_info, valid in server session contexts. */
375 struct lu_context_key tgt_session_key = {
376 .lct_tags = LCT_SERVER_SESSION,
377 .lct_init = tgt_ses_key_init,
378 .lct_fini = tgt_ses_key_fini,
380 EXPORT_SYMBOL(tgt_session_key);
382 LU_KEY_INIT_GENERIC(tgt_ses);
385 * this page is allocated statically when module is initializing
386 * it is used to simulate data corruptions, see ost_checksum_bulk()
387 * for details. as the original pages provided by the layers below
388 * can be remain in the internal cache, we do not want to modify
391 struct page *tgt_page_to_corrupt;
/* Module init: allocate the corruption-simulation page, register the
 * thread/session context keys, and set up the Sync-on-Lock-Cancel list
 * with its guard spinlock. NOTE(review): the alloc_page() result does not
 * appear to be checked here — the failure path (if any) is outside this
 * excerpt; tgt_mod_exit() tolerates a NULL page. */
393 int tgt_mod_init(void)
397 tgt_page_to_corrupt = alloc_page(GFP_KERNEL);
399 tgt_key_init_generic(&tgt_thread_key, NULL);
400 lu_context_key_register_many(&tgt_thread_key, NULL);
402 tgt_ses_key_init_generic(&tgt_session_key, NULL);
403 lu_context_key_register_many(&tgt_session_key, NULL);
407 spin_lock_init(&uncommitted_slc_locks_guard);
408 INIT_LIST_HEAD(&uncommitted_slc_locks);
/* Module exit: release the corruption-simulation page (if it was
 * allocated) and unregister both context keys registered in
 * tgt_mod_init(). */
413 void tgt_mod_exit(void)
415 if (tgt_page_to_corrupt != NULL)
416 page_cache_release(tgt_page_to_corrupt);
418 lu_context_key_degister(&tgt_thread_key);
419 lu_context_key_degister(&tgt_session_key);