4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2014, Intel Corporation.
26 * lustre/target/update_trans.c
28 * This file implements the update distribute transaction API.
30 * To manage the cross-MDT operation (distribute operation) transaction,
31 * the transaction is separated into two layers on the MD stack: top
32 * transaction and sub transaction.
34 * During the distribute operation, the top transaction is created in the LOD
35 * layer and represents the operation. A sub transaction is created by
36 * each OSD or OSP. Top transaction start/stop will trigger start/stop of all
37 * of its sub transactions. The top transaction (the whole operation) is
38 * committed only when all of its sub transactions are committed.
40 * There are three kinds of transactions:
41 * 1. Local transaction: all updates are in a single local OSD.
42 * 2. Remote transaction: all updates are only in the remote OSD,
43 * i.e. locally all updates are in OSP.
44 * 3. Mixed transaction: Updates are both in local OSD and remote
47 * Author: Di Wang <di.wang@intel.com>
50 #define DEBUG_SUBSYSTEM S_CLASS
52 #include <lu_target.h>
53 #include <lustre_log.h>
54 #include <lustre_update.h>
56 #include <obd_class.h>
57 #include <tgt_internal.h>
60 * Declare write update to sub device
62 * Declare Write updates llog records to the sub device during distribute
65 * \param[in] env execution environment
66 * \param[in] record update records being written
67 * \param[in] lst sub transaction handle
69 * \retval 0 if writing succeeds
70 * \retval negative errno if writing fails
/*
 * Declare the llog write of one update record on a sub transaction.
 *
 * Fetches the LLOG_UPDATELOG_ORIG_CTXT llog context from the obd of the
 * device behind lst->st_sub_th and hands that context's loc_handle plus the
 * record header to llog_declare_add().
 *
 * NOTE(review): the embedded original numbering has gaps (75, 78-79, 85,
 * 88-91, 93 onwards), so the declaration of rc, the body of the NULL-handle
 * branch, the remaining llog_declare_add() arguments and the return path
 * are not visible in this listing.
 */
72 static int sub_declare_updates_write(const struct lu_env *env,
73 struct llog_update_record *record,
74 struct sub_thandle *lst)
76 struct llog_ctxt *ctxt;
/* Device on which the sub transaction was created. */
77 struct dt_device *dt = lst->st_sub_th->th_dev;
80 /* If ctxt is NULL, it means not need to write update,
81 * for example if the OSP is used to connect to OST */
82 ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
83 LLOG_UPDATELOG_ORIG_CTXT);
/* NOTE(review): a NULL ctxt trips this assertion, which does not match the
 * "not need to write update" wording of the comment above; confirm which
 * behaviour is intended. */
84 LASSERT(ctxt != NULL);
86 /* Not ready to record updates yet. */
87 if (ctxt->loc_handle == NULL) {
/* (The branch body and its closing brace are not visible in this listing.)
 * Declare the record append against the context's llog handle. */
92 rc = llog_declare_add(env, ctxt->loc_handle, &record->lur_hdr,
101 * write update to sub device
103 * Write updates llog records to the sub device during distribute
106 * \param[in] env execution environment
107 * \param[in] record update records being written
108 * \param[in] lst sub transaction handle
110 * \retval 1 if writing succeeds
111 * \retval negative errno if writing fails
/*
 * Write one update record to the update llog of a sub transaction.
 *
 * Fetches the LLOG_UPDATELOG_ORIG_CTXT llog context from the obd of the
 * device behind lst->st_sub_th, asserts that the record header length
 * matches llog_update_record_size(record), and appends the record header
 * with llog_add() under the sub thandle lst->st_sub_th.
 *
 * NOTE(review): the embedded original numbering has gaps (116, 119-120,
 * 124, 128-131, 135, 138 onwards), so the declaration of rc, the body of
 * the NULL-handle branch and the return path are not visible in this
 * listing.
 */
113 static int sub_updates_write(const struct lu_env *env,
114 struct llog_update_record *record,
115 struct sub_thandle *lst)
117 struct llog_ctxt *ctxt;
/* Device on which the sub transaction was created. */
118 struct dt_device *dt = lst->st_sub_th->th_dev;
121 ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
122 LLOG_UPDATELOG_ORIG_CTXT);
/* A missing llog context is treated as a programming error here. */
123 LASSERT(ctxt != NULL);
125 /* Not ready to record updates yet, usually happens
126 * in error handler path */
127 if (ctxt->loc_handle == NULL) {
/* (The branch body and its closing brace are not visible in this listing.)
 * The header length must already equal the computed record size before the
 * record is appended. */
132 LASSERTF(record->lur_hdr.lrh_len == llog_update_record_size(record),
133 "lrh_len %u record_size %zu\n", record->lur_hdr.lrh_len,
134 llog_update_record_size(record));
/* Append the record; NULL is passed for the argument between the record
 * header and the transaction handle. */
136 rc = llog_add(env, ctxt->loc_handle, &record->lur_hdr,
137 NULL, lst->st_sub_th);
145 * write update transaction
147 * Check if there are updates being recorded in this transaction,
148 * it will write the record into the disk.
150 * \param[in] env execution environment
151 * \param[in] top_th top transaction handle
153 * \retval 0 if writing succeeds
154 * \retval negative errno if writing fails
/*
 * Write the update records collected in a top transaction.
 *
 * When top_th->tt_update_records is set, the parameters and updates are
 * merged with merge_params_updates_buf(), the resulting record is dumped at
 * D_INFO, its llog header is filled in (length and UPDATE_REC type), and
 * sub_updates_write() is called while walking tt_sub_thandle_list.
 *
 * NOTE(review): the embedded original numbering has gaps (158, 162-164,
 * 166-167, 172-174, 185, 187 onwards), so the declaration of rc, the early
 * return for the NULL case, the error checks after each call, the statement
 * guarded by the st_record_update test and the return path are not visible
 * in this listing.
 */
156 static int top_updates_write(const struct lu_env *env,
157 struct top_thandle *top_th)
159 struct thandle_update_records *tur;
160 struct llog_update_record *lur;
161 struct sub_thandle *lst;
/* Nothing was recorded for this transaction (guarded statement not
 * visible in this listing). */
165 if (top_th->tt_update_records == NULL)
168 tur = top_th->tt_update_records;
170 /* merge the parameters and updates into one buffer */
171 rc = merge_params_updates_buf(env, tur);
/* The record to be written is taken from tur after the merge call. */
175 lur = tur->tur_update_records;
176 /* Dump updates to debug log */
177 update_records_dump(&lur->lur_update_rec, D_INFO, true);
179 /* Init update record header */
180 lur->lur_hdr.lrh_len = llog_update_record_size(lur);
181 lur->lur_hdr.lrh_type = UPDATE_REC;
/* Walk every sub thandle attached to this top thandle. */
183 list_for_each_entry(lst, &top_th->tt_sub_thandle_list, st_sub_list) {
/* Test for sub thandles not flagged for update recording; presumably they
 * are skipped, but the guarded statement is not visible here. */
184 if (!lst->st_record_update)
186 rc = sub_updates_write(env, lur, lst);
198 * Create the top transaction.
200 * Create the top transaction on the master device. It will create a top
201 * thandle and a sub thandle on the master device.
203 * \param[in] env execution environment
204 * \param[in] master_dev master device on which the top thandle will be created
206 * \retval pointer to the created thandle.
207 * \retval ERR_PTR(errno) if creation failed.
/*
 * Allocate a top thandle and create its master sub thandle on master_dev.
 *
 * On success the top thandle is stamped with TOP_THANDLE_MAGIC, linked to
 * the child thandle returned by dt_trans_create(), given an empty sub
 * thandle list and no update records, and a pointer to its embedded
 * tt_super is returned.
 *
 * NOTE(review): the embedded original numbering has gaps (209, 211, 214,
 * 216, 218, 222-224, 228, 232, 234), so the return type line, the
 * allocation-failure test and the statement returned from the IS_ERR branch
 * are not visible in this listing.
 */
210 top_trans_create(const struct lu_env *env, struct dt_device *master_dev)
212 struct top_thandle *top_th;
213 struct thandle *child_th;
/* Allocate the top thandle with the __GFP_IO allocation flag. */
215 OBD_ALLOC_GFP(top_th, sizeof(*top_th), __GFP_IO);
/* Allocation-failure return (its condition is not visible here). */
217 return ERR_PTR(-ENOMEM);
/* Create the sub transaction on the master device. */
219 child_th = dt_trans_create(env, master_dev);
220 if (IS_ERR(child_th)) {
/* Release the top thandle when the child could not be created. */
221 OBD_FREE_PTR(top_th);
225 top_th->tt_magic = TOP_THANDLE_MAGIC;
226 top_th->tt_master_sub_thandle = child_th;
/* The child thandle points back at the top thandle. */
227 child_th->th_top = &top_th->tt_super;
229 top_th->tt_update_records = NULL;
/* The top thandle's own th_top refers to itself. */
230 top_th->tt_super.th_top = &top_th->tt_super;
231 INIT_LIST_HEAD(&top_th->tt_sub_thandle_list);
233 return &top_th->tt_super;
235 EXPORT_SYMBOL(top_trans_create);
238 * start the top transaction.
240 * Start all of its sub transactions, then start master sub transaction.
242 * \param[in] env execution environment
243 * \param[in] master_dev master device on which the top thandle will be started
244 * \param[in] th top thandle
246 * \retval 0 if transaction start succeeds.
247 * \retval negative errno if start fails.
/*
 * Start a top transaction.
 *
 * After check_and_prepare_update_record(), the function walks
 * tt_sub_thandle_list once to declare the update llog writes with
 * sub_declare_updates_write(), walks it again to copy th_sync/th_local from
 * the top thandle and call dt_trans_start() on each sub thandle, and
 * finally starts the master sub thandle on master_dev, whose result is
 * returned.
 *
 * NOTE(review): the embedded original numbering has gaps (250-251, 253,
 * 255-256, 259-260, 264, 266-267, 270-273, 278-282, 285, 287), so the th
 * parameter line, the declaration of rc, the error checks after each call,
 * the statement guarded by the st_record_update test and the loop closers
 * are not visible in this listing.
 */
249 int top_trans_start(const struct lu_env *env, struct dt_device *master_dev,
252 struct top_thandle *top_th = container_of(th, struct top_thandle,
254 struct sub_thandle *lst;
/* Sanity check that th really is embedded in a top thandle. */
257 LASSERT(top_th->tt_magic == TOP_THANDLE_MAGIC);
258 rc = check_and_prepare_update_record(env, th);
261 /* Check if needs to write updates */
262 list_for_each_entry(lst, &top_th->tt_sub_thandle_list, st_sub_list) {
263 struct llog_update_record *record;
/* Test for sub thandles not flagged for update recording; presumably they
 * are skipped, but the guarded statement is not visible here. */
265 if (!lst->st_record_update)
/* NOTE(review): dereferences tt_update_records without a visible NULL
 * check; presumably check_and_prepare_update_record() set it up. Confirm. */
268 record = top_th->tt_update_records->tur_update_records;
269 rc = sub_declare_updates_write(env, record, lst);
/* Second pass: propagate the sync/local flags and start each sub thandle. */
274 list_for_each_entry(lst, &top_th->tt_sub_thandle_list, st_sub_list) {
275 lst->st_sub_th->th_sync = th->th_sync;
276 lst->st_sub_th->th_local = th->th_local;
277 rc = dt_trans_start(env, lst->st_sub_th->th_dev,
/* The master sub thandle gets the same flags and is started after the
 * loop above. */
283 top_th->tt_master_sub_thandle->th_local = th->th_local;
284 top_th->tt_master_sub_thandle->th_sync = th->th_sync;
286 return dt_trans_start(env, master_dev, top_th->tt_master_sub_thandle);
288 EXPORT_SYMBOL(top_trans_start);
291 * Stop the top transaction.
293 * Stop the transaction on the master device first, then stop transactions
294 * on other sub devices.
296 * \param[in] env execution environment
297 * \param[in] master_dev master device of the top thandle being stopped
298 * \param[in] th top thandle
300 * \retval 0 if stop transaction succeeds.
301 * \retval negative errno if stop transaction fails.
/*
 * Stop a top transaction and destroy its top thandle.
 *
 * When update records exist and th->th_result is 0, the records are written
 * with top_updates_write(). The master sub thandle is then stopped on
 * master_dev, each entry of tt_sub_thandle_list is stopped with
 * dt_trans_stop(), and top_thandle_destroy() releases the top thandle.
 *
 * NOTE(review): the embedded original numbering has gaps (304-305, 307,
 * 310-312, 317, 322, 324-325, 327, 334, 336-338, 340, 345, 347-349,
 * 351-353), so the th parameter line, the declarations of rc and rc2, the
 * conditions selecting between the two th_result assignments, the statement
 * guarded by the rc2 test and the return path are not visible in this
 * listing.
 */
303 int top_trans_stop(const struct lu_env *env, struct dt_device *master_dev,
306 struct top_thandle *top_th = container_of(th, struct top_thandle,
/* NOTE(review): tt_update_records is read here, before the tt_magic
 * assertion further down has validated top_th. */
308 struct thandle_update_records *tur = top_th->tt_update_records;
309 struct sub_thandle *lst;
313 /* Note: we always need walk through all of sub_transaction to do
314 * transaction stop to release the resource here */
/* Only write the update records when the operation itself succeeded. */
315 if (tur != NULL && th->th_result == 0) {
316 rc = top_updates_write(env, top_th);
318 CERROR("%s: cannot write updates: rc = %d\n",
319 master_dev->dd_lu_dev.ld_obd->obd_name, rc);
320 /* Still need call dt_trans_stop to release resources
321 * holding by the transaction */
/* Drop the top thandle's reference to the update records. */
323 top_th->tt_update_records = NULL;
326 LASSERT(top_th->tt_magic == TOP_THANDLE_MAGIC);
/* Copy flags and result from the top thandle to the master sub thandle. */
328 top_th->tt_master_sub_thandle->th_local = th->th_local;
329 top_th->tt_master_sub_thandle->th_sync = th->th_sync;
330 top_th->tt_master_sub_thandle->th_result = th->th_result;
331 /* To avoid sending RPC while holding thandle, it always stop local
332 * transaction first, then other sub thandle */
333 rc = dt_trans_stop(env, master_dev, top_th->tt_master_sub_thandle);
335 list_for_each_entry(lst, &top_th->tt_sub_thandle_list, st_sub_list) {
/* th_result is set either to rc or to th->th_result; the condition that
 * selects between the two assignments is not visible in this listing. */
339 lst->st_sub_th->th_result = rc;
341 lst->st_sub_th->th_result = th->th_result;
342 lst->st_sub_th->th_sync = th->th_sync;
343 lst->st_sub_th->th_local = th->th_local;
344 rc2 = dt_trans_stop(env, lst->st_sub_th->th_dev,
/* Sub thandle failure while rc is still 0 (guarded statement not visible;
 * presumably it records rc2 as the overall result). */
346 if (unlikely(rc2 < 0 && rc == 0))
/* Free the sub thandle list entries and the top thandle itself. */
350 top_thandle_destroy(top_th);
354 EXPORT_SYMBOL(top_trans_stop);
359 * Get sub thandle from the top thandle according to the sub dt_device.
361 * \param[in] env execution environment
362 * \param[in] th thandle on the top layer.
363 * \param[in] sub_dt sub dt_device used to get sub transaction
365 * \retval thandle of sub transaction if succeed
366 * \retval ERR_PTR(errno) if failed
/*
 * Return the sub thandle of a top thandle that belongs to sub_dt.
 *
 * The master sub thandle is returned directly when sub_dt is its device.
 * Otherwise tt_sub_thandle_list is searched for an entry whose thandle was
 * created on sub_dt; if none is found, a new thandle is created with
 * dt_trans_create(), wrapped in a sub_thandle, added to the list and
 * flagged with st_record_update = 1.
 *
 * NOTE(review): the embedded original numbering has gaps (369, 371,
 * 375-376, 382, 388-389, 391-393, 396-400, 403-404, 409-411), so the th
 * parameter line, the error check after dt_trans_create(), the statement
 * following the XXX comment, the allocation of lst with its failure test
 * and the final return are not visible in this listing.
 */
368 struct thandle *thandle_get_sub_by_dt(const struct lu_env *env,
370 struct dt_device *sub_dt)
372 struct sub_thandle *lst;
373 struct top_thandle *top_th;
374 struct thandle *sub_th;
377 top_th = container_of(th, struct top_thandle, tt_super);
378 LASSERT(top_th->tt_magic == TOP_THANDLE_MAGIC);
379 LASSERT(top_th->tt_master_sub_thandle != NULL);
/* Expected common case: the request is for the master device. */
380 if (likely(sub_dt == top_th->tt_master_sub_thandle->th_dev))
381 RETURN(top_th->tt_master_sub_thandle);
383 /* Find or create the transaction in tt_sub_thandle_list, since there is
384 * always only one thread access the list, so no need lock here */
385 list_for_each_entry(lst, &top_th->tt_sub_thandle_list, st_sub_list) {
386 if (lst->st_sub_th->th_dev == sub_dt)
387 RETURN(lst->st_sub_th);
/* No existing entry for sub_dt: create a new sub transaction on it. */
390 sub_th = dt_trans_create(env, sub_dt);
394 /* XXX all of mixed transaction (see struct th_handle) will
395 * be synchronized until async update is done */
/* Failure path: the freshly created sub thandle is stopped before
 * returning -ENOMEM (the failing test is not visible here; presumably the
 * allocation of lst). */
401 dt_trans_stop(env, sub_dt, sub_th);
402 RETURN(ERR_PTR(-ENOMEM));
/* Link the new sub thandle into the top thandle's list. */
405 INIT_LIST_HEAD(&lst->st_sub_list);
406 lst->st_sub_th = sub_th;
407 list_add(&lst->st_sub_list, &top_th->tt_sub_thandle_list);
/* Newly added sub thandles are flagged for update recording. */
408 lst->st_record_update = 1;
412 EXPORT_SYMBOL(thandle_get_sub_by_dt);
415 * Top thandle destroy
417 * Destroy the top thandle and all of its sub thandle.
419 * \param[in] top_th top thandle to be destroyed.
/*
 * Tear down a top thandle.
 *
 * Unlinks every entry of tt_sub_thandle_list and then frees the top thandle
 * with OBD_FREE_PTR().
 *
 * NOTE(review): the embedded original numbering has gaps (422, 425, 428,
 * 430-431, 433), so the list member argument of the iterator and whatever
 * is done with each entry after list_del() (presumably freeing it) are not
 * visible in this listing.
 */
421 void top_thandle_destroy(struct top_thandle *top_th)
423 struct sub_thandle *st;
/* Look-ahead cursor so the current entry can be unlinked while iterating. */
424 struct sub_thandle *tmp;
426 LASSERT(top_th->tt_magic == TOP_THANDLE_MAGIC);
427 list_for_each_entry_safe(st, tmp, &top_th->tt_sub_thandle_list,
429 list_del(&st->st_sub_list);
432 OBD_FREE_PTR(top_th);
434 EXPORT_SYMBOL(top_thandle_destroy);