4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
33 #ifndef _LUSTRE_LU_TARGET_H
34 #define _LUSTRE_LU_TARGET_H
36 #include <dt_object.h>
37 #include <lustre_export.h>
38 #include <lustre_update.h>
39 #include <lustre_disk.h>
40 #include <lustre_lfsck.h>
42 /* Each one represents a distributed transaction replay
43 * operation; the updates for each MDT are linked to dtrq_sub_list. */
45 struct distribute_txn_replay_req {
46 /* update record, may be vmalloc'd */
47 struct llog_update_record *dtrq_lur;
50 /* linkage into the distribute transaction replay
51 * list (tdtd_replay_list) */
52 struct list_head dtrq_list;
/* NOTE(review): presumably the transaction number on the master MDT;
 * confirm against the code that fills it in */
53 __u64 dtrq_master_transno;
57 /* all sub updates (struct distribute_txn_replay_req_sub) are linked here */
58 struct list_head dtrq_sub_list;
/* presumably protects dtrq_sub_list -- TODO confirm */
59 spinlock_t dtrq_sub_list_lock;
61 /* Set if the local update has been executed during replay */
62 __u32 dtrq_local_update_executed:1;
65 /* Each one represents a sub replay item of a distributed
66 * transaction. A distributed transaction operates on
67 * two or more MDTs, and the updates on each MDT are represented
68 * by this structure */
69 struct distribute_txn_replay_req_sub {
/* index of the MDT whose updates this item holds
 * (cf. the mdt_index argument of dtrq_sub_lookup()) */
70 __u32 dtrqs_mdt_index;
72 /* All cookies for the update are linked on dtrqs_cookie_list;
 * NOTE(review): the spinlock presumably guards that list -- confirm */
73 spinlock_t dtrqs_cookie_list_lock;
74 struct list_head dtrqs_cookie_list;
/* NOTE(review): presumably the linkage into the parent's
 * dtrq_sub_list -- confirm */
75 struct list_head dtrqs_list;
/* Forward declaration: the callback types below only need a pointer to it. */
78 struct target_distribute_txn_data;
/* Callback type of tdtd_replay_handler in struct target_distribute_txn_data;
 * it receives the environment, the tdtd and one replay request and
 * returns an int. */
79 typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env,
80 struct target_distribute_txn_data *tdtd,
81 struct distribute_txn_replay_req *dtrq);
/* Callback type of tdtd_show_update_logs_retrievers; returns a char *.
 * NOTE(review): data is presumably tdtd_show_retrievers_cbdata -- confirm */
82 typedef char *(*target_show_update_logs_retrievers_t)(void *data, int *size,
/* Per-target state for distributed transactions: batchid bookkeeping,
 * the commit thread wait queue, and the lists used during recovery replay. */
84 struct target_distribute_txn_data {
85 /* Distribution ID is used to identify the update logs on different
86 * MDTs for one operation */
87 spinlock_t tdtd_batchid_lock;
/* back pointer to the owning target (struct lu_target has lut_tdtd) */
89 struct lu_target *tdtd_lut;
90 struct dt_object *tdtd_batchid_obj;
91 struct dt_device *tdtd_dt;
93 /* Committed batchid for distribute transaction */
94 __u64 tdtd_committed_batchid;
96 /* List for distribute transaction */
97 struct list_head tdtd_list;
99 /* Threads to manage distribute transaction */
100 wait_queue_head_t tdtd_commit_thread_waitq;
101 atomic_t tdtd_refcount;
103 /* recovery update */
104 distribute_txn_replay_handler_t tdtd_replay_handler;
105 struct list_head tdtd_replay_list;
106 struct list_head tdtd_replay_finish_list;
107 spinlock_t tdtd_replay_list_lock;
108 /* last replay update transno */
/* NOTE(review): the comment above does not describe this 1-bit flag;
 * presumably it marks the replay list as ready -- confirm */
109 __u32 tdtd_replay_ready:1;
111 /* Manage the llog recovery threads */
112 atomic_t tdtd_recovery_threads_count;
113 wait_queue_head_t tdtd_recovery_threads_waitq;
/* callback of type target_show_update_logs_retrievers_t and its data */
114 target_show_update_logs_retrievers_t
115 tdtd_show_update_logs_retrievers;
116 void *tdtd_show_retrievers_cbdata;
/* NOTE(review): presumably the obd device and the underlying dt device
 * passed to tgt_init() -- confirm */
120 struct obd_device *lut_obd;
121 struct dt_device *lut_bottom;
/* distributed transaction data for this target and its commit thread */
123 struct target_distribute_txn_data *lut_tdtd;
124 struct ptlrpc_thread lut_tdtd_commit_thread;
126 /* supported opcodes and handlers for this target */
127 struct tgt_opc_slice *lut_slice;
/* fail ids; tgt_init() takes matching reply_fail_id/request_fail_id
 * arguments */
128 __u32 lut_reply_fail_id;
129 __u32 lut_request_fail_id;
/* sptlrpc rule set; NOTE(review): the rwlock presumably guards it */
132 rwlock_t lut_sptlrpc_lock;
133 struct sptlrpc_rule_set lut_sptlrpc_rset;
/* presumably protects the bit-field flags below -- TODO confirm */
134 spinlock_t lut_flags_lock;
135 unsigned int lut_syncjournal:1,
136 lut_sync_lock_cancel:2,
138 lut_no_reconstruct:1;
139 /** last_rcvd file */
140 struct dt_object *lut_last_rcvd;
141 /* transaction callbacks */
142 struct dt_txn_callback lut_txn_cb;
143 /** server data in last_rcvd file */
144 struct lr_server_data lut_lsd;
145 /** Server last transaction number */
146 __u64 lut_last_transno;
147 /** Lock protecting last transaction number */
148 spinlock_t lut_translock;
149 /** Lock protecting client bitmap */
150 spinlock_t lut_client_bitmap_lock;
151 /** Bitmap of known clients */
152 unsigned long *lut_client_bitmap;
153 /* Number of clients supporting multiple modify RPCs
154 * recorded in the bitmap */
155 atomic_t lut_num_clients;
156 /* Client generation to identify client slot reuse */
157 atomic_t lut_client_generation;
158 /** reply_data file */
159 struct dt_object *lut_reply_data;
160 /** Bitmap of used slots in the reply data file; a pointer to pointers,
 * NOTE(review): presumably one entry per chunk of
 * LUT_REPLY_SLOTS_PER_CHUNK slots -- confirm */
161 unsigned long **lut_reply_bitmap;
162 /** target sync count, used for debug & test */
163 atomic_t lut_sync_count;
165 /** cross MDT locks which should trigger Sync-on-Lock-Cancel;
 * lut_slc_locks_guard is the spinlock declared alongside the list */
166 spinlock_t lut_slc_locks_guard;
167 struct list_head lut_slc_locks;
170 /* Reply bitmap geometry: number of slots per chunk (1 << 20) and the
 * maximum number of chunks (16) */
171 #define LUT_REPLY_SLOTS_PER_CHUNK (1<<20)
172 #define LUT_REPLY_SLOTS_MAX_CHUNKS 16
/** In-memory reply data entry; this is the type taken by
 * tgt_lookup_reply(), tgt_add_reply_data() and req_can_reconstruct(). */
177 struct tg_reply_data {
178 /** chain of reply data anchored in tg_export_data */
179 struct list_head trd_list;
180 /** copy of on-disk reply data */
181 struct lsd_reply_data trd_reply;
182 /** pre-operation versions for Version Based Recovery (4 slots) */
183 __u64 trd_pre_versions[4];
184 /** slot index in reply_data file */
186 /** tag the client used */
/* Context key under which struct tgt_session_info is stored in the
 * session context (see tgt_ses_info()). */
190 extern struct lu_context_key tgt_session_key;
/* Per-request session information handed to every target handler
 * (th_act/th_hp take a struct tgt_session_info *). */
192 struct tgt_session_info {
194 * The following members will be filled explicitly
195 * with specific data in tgt_ses_init().
197 struct req_capsule *tsi_pill;
200 * Lock request for "habeo clavis" operations.
202 struct ldlm_request *tsi_dlm_req;
204 /* although we have the export in req, there are cases when it is not
205 * available, e.g. closing files upon export destroy */
206 struct obd_export *tsi_exp;
207 const struct lu_env *tsi_env;
208 struct lu_target *tsi_tgt;
/* request bodies and the object they refer to;
 * NOTE(review): presumably set up for HABEO_CORPUS handlers -- confirm */
210 const struct mdt_body *tsi_mdt_body;
211 struct ost_body *tsi_ost_body;
212 struct lu_object *tsi_corpus;
214 struct lu_fid tsi_fid;
215 struct ldlm_res_id tsi_resid;
217 /* object affected by VBR, for last_rcvd_update; set through
 * tgt_vbr_obj_set() */
218 struct dt_object *tsi_vbr_obj;
219 /* opdata for mdt_reint_open(), has the same value as
220 * ldlm_reply:lock_policy_res1. The tgt_update_last_rcvd() stores
221 * this value onto disk for recovery when tgt_txn_stop_cb() is called.
226 * Additional fail id that can be set by handler.
228 int tsi_reply_fail_id;
229 bool tsi_preprocessed;
236 __u32 tsi_client_gen;
/* Look up the tgt_session_info stored under tgt_session_key in the
 * session context of env; asserts that env->le_ses is not NULL. */
239 static inline struct tgt_session_info *tgt_ses_info(const struct lu_env *env)
241 struct tgt_session_info *tsi;
243 LASSERT(env->le_ses != NULL);
244 tsi = lu_context_key_get(env->le_ses, &tgt_session_key);
/* Record obj as the VBR-affected object (tsi_vbr_obj) in the session
 * info of env; nothing is stored when env has no session context. */
249 static inline void tgt_vbr_obj_set(const struct lu_env *env,
250 struct dt_object *obj)
252 struct tgt_session_info *tsi;
254 if (env->le_ses != NULL) {
255 tsi = tgt_ses_info(env);
256 tsi->tsi_vbr_obj = obj;
/* OR the given flags into tsi_opdata of the session info of env;
 * nothing is changed when env has no session context. */
260 static inline void tgt_opdata_set(const struct lu_env *env, __u64 flags)
262 struct tgt_session_info *tsi;
264 if (env->le_ses != NULL) {
265 tsi = tgt_ses_info(env);
266 tsi->tsi_opdata |= flags;
/* Clear the given flag bits in tsi_opdata of the session info of env;
 * nothing is changed when env has no session context. */
270 static inline void tgt_opdata_clear(const struct lu_env *env, __u64 flags)
272 struct tgt_session_info *tsi;
274 if (env->le_ses != NULL) {
275 tsi = tgt_ses_info(env);
276 tsi->tsi_opdata &= ~flags;
281 * Generic unified target support.
/* Handler requirement flags; each value is a distinct bit so that
 * several can be combined in one mask. */
283 enum tgt_handler_flags {
285 * struct *_body is passed in the incoming message, and object
286 * identified by this fid exists on disk.
288 * "habeo corpus" == "I have a body"
290 HABEO_CORPUS = (1 << 0),
292 * struct ldlm_request is passed in the incoming message.
294 * "habeo clavis" == "I have a key"
296 HABEO_CLAVIS = (1 << 1),
298 * this request has fixed reply format, so that reply message can be
299 * packed by generic code.
301 * "habeo refero" == "I have a reply"
303 HABEO_REFERO = (1 << 2),
305 * this request will modify something, so check whether the file system
306 * is readonly or not, then return -EROFS to client asap if necessary.
308 * "mutabor" == "I shall modify"
314 /* The name of this handler. */
316 /* Fail id, checked at the beginning */
320 /* Flags in enum tgt_handler_flags */
322 /* Request version for this opcode */
324 /* Handler function; takes the per-request session info and returns int */
325 int (*th_act)(struct tgt_session_info *tsi);
326 /* Handler function for high priority requests; TGT_RPC_HANDLER passes
 * NULL here */
327 void (*th_hp)(struct tgt_session_info *tsi);
328 /* Request format for this request; the *_HDL_VAR macros pass NULL
 * because the format is not known in advance */
329 const struct req_format *th_fmt;
/* A range of opcodes together with the handler registered for it;
 * struct lu_target keeps a pointer to these in lut_slice. */
332 struct tgt_opc_slice {
333 __u32 tos_opc_start; /* First op code */
334 __u32 tos_opc_end; /* Last op code */
335 struct tgt_handler *tos_hs; /* Registered handler */
/* Return the ptlrpc request behind the session's request capsule
 * (tsi_pill->rc_req), or NULL when no capsule is attached. */
338 static inline struct ptlrpc_request *tgt_ses_req(struct tgt_session_info *tsi)
340 return tsi->tsi_pill ? tsi->tsi_pill->rc_req : NULL;
/* Return the connect flags of the session's export, as reported by
 * exp_connect_flags(); asserts that tsi_exp is set. */
343 static inline __u64 tgt_conn_flags(struct tgt_session_info *tsi)
345 LASSERT(tsi->tsi_exp);
346 return exp_connect_flags(tsi->tsi_exp);
/* Return 1 if the request message flags contain MSG_REPLAY, 0 otherwise
 * (the !! normalizes the masked value); asserts that rq_reqmsg is set. */
349 static inline int req_is_replay(struct ptlrpc_request *req)
351 LASSERT(req->rq_reqmsg);
352 return !!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY);
/* Tell whether the export's connect flags include
 * OBD_CONNECT_MULTIMODRPCS. The masked value is returned through the
 * bool return type, i.e. it relies on non-zero converting to true. */
355 static inline bool tgt_is_multimodrpcs_client(struct obd_export *exp)
357 return exp_connect_flags(exp) & OBD_CONNECT_MULTIMODRPCS;
361 /* target/tgt_handler.c */
362 int tgt_request_handle(struct ptlrpc_request *req);
363 char *tgt_name(struct lu_target *tgt);
364 void tgt_counter_incr(struct obd_export *exp, int opcode);
365 int tgt_connect_check_sptlrpc(struct ptlrpc_request *req,
366 struct obd_export *exp);
367 int tgt_adapt_sptlrpc_conf(struct lu_target *tgt);
/* Functions of the form int f(struct tgt_session_info *) have the same
 * signature as the th_act member of struct tgt_handler. */
368 int tgt_connect(struct tgt_session_info *tsi);
/* Disconnect handler with the th_act signature; the parameter is named
 * tsi like in the sibling tgt_* handler prototypes. */
369 int tgt_disconnect(struct tgt_session_info *tsi);
/* Handlers with the th_act signature: int f(struct tgt_session_info *) */
370 int tgt_obd_ping(struct tgt_session_info *tsi);
371 int tgt_enqueue(struct tgt_session_info *tsi);
372 int tgt_convert(struct tgt_session_info *tsi);
373 int tgt_bl_callback(struct tgt_session_info *tsi);
374 int tgt_cp_callback(struct tgt_session_info *tsi);
375 int tgt_llog_open(struct tgt_session_info *tsi);
376 int tgt_llog_close(struct tgt_session_info *tsi);
377 int tgt_llog_destroy(struct tgt_session_info *tsi);
378 int tgt_llog_read_header(struct tgt_session_info *tsi);
379 int tgt_llog_next_block(struct tgt_session_info *tsi);
380 int tgt_llog_prev_block(struct tgt_session_info *tsi);
381 int tgt_sec_ctx_init(struct tgt_session_info *tsi);
382 int tgt_sec_ctx_init_cont(struct tgt_session_info *tsi);
383 int tgt_sec_ctx_fini(struct tgt_session_info *tsi);
/* Helpers that take the session info plus a payload description */
384 int tgt_sendpage(struct tgt_session_info *tsi, struct lu_rdpg *rdpg, int nob);
385 int tgt_send_buffer(struct tgt_session_info *tsi, struct lu_rdbuf *rdbuf);
386 int tgt_validate_obdo(struct tgt_session_info *tsi, struct obdo *oa);
/* NOTE(review): start/end presumably delimit a range on obj -- confirm */
387 int tgt_sync(const struct lu_env *env, struct lu_target *tgt,
388 struct dt_object *obj, __u64 start, __u64 end);
/* Per-thread init/done hooks for I/O service threads */
390 int tgt_io_thread_init(struct ptlrpc_thread *thread);
391 void tgt_io_thread_done(struct ptlrpc_thread *thread);
/* NOTE(review): tgt_extent_lock() takes mode as a plain int, while
 * tgt_extent_unlock(), tgt_brw_lock() and tgt_brw_unlock() use
 * enum ldlm_mode -- consider unifying together with the definition. */
393 int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
394 __u64 start, __u64 end, struct lustre_handle *lh,
395 int mode, __u64 *flags);
396 void tgt_extent_unlock(struct lustre_handle *lh, enum ldlm_mode mode);
397 int tgt_brw_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
398 struct obd_ioobj *obj, struct niobuf_remote *nb,
399 struct lustre_handle *lh, enum ldlm_mode mode);
400 void tgt_brw_unlock(struct obd_ioobj *obj, struct niobuf_remote *niob,
401 struct lustre_handle *lh, enum ldlm_mode mode);
/* Bulk read/write handlers with the th_act signature */
402 int tgt_brw_read(struct tgt_session_info *tsi);
403 int tgt_brw_write(struct tgt_session_info *tsi);
404 int tgt_hpreq_handler(struct ptlrpc_request *req);
/* Registration hooks for LFSCK callbacks; each takes a function pointer
 * whose parameters include a const struct lu_env * and a
 * struct lfsck_request *. */
405 void tgt_register_lfsck_in_notify(int (*notify)(const struct lu_env *,
407 struct lfsck_request *,
409 void tgt_register_lfsck_query(int (*query)(const struct lu_env *,
411 struct lfsck_request *,
412 struct lfsck_reply *,
413 struct lfsck_query *));
/* Returns a bool for the given request and reply data;
 * NOTE(review): presumably whether the reply can be reconstructed from
 * trd -- confirm */
414 bool req_can_reconstruct(struct ptlrpc_request *req, struct tg_reply_data *trd);
/* Handler tables (arrays of struct tgt_handler) defined elsewhere;
 * NOTE(review): presumably referenced through tgt_opc_slice.tos_hs --
 * confirm */
416 extern struct tgt_handler tgt_sec_ctx_handlers[];
417 extern struct tgt_handler tgt_lfsck_handlers[];
418 extern struct tgt_handler tgt_obd_handlers[];
419 extern struct tgt_handler tgt_dlm_handlers[];
420 extern struct tgt_handler tgt_llog_handlers[];
421 extern struct tgt_handler tgt_out_handlers[];
422 extern struct tgt_handler fld_handlers[];
423 extern struct tgt_handler seq_handlers[];
/* Callback type receiving the target, a transaction number, opaque
 * caller data and an error code; stored in struct tgt_commit_cb. */
425 typedef void (*tgt_cb_t)(struct lu_target *lut, __u64 transno,
426 void *data, int err);
427 struct tgt_commit_cb {
428 tgt_cb_t tgt_cb_func;
/* NOTE(review): tgt_hpreq_handler() is already declared above with the
 * other target/tgt_handler.c prototypes; this repeat is redundant. */
432 int tgt_hpreq_handler(struct ptlrpc_request *req);
434 /* target/tgt_main.c */
435 void tgt_boot_epoch_update(struct lu_target *lut);
/* Sync-on-Lock-Cancel lock tracking (cf. lut_slc_locks in struct lu_target) */
436 void tgt_save_slc_lock(struct lu_target *lut, struct ldlm_lock *lock,
438 void tgt_discard_slc_lock(struct lu_target *lut, struct ldlm_lock *lock);
/* Target setup and teardown. NOTE(review): slice, request_fail_id and
 * reply_fail_id presumably end up in lut_slice, lut_request_fail_id and
 * lut_reply_fail_id -- confirm */
439 int tgt_init(const struct lu_env *env, struct lu_target *lut,
440 struct obd_device *obd, struct dt_device *dt,
441 struct tgt_opc_slice *slice,
442 int request_fail_id, int reply_fail_id);
443 void tgt_fini(const struct lu_env *env, struct lu_target *lut);
/* Per-export client bookkeeping */
444 int tgt_client_alloc(struct obd_export *exp);
445 void tgt_client_free(struct obd_export *exp);
446 int tgt_client_del(const struct lu_env *env, struct obd_export *exp);
/* NOTE(review): the last parameter of tgt_client_add() is unnamed;
 * naming it in the prototype would document its meaning. */
447 int tgt_client_add(const struct lu_env *env, struct obd_export *exp, int);
448 int tgt_client_new(const struct lu_env *env, struct obd_export *exp);
449 int tgt_server_data_update(const struct lu_env *env, struct lu_target *tg,
/* Reply data (struct tg_reply_data) management */
451 int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt);
452 bool tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd);
453 int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt,
454 struct tg_export_data *ted, struct tg_reply_data *trd,
455 struct thandle *th, bool update_lrd_file);
456 struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted,
459 /* target/update_trans.c */
/* Init/fini pair for a struct target_distribute_txn_data */
460 int distribute_txn_init(const struct lu_env *env,
461 struct lu_target *lut,
462 struct target_distribute_txn_data *tdtd,
464 void distribute_txn_fini(const struct lu_env *env,
465 struct target_distribute_txn_data *tdtd);
467 /* target/update_recovery.c */
/* NOTE(review): parameters are unnamed in this prototype; by type they
 * are the tdtd, an update record, a llog cookie and a __u32 (presumably
 * an MDT index -- confirm). */
468 int insert_update_records_to_replay_list(struct target_distribute_txn_data *,
469 struct llog_update_record *,
470 struct llog_cookie *, __u32);
471 void dtrq_list_dump(struct target_distribute_txn_data *tdtd,
473 void dtrq_list_destroy(struct target_distribute_txn_data *tdtd);
/* Same signature as distribute_txn_replay_handler_t */
474 int distribute_txn_replay_handle(struct lu_env *env,
475 struct target_distribute_txn_data *tdtd,
476 struct distribute_txn_replay_req *dtrq);
477 __u64 distribute_txn_get_next_transno(struct target_distribute_txn_data *tdtd);
478 struct distribute_txn_replay_req *
479 distribute_txn_get_next_req(struct target_distribute_txn_data *tdtd);
480 void dtrq_destroy(struct distribute_txn_replay_req *dtrq);
/* Find the sub replay item of dtrq for the given MDT index */
481 struct distribute_txn_replay_req_sub *
482 dtrq_sub_lookup(struct distribute_txn_replay_req *dtrq, __u32 mdt_index);
483 struct distribute_txn_replay_req *
484 distribute_txn_lookup_finish_list(struct target_distribute_txn_data *tdtd,
486 bool is_req_replayed_by_update(struct ptlrpc_request *req);
/* Mark an error code as serious: negate rc, set the ESERIOUS bit in the
 * resulting magnitude, and negate the result again. */
491 static inline int err_serious(int rc)
494 return -(-rc | ESERIOUS);
/* Inverse of err_serious(): strip the ESERIOUS bit from the magnitude of
 * rc and store the re-negated value back into rc. */
497 static inline int clear_serious(int rc)
500 rc = -(-rc & ~ESERIOUS);
/* Non-zero when rc is negative and its magnitude has the ESERIOUS bit
 * set. Unary minus and & both bind tighter than &&, so the expression
 * reads as (rc < 0) && ((-rc) & ESERIOUS). */
504 static inline int is_serious(int rc)
506 return (rc < 0 && -rc & ESERIOUS);
510 * Unified target generic handler macros and generic functions.
/* Expands to designated initializers (.th_*) for one struct tgt_handler.
 * The fail id is derived from the opcode name by token pasting:
 * OBD_FAIL_<opc>_NET. */
512 #define TGT_RPC_HANDLER_HP(base, flags, opc, fn, hp, fmt, version) \
515 .th_fail_id = OBD_FAIL_ ## opc ## _NET, \
520 .th_version = version, \
/* Same as TGT_RPC_HANDLER_HP() with no high priority handler (hp = NULL) */
523 #define TGT_RPC_HANDLER(base, flags, opc, fn, fmt, version) \
524 TGT_RPC_HANDLER_HP(base, flags, opc, fn, NULL, fmt, version)
526 /* MDT request with a format known in advance: the format is &RQF_<name> */
527 #define TGT_MDT_HDL(flags, name, fn) \
528 TGT_RPC_HANDLER(MDS_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
530 /* MDT request with a format we do not yet know (fmt = NULL) */
531 #define TGT_MDT_HDL_VAR(flags, name, fn) \
532 TGT_RPC_HANDLER(MDS_FIRST_OPC, flags, name, fn, NULL, \
535 /* OST request with a format known in advance */
536 #define TGT_OST_HDL(flags, name, fn) \
537 TGT_RPC_HANDLER(OST_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
/* OST request with an additional high priority handler hp */
539 #define TGT_OST_HDL_HP(flags, name, fn, hp) \
540 TGT_RPC_HANDLER_HP(OST_FIRST_OPC, flags, name, fn, hp, \
541 &RQF_ ## name, LUSTRE_OST_VERSION)
543 /* MGS request with a format known in advance */
544 #define TGT_MGS_HDL(flags, name, fn) \
545 TGT_RPC_HANDLER(MGS_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
/* MGS request with a format we do not yet know (fmt = NULL) */
547 #define TGT_MGS_HDL_VAR(flags, name, fn) \
548 TGT_RPC_HANDLER(MGS_FIRST_OPC, flags, name, fn, NULL, \
552 * OBD handler macros and generic functions.
/* In the macros below, the plain form passes &RQF_<name> as the request
 * format and the _VAR form passes NULL. */
554 #define TGT_OBD_HDL(flags, name, fn) \
555 TGT_RPC_HANDLER(OBD_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
557 #define TGT_OBD_HDL_VAR(flags, name, fn) \
558 TGT_RPC_HANDLER(OBD_FIRST_OPC, flags, name, fn, NULL, \
562 * DLM handler macros and generic functions.
564 #define TGT_DLM_HDL_VAR(flags, name, fn) \
565 TGT_RPC_HANDLER(LDLM_FIRST_OPC, flags, name, fn, NULL, \
567 #define TGT_DLM_HDL(flags, name, fn) \
568 TGT_RPC_HANDLER(LDLM_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
572 * LLOG handler macros and generic functions.
574 #define TGT_LLOG_HDL_VAR(flags, name, fn) \
575 TGT_RPC_HANDLER(LLOG_FIRST_OPC, flags, name, fn, NULL, \
577 #define TGT_LLOG_HDL(flags, name, fn) \
578 TGT_RPC_HANDLER(LLOG_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
582 * Sec context handler macros and generic functions.
584 #define TGT_SEC_HDL_VAR(flags, name, fn) \
585 TGT_RPC_HANDLER(SEC_FIRST_OPC, flags, name, fn, NULL, \
/* Quota handlers; the base opcode passed on is QUOTA_DQACQ */
588 #define TGT_QUOTA_HDL(flags, name, fn) \
589 TGT_RPC_HANDLER(QUOTA_DQACQ, flags, name, fn, &RQF_ ## name, \
592 /* Sequence service handlers; the base opcode passed on is SEQ_QUERY */
593 #define TGT_SEQ_HDL(flags, name, fn) \
594 TGT_RPC_HANDLER(SEQ_QUERY, flags, name, fn, &RQF_ ## name, \
597 /* FID Location Database handlers; base opcode FLD_QUERY, format not
 * known in advance (fmt = NULL) */
598 #define TGT_FLD_HDL_VAR(flags, name, fn) \
599 TGT_RPC_HANDLER(FLD_QUERY, flags, name, fn, NULL, \
/* LFSCK handlers; format &RQF_<name>, version LUSTRE_OBD_VERSION */
603 #define TGT_LFSCK_HDL(flags, name, fn) \
604 TGT_RPC_HANDLER(LFSCK_FIRST_OPC, flags, name, fn, \
605 &RQF_ ## name, LUSTRE_OBD_VERSION)
607 /* Update (OUT_UPDATE) request with a format known in advance */
608 #define TGT_UPDATE_HDL(flags, name, fn) \
609 TGT_RPC_HANDLER(OUT_UPDATE, flags, name, fn, &RQF_ ## name, \
612 #endif /* _LUSTRE_LU_TARGET_H */