4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #ifndef _LUSTRE_LU_TARGET_H
38 #define _LUSTRE_LU_TARGET_H
40 #include <dt_object.h>
41 #include <lustre_export.h>
42 #include <lustre_update.h>
43 #include <lustre_disk.h>
44 #include <lustre_lfsck.h>
46 /* Each one represents a distribute transaction replay
47 * operation, and updates on each MDT are linked to
49 struct distribute_txn_replay_req {
51 struct llog_update_record *dtrq_lur;
54 /* linked to the distribute transaction replay
55 * list (tdtd_replay_list) */
56 struct list_head dtrq_list;
58 /* all of sub updates are linked here */
59 struct list_head dtrq_sub_list;
60 spinlock_t dtrq_sub_list_lock;
63 /* Each one represents a sub replay item under a distribute
64 * transaction. A distribute transaction will be operated in
65 * two or more MDTs, and updates on each MDT will be represented
66 * by this structure */
67 struct distribute_txn_replay_req_sub {
68 __u32 dtrqs_mdt_index;
70 /* All of the cookies for the update will be linked here */
71 spinlock_t dtrqs_cookie_list_lock;
72 struct list_head dtrqs_cookie_list;
73 struct list_head dtrqs_list;
struct target_distribute_txn_data;

/*
 * Callback type held in target_distribute_txn_data::tdtd_replay_handler.
 * It is called with an environment, a target_distribute_txn_data and one
 * distribute_txn_replay_req, and returns an int.
 * NOTE(review): the return convention (0 / negative errno) is not visible
 * in this header -- confirm against the implementations.
 */
typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env,
				       struct target_distribute_txn_data *tdtd,
				       struct distribute_txn_replay_req *dtrq);
80 struct target_distribute_txn_data {
81 /* Distribution ID is used to identify update logs on different
82 * MDTs for one operation */
83 spinlock_t tdtd_batchid_lock;
85 struct lu_target *tdtd_lut;
86 struct dt_object *tdtd_batchid_obj;
87 struct dt_device *tdtd_dt;
89 /* Committed batchid for distribute transaction */
90 __u64 tdtd_committed_batchid;
92 /* List for distribute transaction */
93 struct list_head tdtd_list;
95 /* Threads to manage distribute transaction */
96 wait_queue_head_t tdtd_commit_thread_waitq;
97 atomic_t tdtd_refcount;
100 distribute_txn_replay_handler_t tdtd_replay_handler;
101 struct list_head tdtd_replay_list;
102 spinlock_t tdtd_replay_list_lock;
103 /* last replay update transno */
104 __u64 tdtd_last_update_transno;
105 __u32 tdtd_replay_ready:1;
110 struct obd_device *lut_obd;
111 struct dt_device *lut_bottom;
113 struct target_distribute_txn_data *lut_tdtd;
114 struct ptlrpc_thread lut_tdtd_commit_thread;
116 /* supported opcodes and handlers for this target */
117 struct tgt_opc_slice *lut_slice;
118 __u32 lut_reply_fail_id;
119 __u32 lut_request_fail_id;
122 rwlock_t lut_sptlrpc_lock;
123 struct sptlrpc_rule_set lut_sptlrpc_rset;
124 spinlock_t lut_flags_lock;
126 unsigned int lut_syncjournal:1,
127 lut_sync_lock_cancel:2,
129 lut_no_reconstruct:1;
130 /** last_rcvd file */
131 struct dt_object *lut_last_rcvd;
132 /* transaction callbacks */
133 struct dt_txn_callback lut_txn_cb;
134 /** server data in last_rcvd file */
135 struct lr_server_data lut_lsd;
136 /** Server last transaction number */
137 __u64 lut_last_transno;
138 /** Lock protecting last transaction number */
139 spinlock_t lut_translock;
140 /** Lock protecting client bitmap */
141 spinlock_t lut_client_bitmap_lock;
142 /** Bitmap of known clients */
143 unsigned long *lut_client_bitmap;
144 /* Number of clients supporting multiple modify RPCs
145 * recorded in the bitmap */
146 atomic_t lut_num_clients;
147 /* Client generation to identify client slot reuse */
148 atomic_t lut_client_generation;
149 /** reply_data file */
150 struct dt_object *lut_reply_data;
151 /** Bitmap of used slots in the reply data file */
152 unsigned long **lut_reply_bitmap;
/*
 * Number of slots in the reply bitmap: slots held by one chunk, and the
 * maximum number of chunks.
 */
#define LUT_REPLY_SLOTS_PER_CHUNK (1 << 20)
#define LUT_REPLY_SLOTS_MAX_CHUNKS 16
162 struct tg_reply_data {
163 /** chain of reply data anchored in tg_export_data */
164 struct list_head trd_list;
165 /** copy of on-disk reply data */
166 struct lsd_reply_data trd_reply;
167 /** versions for Version Based Recovery */
168 __u64 trd_pre_versions[4];
169 /** slot index in reply_data file */
171 /** tag the client used */
175 extern struct lu_context_key tgt_session_key;
177 struct tgt_session_info {
179 * The following members will be filled explicitly
180 * with specific data in tgt_ses_init().
182 struct req_capsule *tsi_pill;
185 * Lock request for "habeo clavis" operations.
187 struct ldlm_request *tsi_dlm_req;
189 /* although we have export in req, there are cases when it is not
190 * available, e.g. closing files upon export destroy */
191 struct obd_export *tsi_exp;
192 const struct lu_env *tsi_env;
193 struct lu_target *tsi_tgt;
195 const struct mdt_body *tsi_mdt_body;
196 struct ost_body *tsi_ost_body;
197 struct lu_object *tsi_corpus;
199 struct lu_fid tsi_fid;
200 struct ldlm_res_id tsi_resid;
202 /* object affected by VBR, for last_rcvd_update */
203 struct dt_object *tsi_vbr_obj;
204 /* opdata for mdt_reint_open(), has the same value as
205 * ldlm_reply:lock_policy_res1. The tgt_update_last_rcvd() stores
206 * this value onto disk for recovery when tgt_txn_stop_cb() is called.
211 * Additional fail id that can be set by handler.
213 int tsi_reply_fail_id;
214 bool tsi_preprocessed;
219 static inline struct tgt_session_info *tgt_ses_info(const struct lu_env *env)
221 struct tgt_session_info *tsi;
223 LASSERT(env->le_ses != NULL);
224 tsi = lu_context_key_get(env->le_ses, &tgt_session_key);
229 static inline void tgt_vbr_obj_set(const struct lu_env *env,
230 struct dt_object *obj)
232 struct tgt_session_info *tsi;
234 if (env->le_ses != NULL) {
235 tsi = tgt_ses_info(env);
236 tsi->tsi_vbr_obj = obj;
240 static inline void tgt_opdata_set(const struct lu_env *env, __u64 flags)
242 struct tgt_session_info *tsi;
244 if (env->le_ses != NULL) {
245 tsi = tgt_ses_info(env);
246 tsi->tsi_opdata |= flags;
250 static inline void tgt_opdata_clear(const struct lu_env *env, __u64 flags)
252 struct tgt_session_info *tsi;
254 if (env->le_ses != NULL) {
255 tsi = tgt_ses_info(env);
256 tsi->tsi_opdata &= ~flags;
261 * Generic unified target support.
263 enum tgt_handler_flags {
265 * struct *_body is passed in the incoming message, and object
266 * identified by this fid exists on disk.
268 * "habeo corpus" == "I have a body"
270 HABEO_CORPUS = (1 << 0),
272 * struct ldlm_request is passed in the incoming message.
274 * "habeo clavis" == "I have a key"
276 HABEO_CLAVIS = (1 << 1),
278 * this request has fixed reply format, so that reply message can be
279 * packed by generic code.
281 * "habeo refero" == "I have a reply"
283 HABEO_REFERO = (1 << 2),
285 * this request will modify something, so check whether the file system
286 * is readonly or not, then return -EROFS to client asap if necessary.
288 * "mutabor" == "I shall modify"
294 /* The name of this handler. */
296 /* Fail id, check at the beginning */
300 /* Flags in enum tgt_handler_flags */
302 /* Request version for this opcode */
304 /* Handler function */
305 int (*th_act)(struct tgt_session_info *tsi);
306 /* Handler function for high priority requests */
307 void (*th_hp)(struct tgt_session_info *tsi);
308 /* Request format for this request */
309 const struct req_format *th_fmt;
312 struct tgt_opc_slice {
313 __u32 tos_opc_start; /* First op code */
314 __u32 tos_opc_end; /* Last op code */
315 struct tgt_handler *tos_hs; /* Registered handler */
318 static inline struct ptlrpc_request *tgt_ses_req(struct tgt_session_info *tsi)
320 return tsi->tsi_pill ? tsi->tsi_pill->rc_req : NULL;
323 static inline __u64 tgt_conn_flags(struct tgt_session_info *tsi)
325 LASSERT(tsi->tsi_exp);
326 return exp_connect_flags(tsi->tsi_exp);
329 static inline int req_is_replay(struct ptlrpc_request *req)
331 LASSERT(req->rq_reqmsg);
332 return !!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY);
335 static inline bool tgt_is_multimodrpcs_client(struct obd_export *exp)
337 return exp_connect_flags(exp) & OBD_CONNECT_MULTIMODRPCS;
341 /* target/tgt_handler.c */
342 int tgt_request_handle(struct ptlrpc_request *req);
343 char *tgt_name(struct lu_target *tgt);
344 void tgt_counter_incr(struct obd_export *exp, int opcode);
345 int tgt_connect_check_sptlrpc(struct ptlrpc_request *req,
346 struct obd_export *exp);
347 int tgt_adapt_sptlrpc_conf(struct lu_target *tgt, int initial);
348 int tgt_connect(struct tgt_session_info *tsi);
349 int tgt_disconnect(struct tgt_session_info *uti);
350 int tgt_obd_ping(struct tgt_session_info *tsi);
351 int tgt_enqueue(struct tgt_session_info *tsi);
352 int tgt_convert(struct tgt_session_info *tsi);
353 int tgt_bl_callback(struct tgt_session_info *tsi);
354 int tgt_cp_callback(struct tgt_session_info *tsi);
355 int tgt_llog_open(struct tgt_session_info *tsi);
356 int tgt_llog_close(struct tgt_session_info *tsi);
357 int tgt_llog_destroy(struct tgt_session_info *tsi);
358 int tgt_llog_read_header(struct tgt_session_info *tsi);
359 int tgt_llog_next_block(struct tgt_session_info *tsi);
360 int tgt_llog_prev_block(struct tgt_session_info *tsi);
361 int tgt_sec_ctx_init(struct tgt_session_info *tsi);
362 int tgt_sec_ctx_init_cont(struct tgt_session_info *tsi);
363 int tgt_sec_ctx_fini(struct tgt_session_info *tsi);
364 int tgt_sendpage(struct tgt_session_info *tsi, struct lu_rdpg *rdpg, int nob);
365 int tgt_validate_obdo(struct tgt_session_info *tsi, struct obdo *oa);
366 int tgt_sync(const struct lu_env *env, struct lu_target *tgt,
367 struct dt_object *obj, __u64 start, __u64 end);
369 int tgt_io_thread_init(struct ptlrpc_thread *thread);
370 void tgt_io_thread_done(struct ptlrpc_thread *thread);
372 int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
373 __u64 start, __u64 end, struct lustre_handle *lh,
374 int mode, __u64 *flags);
375 void tgt_extent_unlock(struct lustre_handle *lh, ldlm_mode_t mode);
376 int tgt_brw_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
377 struct obd_ioobj *obj, struct niobuf_remote *nb,
378 struct lustre_handle *lh, int mode);
379 void tgt_brw_unlock(struct obd_ioobj *obj, struct niobuf_remote *niob,
380 struct lustre_handle *lh, int mode);
381 int tgt_brw_read(struct tgt_session_info *tsi);
382 int tgt_brw_write(struct tgt_session_info *tsi);
383 int tgt_hpreq_handler(struct ptlrpc_request *req);
384 void tgt_register_lfsck_in_notify(int (*notify)(const struct lu_env *,
386 struct lfsck_request *,
388 void tgt_register_lfsck_query(int (*query)(const struct lu_env *,
390 struct lfsck_request *));
391 bool req_can_reconstruct(struct ptlrpc_request *req, struct tg_reply_data *trd);
393 extern struct tgt_handler tgt_lfsck_handlers[];
394 extern struct tgt_handler tgt_obd_handlers[];
395 extern struct tgt_handler tgt_dlm_handlers[];
396 extern struct tgt_handler tgt_llog_handlers[];
397 extern struct tgt_handler tgt_out_handlers[];
398 extern struct tgt_handler fld_handlers[];
399 extern struct tgt_handler seq_handlers[];
401 typedef void (*tgt_cb_t)(struct lu_target *lut, __u64 transno,
402 void *data, int err);
403 struct tgt_commit_cb {
404 tgt_cb_t tgt_cb_func;
408 int tgt_hpreq_handler(struct ptlrpc_request *req);
410 /* target/tgt_main.c */
411 void tgt_boot_epoch_update(struct lu_target *lut);
412 int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *lut,
413 struct obd_export *exp, __u64 transno);
414 int tgt_new_client_cb_add(struct thandle *th, struct obd_export *exp);
415 int tgt_init(const struct lu_env *env, struct lu_target *lut,
416 struct obd_device *obd, struct dt_device *dt,
417 struct tgt_opc_slice *slice,
418 int request_fail_id, int reply_fail_id);
419 void tgt_fini(const struct lu_env *env, struct lu_target *lut);
420 int tgt_client_alloc(struct obd_export *exp);
421 void tgt_client_free(struct obd_export *exp);
422 int tgt_client_del(const struct lu_env *env, struct obd_export *exp);
423 int tgt_client_add(const struct lu_env *env, struct obd_export *exp, int);
424 int tgt_client_new(const struct lu_env *env, struct obd_export *exp);
425 int tgt_client_data_read(const struct lu_env *env, struct lu_target *tg,
426 struct lsd_client_data *lcd, loff_t *off, int index);
427 int tgt_client_data_write(const struct lu_env *env, struct lu_target *tg,
428 struct lsd_client_data *lcd, loff_t *off, struct thandle *th);
429 int tgt_server_data_read(const struct lu_env *env, struct lu_target *tg);
430 int tgt_server_data_write(const struct lu_env *env, struct lu_target *tg,
432 int tgt_server_data_update(const struct lu_env *env, struct lu_target *tg,
434 int tgt_truncate_last_rcvd(const struct lu_env *env, struct lu_target *tg,
436 int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt);
437 bool tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd);
439 /* target/update_trans.c */
440 int distribute_txn_init(const struct lu_env *env,
441 struct lu_target *lut,
442 struct target_distribute_txn_data *tdtd,
444 void distribute_txn_fini(const struct lu_env *env,
445 struct target_distribute_txn_data *tdtd);
447 /* target/update_recovery.c */
448 int insert_update_records_to_replay_list(struct target_distribute_txn_data *,
449 struct llog_update_record *,
450 struct llog_cookie *, __u32);
451 void dtrq_list_dump(struct target_distribute_txn_data *tdtd,
453 void dtrq_list_destroy(struct target_distribute_txn_data *tdtd);
454 int distribute_txn_replay_handle(struct lu_env *env,
455 struct target_distribute_txn_data *tdtd,
456 struct distribute_txn_replay_req *dtrq);
457 __u64 distribute_txn_get_next_transno(struct target_distribute_txn_data *tdtd);
458 struct distribute_txn_replay_req *
459 distribute_txn_get_next_req(struct target_distribute_txn_data *tdtd);
460 void dtrq_destroy(struct distribute_txn_replay_req *dtrq);
461 struct distribute_txn_replay_req_sub *
462 dtrq_sub_lookup(struct distribute_txn_replay_req *dtrq, __u32 mdt_index);
468 static inline int err_serious(int rc)
471 return -(-rc | ESERIOUS);
474 static inline int clear_serious(int rc)
477 rc = -(-rc & ~ESERIOUS);
481 static inline int is_serious(int rc)
483 return (rc < 0 && -rc & ESERIOUS);
487 * Do not return server-side uid/gid to remote client
489 static inline void tgt_drop_id(struct obd_export *exp, struct obdo *oa)
491 if (unlikely(exp_connect_rmtclient(exp))) {
494 oa->o_valid &= ~(OBD_MD_FLUID | OBD_MD_FLGID);
499 * Unified target generic handlers macros and generic functions.
501 #define TGT_RPC_HANDLER_HP(base, flags, opc, fn, hp, fmt, version) \
504 .th_fail_id = OBD_FAIL_ ## opc ## _NET, \
509 .th_version = version, \
/*
 * Same as TGT_RPC_HANDLER_HP() with NULL passed as the hp argument.
 */
#define TGT_RPC_HANDLER(base, flags, opc, fn, fmt, version)		\
	TGT_RPC_HANDLER_HP(base, flags, opc, fn, NULL, fmt, version)
515 /* MDT Request with a format known in advance */
516 #define TGT_MDT_HDL(flags, name, fn) \
517 TGT_RPC_HANDLER(MDS_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
519 /* Request with a format we do not yet know */
520 #define TGT_MDT_HDL_VAR(flags, name, fn) \
521 TGT_RPC_HANDLER(MDS_FIRST_OPC, flags, name, fn, NULL, \
524 /* OST Request with a format known in advance */
525 #define TGT_OST_HDL(flags, name, fn) \
526 TGT_RPC_HANDLER(OST_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
/*
 * OST request with a format known in advance (RQF_<name>); the hp argument
 * is forwarded to TGT_RPC_HANDLER_HP().
 */
#define TGT_OST_HDL_HP(flags, name, fn, hp)				\
	TGT_RPC_HANDLER_HP(OST_FIRST_OPC, flags, name, fn, hp,		\
			   &RQF_ ## name, LUSTRE_OST_VERSION)
532 /* MGS request with a format known in advance */
533 #define TGT_MGS_HDL(flags, name, fn) \
534 TGT_RPC_HANDLER(MGS_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
536 #define TGT_MGS_HDL_VAR(flags, name, fn) \
537 TGT_RPC_HANDLER(MGS_FIRST_OPC, flags, name, fn, NULL, \
541 * OBD handler macros and generic functions.
543 #define TGT_OBD_HDL(flags, name, fn) \
544 TGT_RPC_HANDLER(OBD_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
546 #define TGT_OBD_HDL_VAR(flags, name, fn) \
547 TGT_RPC_HANDLER(OBD_FIRST_OPC, flags, name, fn, NULL, \
551 * DLM handler macros and generic functions.
553 #define TGT_DLM_HDL_VAR(flags, name, fn) \
554 TGT_RPC_HANDLER(LDLM_FIRST_OPC, flags, name, fn, NULL, \
556 #define TGT_DLM_HDL(flags, name, fn) \
557 TGT_RPC_HANDLER(LDLM_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
561 * LLOG handler macros and generic functions.
563 #define TGT_LLOG_HDL_VAR(flags, name, fn) \
564 TGT_RPC_HANDLER(LLOG_FIRST_OPC, flags, name, fn, NULL, \
566 #define TGT_LLOG_HDL(flags, name, fn) \
567 TGT_RPC_HANDLER(LLOG_FIRST_OPC, flags, name, fn, &RQF_ ## name, \
571 * Sec context handler macros and generic functions.
573 #define TGT_SEC_HDL_VAR(flags, name, fn) \
574 TGT_RPC_HANDLER(SEC_FIRST_OPC, flags, name, fn, NULL, \
577 #define TGT_QUOTA_HDL(flags, name, fn) \
578 TGT_RPC_HANDLER(QUOTA_DQACQ, flags, name, fn, &RQF_ ## name, \
581 /* Sequence service handlers */
582 #define TGT_SEQ_HDL(flags, name, fn) \
583 TGT_RPC_HANDLER(SEQ_QUERY, flags, name, fn, &RQF_ ## name, \
586 /* FID Location Database handlers */
587 #define TGT_FLD_HDL_VAR(flags, name, fn) \
588 TGT_RPC_HANDLER(FLD_QUERY, flags, name, fn, NULL, \
/*
 * LFSCK request with a format known in advance (RQF_<name>), registered
 * relative to LFSCK_FIRST_OPC with LUSTRE_OBD_VERSION.
 */
#define TGT_LFSCK_HDL(flags, name, fn)					\
	TGT_RPC_HANDLER(LFSCK_FIRST_OPC, flags, name, fn,		\
			&RQF_ ## name, LUSTRE_OBD_VERSION)
596 /* Request with a format known in advance */
597 #define TGT_UPDATE_HDL(flags, name, fn) \
598 TGT_RPC_HANDLER(OUT_UPDATE, flags, name, fn, &RQF_ ## name, \
601 #endif /* _LUSTRE_LU_TARGET_H */