*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <lustre_disk.h>
#include <lustre_lfsck.h>
+/* One replay request for a distributed transaction. Each instance
+ * represents a single distributed transaction replay operation; the
+ * updates for each MDT involved are linked on dtrq_sub_list. */
+struct distribute_txn_replay_req {
+ /* update record, may be vmalloc'd */
+ struct llog_update_record *dtrq_lur;
+ int dtrq_lur_size; /* size of dtrq_lur (presumably bytes - confirm) */
+
+ /* linked to the distribute transaction replay
+ * list (tdtd_replay_list) */
+ struct list_head dtrq_list;
+ __u64 dtrq_master_transno;
+ __u64 dtrq_batchid;
+ __u64 dtrq_xid;
+
+ /* all of the sub updates are linked here; dtrq_sub_list_lock is,
+ * judging by its name, the lock guarding this list (confirm in
+ * the users of the list) */
+ struct list_head dtrq_sub_list;
+ spinlock_t dtrq_sub_list_lock;
+
+ /* Single-bit flag: set if the local update has been executed
+ * during replay */
+ __u32 dtrq_local_update_executed:1;
+};
+
+/* Each one represents a sub replay item under a distributed
+ * transaction. A distributed transaction operates on two or more
+ * MDTs, and the updates on each MDT are represented by one instance
+ * of this structure. */
+struct distribute_txn_replay_req_sub {
+ __u32 dtrqs_mdt_index; /* presumably the key matched by dtrq_sub_lookup() - confirm */
+
+ /* All of the cookies for the update are linked on
+ * dtrqs_cookie_list */
+ spinlock_t dtrqs_cookie_list_lock;
+ struct list_head dtrqs_cookie_list;
+ struct list_head dtrqs_list; /* NOTE(review): looks like the linkage into the parent's dtrq_sub_list - confirm */
+};
+
+struct target_distribute_txn_data; /* forward declaration, needed by the
+ * callback typedef that follows */
+typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env,
+ struct target_distribute_txn_data *tdtd,
+ struct distribute_txn_replay_req *dtrq); /* type of the
+ * tdtd_replay_handler callback: receives the env, the
+ * distribute txn data and one replay request, returns int */
+typedef char *(*target_show_update_logs_retrievers_t)(void *data, int *size,
+ int *count); /* type of the tdtd_show_update_logs_retrievers
+ * callback; @data is presumably tdtd_show_retrievers_cbdata
+ * (confirm at the call site) */
+struct target_distribute_txn_data {
+ /* State kept for distributed transactions; struct lu_target
+ * points at one of these through lut_tdtd.
+ *
+ * Distribution ID is used to identify the update logs on
+ * different MDTs for one operation */
+ spinlock_t tdtd_batchid_lock;
+ __u64 tdtd_batchid;
+ struct lu_target *tdtd_lut;
+ struct dt_object *tdtd_batchid_obj;
+ struct dt_device *tdtd_dt;
+
+ /* Committed batchid for distribute transaction */
+ __u64 tdtd_committed_batchid;
+
+ /* List for distribute transaction */
+ struct list_head tdtd_list;
+
+ /* Threads to manage distribute transaction */
+ struct task_struct *tdtd_commit_task;
+ atomic_t tdtd_refcount;
+ struct lu_env tdtd_env;
+
+ /* recovery update: replay callback plus the replay lists.
+ * struct distribute_txn_replay_req entries are linked on
+ * tdtd_replay_list through their dtrq_list member. */
+ distribute_txn_replay_handler_t tdtd_replay_handler;
+ struct list_head tdtd_replay_list;
+ struct list_head tdtd_replay_finish_list;
+ spinlock_t tdtd_replay_list_lock;
+ /* NOTE(review): this comment used to read "last replay update
+ * transno", which does not describe the 1-bit flag declared
+ * below; confirm the flag's meaning from its users. */
+ __u32 tdtd_replay_ready:1;
+
+ /* Manage the llog recovery threads */
+ atomic_t tdtd_recovery_threads_count;
+ wait_queue_head_t tdtd_recovery_threads_waitq;
+ target_show_update_logs_retrievers_t
+ tdtd_show_update_logs_retrievers;
+ void *tdtd_show_retrievers_cbdata;
+};
+
+struct tg_grants_data {
+ /* Grant and statfs accounting state; embedded in struct lu_target
+ * as lut_tgd.
+ * grants: all values in bytes */
+ /* grant lock to protect all grant counters */
+ spinlock_t tgd_grant_lock;
+ /* total amount of dirty data reported by clients in incoming obdo */
+ u64 tgd_tot_dirty;
+ /* sum of filesystem space granted to clients for async writes */
+ u64 tgd_tot_granted;
+ /* grant used by I/Os in progress (between prepare and commit) */
+ u64 tgd_tot_pending;
+ /* amount of available space in percentage that is never used for
+ * grants, used on MDT to always keep space for metadata. */
+ u64 tgd_reserved_pcnt;
+ /* number of clients using grants */
+ int tgd_tot_granted_clients;
+ /* shall we grant space to clients not
+ * supporting OBD_CONNECT_GRANT_PARAM? */
+ int tgd_grant_compat_disable;
+ /* protect all statfs-related counters */
+ spinlock_t tgd_osfs_lock;
+ time64_t tgd_osfs_age;
+ int tgd_blockbits;
+ /* counters used during statfs update, protected by tgd_osfs_lock.
+ * record when some statfs refresh are in progress */
+ int tgd_statfs_inflight;
+ /* writes between prep & commit which might be accounted twice in
+ * tgd_osfs.os_bavail */
+ u64 tgd_osfs_unstable;
+ /* track writes completed while statfs refresh is underway.
+ * tracking is only effective when tgd_statfs_inflight > 1 */
+ u64 tgd_osfs_inflight;
+ /* statfs optimization: we cache a bit */
+ struct obd_statfs tgd_osfs;
+};
+
struct lu_target {
struct obd_device *lut_obd;
struct dt_device *lut_bottom;
+ struct dt_device_param lut_dt_conf;
+
+ struct target_distribute_txn_data *lut_tdtd;
/* supported opcodes and handlers for this target */
struct tgt_opc_slice *lut_slice;
rwlock_t lut_sptlrpc_lock;
struct sptlrpc_rule_set lut_sptlrpc_rset;
spinlock_t lut_flags_lock;
- int lut_sec_level;
- unsigned int lut_mds_capa:1,
- lut_oss_capa:1,
- lut_syncjournal:1,
+ unsigned int lut_syncjournal:1,
lut_sync_lock_cancel:2,
/* e.g. OST node */
- lut_no_reconstruct:1;
+ lut_no_reconstruct:1,
+ /* enforce recovery for local clients */
+ lut_local_recovery:1;
/** last_rcvd file */
struct dt_object *lut_last_rcvd;
/* transaction callbacks */
spinlock_t lut_client_bitmap_lock;
/** Bitmap of known clients */
unsigned long *lut_client_bitmap;
+ /* Number of clients supporting multiple modify RPCs
+ * recorded in the bitmap */
+ atomic_t lut_num_clients;
+ /* Client generation to identify client slot reuse */
+ atomic_t lut_client_generation;
+ /** reply_data file */
+ struct dt_object *lut_reply_data;
+ /** Bitmap of used slots in the reply data file */
+ unsigned long **lut_reply_bitmap;
+ /** target sync count, used for debug & test */
+ atomic_t lut_sync_count;
+
+ /** cross MDT locks which should trigger Sync-on-Lock-Cancel */
+ spinlock_t lut_slc_locks_guard;
+ struct list_head lut_slc_locks;
+
+ /* target grants fields */
+ struct tg_grants_data lut_tgd;
+
+ /* target tunables */
+ const struct attribute **lut_attrs;
+
+ /* FMD (file modification data) values */
+ int lut_fmd_max_num;
+ time64_t lut_fmd_max_age;
+};
+
+/* Defaults for the FMD (file modification data) tunables. Judging by
+ * the names they seed lut_fmd_max_num and lut_fmd_max_age in
+ * struct lu_target (confirm where those fields are initialised). The
+ * age default is derived from obd_timeout at the point of use. */
+#define LUT_FMD_MAX_NUM_DEFAULT 128
+#define LUT_FMD_MAX_AGE_DEFAULT (obd_timeout + 10)
+
+/* number of slots in reply bitmap */
+#define LUT_REPLY_SLOTS_PER_CHUNK (1<<20)
+#define LUT_REPLY_SLOTS_MAX_CHUNKS 16
+
+/* Parenthesized so that the negative constant cannot combine with a
+ * neighbouring operator when the macro is expanded inside a larger
+ * expression. */
+#define TRD_INDEX_MEMORY (-1)
+
+/**
+ * Target reply data
+ *
+ * In-memory record holding a copy of one on-disk reply data entry
+ * together with its list linkage, pre-versions, slot index and tag.
+ */
+struct tg_reply_data {
+ /** chain of reply data anchored in tg_export_data */
+ struct list_head trd_list;
+ /** copy of on-disk reply data */
+ struct lsd_reply_data trd_reply;
+ /** versions for Version Based Recovery */
+ __u64 trd_pre_versions[4];
+ /** slot index in reply_data file; NOTE(review): TRD_INDEX_MEMORY
+ * is presumably the value used when there is no slot in the file
+ * - confirm against the users of trd_index */
+ int trd_index;
+ /** tag the client used */
+ __u16 trd_tag;
};
extern struct lu_context_key tgt_session_key;
bool tsi_preprocessed;
/* request JobID */
char *tsi_jobid;
+
+ /* update replay */
+ __u64 tsi_xid;
+ __u32 tsi_result;
+ __u32 tsi_client_gen;
};
static inline struct tgt_session_info *tgt_ses_info(const struct lu_env *env)
/*
* struct *_body is passed in the incoming message, and object
* identified by this fid exists on disk.
- * *
- * "habeo corpus" == "I have a body"
*/
- HABEO_CORPUS = (1 << 0),
+ HAS_BODY = BIT(0),
/*
* struct ldlm_request is passed in the incoming message.
- *
- * "habeo clavis" == "I have a key"
- * */
- HABEO_CLAVIS = (1 << 1),
+ */
+ HAS_KEY = BIT(1),
/*
* this request has fixed reply format, so that reply message can be
* packed by generic code.
- *
- * "habeo refero" == "I have a reply"
*/
- HABEO_REFERO = (1 << 2),
+ HAS_REPLY = BIT(2),
/*
* this request will modify something, so check whether the file system
* is readonly or not, then return -EROFS to client asap if necessary.
- *
- * "mutabor" == "I shall modify"
*/
- MUTABOR = (1 << 3)
+ IS_MUTABLE = BIT(3)
};
struct tgt_handler {
/* Flags in enum tgt_handler_flags */
__u32 th_flags;
/* Request version for this opcode */
- int th_version;
+ enum lustre_msg_version th_version;
/* Handler function */
int (*th_act)(struct tgt_session_info *tsi);
/* Handler function for high priority requests */
return !!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY);
}
+/* Return true when the OBD_CONNECT_MULTIMODRPCS bit is set in the value
+ * returned by exp_connect_flags() for @exp, false otherwise. */
+static inline bool tgt_is_multimodrpcs_client(struct obd_export *exp)
+{
+	const bool has_flag =
+		(exp_connect_flags(exp) & OBD_CONNECT_MULTIMODRPCS) != 0;
+
+	return has_flag;
+}
+
+/* Return true when the OBD_CONNECT2_INC_XID bit is set in the value
+ * returned by exp_connect_flags2() for @exp, false otherwise. */
+static inline bool tgt_is_increasing_xid_client(struct obd_export *exp)
+{
+	const bool has_flag =
+		(exp_connect_flags2(exp) & OBD_CONNECT2_INC_XID) != 0;
+
+	return has_flag;
+}
+
/* target/tgt_handler.c */
int tgt_request_handle(struct ptlrpc_request *req);
char *tgt_name(struct lu_target *tgt);
void tgt_counter_incr(struct obd_export *exp, int opcode);
int tgt_connect_check_sptlrpc(struct ptlrpc_request *req,
struct obd_export *exp);
-int tgt_adapt_sptlrpc_conf(struct lu_target *tgt, int initial);
+int tgt_adapt_sptlrpc_conf(struct lu_target *tgt);
int tgt_connect(struct tgt_session_info *tsi);
int tgt_disconnect(struct tgt_session_info *uti);
int tgt_obd_ping(struct tgt_session_info *tsi);
int tgt_bl_callback(struct tgt_session_info *tsi);
int tgt_cp_callback(struct tgt_session_info *tsi);
int tgt_llog_open(struct tgt_session_info *tsi);
-int tgt_llog_close(struct tgt_session_info *tsi);
-int tgt_llog_destroy(struct tgt_session_info *tsi);
int tgt_llog_read_header(struct tgt_session_info *tsi);
int tgt_llog_next_block(struct tgt_session_info *tsi);
int tgt_llog_prev_block(struct tgt_session_info *tsi);
int tgt_sec_ctx_init_cont(struct tgt_session_info *tsi);
int tgt_sec_ctx_fini(struct tgt_session_info *tsi);
int tgt_sendpage(struct tgt_session_info *tsi, struct lu_rdpg *rdpg, int nob);
+int tgt_send_buffer(struct tgt_session_info *tsi, struct lu_rdbuf *rdbuf);
int tgt_validate_obdo(struct tgt_session_info *tsi, struct obdo *oa);
int tgt_sync(const struct lu_env *env, struct lu_target *tgt,
- struct dt_object *obj);
+ struct dt_object *obj, __u64 start, __u64 end);
int tgt_io_thread_init(struct ptlrpc_thread *thread);
void tgt_io_thread_done(struct ptlrpc_thread *thread);
-int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
- __u64 start, __u64 end, struct lustre_handle *lh,
- int mode, __u64 *flags);
-void tgt_extent_unlock(struct lustre_handle *lh, ldlm_mode_t mode);
-int tgt_brw_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
- struct obd_ioobj *obj, struct niobuf_remote *nb,
- struct lustre_handle *lh, int mode);
-void tgt_brw_unlock(struct obd_ioobj *obj, struct niobuf_remote *niob,
- struct lustre_handle *lh, int mode);
+int tgt_mdt_data_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
+ struct lustre_handle *lh, int mode, __u64 *flags);
+void tgt_mdt_data_unlock(struct lustre_handle *lh, enum ldlm_mode mode);
+int tgt_extent_lock(const struct lu_env *env, struct ldlm_namespace *ns,
+ struct ldlm_res_id *res_id, __u64 start, __u64 end,
+ struct lustre_handle *lh, int mode, __u64 *flags);
+void tgt_extent_unlock(struct lustre_handle *lh, enum ldlm_mode mode);
int tgt_brw_read(struct tgt_session_info *tsi);
int tgt_brw_write(struct tgt_session_info *tsi);
int tgt_hpreq_handler(struct ptlrpc_request *req);
+void tgt_register_lfsck_in_notify_local(int (*notify)(const struct lu_env *,
+ struct dt_device *,
+ struct lfsck_req_local *,
+ struct thandle *));
void tgt_register_lfsck_in_notify(int (*notify)(const struct lu_env *,
struct dt_device *,
struct lfsck_request *));
void tgt_register_lfsck_query(int (*query)(const struct lu_env *,
struct dt_device *,
- struct lfsck_request *));
+ struct lfsck_request *,
+ struct lfsck_reply *,
+ struct lfsck_query *));
+int req_can_reconstruct(struct ptlrpc_request *req, struct tg_reply_data *trd);
extern struct tgt_handler tgt_sec_ctx_handlers[];
extern struct tgt_handler tgt_lfsck_handlers[];
/* target/tgt_main.c */
void tgt_boot_epoch_update(struct lu_target *lut);
-int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *lut,
- struct obd_export *exp, __u64 transno);
-int tgt_new_client_cb_add(struct thandle *th, struct obd_export *exp);
+void tgt_save_slc_lock(struct lu_target *lut, struct ldlm_lock *lock,
+ __u64 transno);
+void tgt_discard_slc_lock(struct lu_target *lut, struct ldlm_lock *lock);
int tgt_init(const struct lu_env *env, struct lu_target *lut,
struct obd_device *obd, struct dt_device *dt,
struct tgt_opc_slice *slice,
int tgt_client_del(const struct lu_env *env, struct obd_export *exp);
int tgt_client_add(const struct lu_env *env, struct obd_export *exp, int);
int tgt_client_new(const struct lu_env *env, struct obd_export *exp);
-int tgt_client_data_read(const struct lu_env *env, struct lu_target *tg,
- struct lsd_client_data *lcd, loff_t *off, int index);
-int tgt_client_data_write(const struct lu_env *env, struct lu_target *tg,
- struct lsd_client_data *lcd, loff_t *off, struct thandle *th);
-int tgt_server_data_read(const struct lu_env *env, struct lu_target *tg);
-int tgt_server_data_write(const struct lu_env *env, struct lu_target *tg,
- struct thandle *th);
int tgt_server_data_update(const struct lu_env *env, struct lu_target *tg,
int sync);
-int tgt_truncate_last_rcvd(const struct lu_env *env, struct lu_target *tg,
- loff_t off);
-
-/* target/out_lib.c */
-struct dt_update_request *
-out_find_update(struct thandle_update *tu, struct dt_device *dt_dev);
-void out_destroy_update_req(struct dt_update_request *update);
-struct dt_update_request *out_create_update_req(struct dt_device *dt);
-struct dt_update_request *out_find_create_update_loc(struct thandle *th,
- struct dt_object *dt);
-int out_prep_update_req(const struct lu_env *env, struct obd_import *imp,
- const struct object_update_request *ureq,
- struct ptlrpc_request **reqp);
-int out_remote_sync(const struct lu_env *env, struct obd_import *imp,
- struct dt_update_request *update,
- struct ptlrpc_request **reqp);
-int out_insert_update(const struct lu_env *env,
- struct dt_update_request *update,
- int op, const struct lu_fid *fid, int count,
- int *lens, const char **bufs);
+int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt);
+int tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd);
+int tgt_mk_reply_data(const struct lu_env *env, struct lu_target *tgt,
+ struct tg_export_data *ted, struct ptlrpc_request *req,
+ __u64 opdata, struct thandle *th, bool write_update,
+ __u64 transno);
+struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted,
+ __u64 xid);
+int tgt_tunables_init(struct lu_target *lut);
+void tgt_tunables_fini(struct lu_target *lut);
+
+/* target/tgt_grant.c */
+/* Return 1 when the OBD_CONNECT_GRANT_PARAM bit is set in the value
+ * returned by exp_connect_flags() for @exp, and 0 otherwise. */
+static inline int exp_grant_param_supp(struct obd_export *exp)
+{
+	if (exp_connect_flags(exp) & OBD_CONNECT_GRANT_PARAM)
+		return 1;
+
+	return 0;
+}
+/* Blocksize used for client not supporting OBD_CONNECT_GRANT_PARAM.
+ * That's 4KB=2^12 which is the biggest block size known to work whatever
+ * the client's page size is. */
+#define COMPAT_BSIZE_SHIFT 12
+
+void tgt_grant_sanity_check(struct obd_device *obd, const char *func);
+void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp,
+ struct obd_connect_data *data, bool new_conn);
+void tgt_grant_discard(struct obd_export *exp);
+void tgt_grant_prepare_read(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa);
+void tgt_grant_prepare_write(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct niobuf_remote *rnb,
+ int niocount);
+void tgt_grant_commit(struct obd_export *exp, unsigned long grant_used, int rc);
+int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp,
+ unsigned long grant);
+long tgt_grant_create(const struct lu_env *env, struct obd_export *exp,
+ s64 *nr);
+int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut,
+ struct obd_statfs *osfs, time64_t max_age,
+ int *from_cache);
+ssize_t tot_dirty_show(struct kobject *kobj, struct attribute *attr,
+ char *buf);
+ssize_t tot_granted_show(struct kobject *kobj, struct attribute *attr,
+ char *buf);
+ssize_t tot_pending_show(struct kobject *kobj, struct attribute *attr,
+ char *buf);
+ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr,
+ char *buf);
+ssize_t grant_compat_disable_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
+ssize_t sync_lock_cancel_show(struct kobject *kobj,
+ struct attribute *attr, char *buf);
+ssize_t sync_lock_cancel_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count);
+#endif
+
+/* FMD (file modification data) helpers.
+ *
+ * tgt_fmd_drop() is declared as a real function only when DO_FMD_DROP
+ * is defined. Otherwise it is a macro that expands to an empty
+ * statement, so calls to it do nothing and do not evaluate their
+ * arguments. */
+void tgt_fmd_update(struct obd_export *exp, const struct lu_fid *fid,
+ __u64 xid);
+bool tgt_fmd_check(struct obd_export *exp, const struct lu_fid *fid,
+ __u64 xid);
+#ifdef DO_FMD_DROP
+void tgt_fmd_drop(struct obd_export *exp, const struct lu_fid *fid);
+#else
+#define tgt_fmd_drop(exp, fid) do {} while (0)
+#endif
+
+/* target/update_trans.c */
+int distribute_txn_init(const struct lu_env *env,
+ struct lu_target *lut,
+ struct target_distribute_txn_data *tdtd,
+ __u32 index);
+void distribute_txn_fini(const struct lu_env *env,
+ struct target_distribute_txn_data *tdtd);
+
+/* target/update_recovery.c */
+/* Parameters are named for readability; names in a prototype do not
+ * affect callers. NOTE(review): the trailing __u32 is presumably the
+ * index of the MDT the record belongs to (compare dtrqs_mdt_index) -
+ * confirm against the definition in target/update_recovery.c. */
+int
+insert_update_records_to_replay_list(struct target_distribute_txn_data *tdtd,
+				     struct llog_update_record *lur,
+				     struct llog_cookie *cookie,
+				     __u32 mdt_index);
+void dtrq_list_dump(struct target_distribute_txn_data *tdtd,
+ unsigned int mask);
+void dtrq_list_destroy(struct target_distribute_txn_data *tdtd);
+int distribute_txn_replay_handle(struct lu_env *env,
+ struct target_distribute_txn_data *tdtd,
+ struct distribute_txn_replay_req *dtrq);
+__u64 distribute_txn_get_next_transno(struct target_distribute_txn_data *tdtd);
+struct distribute_txn_replay_req *
+distribute_txn_get_next_req(struct target_distribute_txn_data *tdtd);
+void dtrq_destroy(struct distribute_txn_replay_req *dtrq);
+struct distribute_txn_replay_req_sub *
+dtrq_sub_lookup(struct distribute_txn_replay_req *dtrq, __u32 mdt_index);
+struct distribute_txn_replay_req *
+distribute_txn_lookup_finish_list(struct target_distribute_txn_data *tdtd,
+ __u64 transno);
+bool is_req_replayed_by_update(struct ptlrpc_request *req);
enum {
ESERIOUS = 0x0001000
};
return (rc < 0 && -rc & ESERIOUS);
}
-/**
- * Do not return server-side uid/gid to remote client
- */
-static inline void tgt_drop_id(struct obd_export *exp, struct obdo *oa)
-{
- if (unlikely(exp_connect_rmtclient(exp))) {
- oa->o_uid = -1;
- oa->o_gid = -1;
- oa->o_valid &= ~(OBD_MD_FLUID | OBD_MD_FLGID);
- }
-}
-
/*
* Unified target generic handers macros and generic functions.
*/