X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flu_target.h;h=e16e5bc52182bac30e0a8c3ace8021a97454b9fd;hp=929a4d43c941f283c04e85a836ff664cf164ef29;hb=c438fba7f068b0713d96dce1f0183ec6da7ab000;hpb=386818f0c56e438779e17d0ca12b481f17c53682 diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 929a4d4..e16e5bc 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -43,9 +39,125 @@ #include #include +/* Each one represents a distribute transaction replay + * operation, and updates on each MDT are linked to + * dtrq_sub_list */ +struct distribute_txn_replay_req { + /* update record, may be vmalloc'd */ + struct llog_update_record *dtrq_lur; + int dtrq_lur_size; + + /* linked to the distribute transaction replay + * list (tdtd_replay_list) */ + struct list_head dtrq_list; + __u64 dtrq_master_transno; + __u64 dtrq_batchid; + __u64 dtrq_xid; + + /* all of sub updates are linked here */ + struct list_head dtrq_sub_list; + spinlock_t dtrq_sub_list_lock; + + /* If the local update has been executed during replay */ + __u32 dtrq_local_update_executed:1; +}; + +/* Each one represents a sub replay item under a distribute + * transaction. 
A distribute transaction will be operated in + * two or more MDTs, and updates on each MDT will be represented + * by this structure */ +struct distribute_txn_replay_req_sub { + __u32 dtrqs_mdt_index; + + /* All of cookies for the update will be linked here */ + spinlock_t dtrqs_cookie_list_lock; + struct list_head dtrqs_cookie_list; + struct list_head dtrqs_list; +}; + +struct target_distribute_txn_data; +typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env, + struct target_distribute_txn_data *tdtd, + struct distribute_txn_replay_req *dtrq); +typedef char *(*target_show_update_logs_retrievers_t)(void *data, int *size, + int *count); +struct target_distribute_txn_data { + /* Distribution ID is used to identify updates log on different + * MDTs for one operation */ + spinlock_t tdtd_batchid_lock; + __u64 tdtd_batchid; + struct lu_target *tdtd_lut; + struct dt_object *tdtd_batchid_obj; + struct dt_device *tdtd_dt; + + /* Committed batchid for distribute transaction */ + __u64 tdtd_committed_batchid; + + /* List for distribute transaction */ + struct list_head tdtd_list; + + /* Threads to manage distribute transaction */ + struct task_struct *tdtd_commit_task; + atomic_t tdtd_refcount; + struct lu_env tdtd_env; + + /* recovery update */ + distribute_txn_replay_handler_t tdtd_replay_handler; + struct list_head tdtd_replay_list; + struct list_head tdtd_replay_finish_list; + spinlock_t tdtd_replay_list_lock; + /* last replay update transno */ + __u32 tdtd_replay_ready:1; + + /* Manage the llog recovery threads */ + atomic_t tdtd_recovery_threads_count; + wait_queue_head_t tdtd_recovery_threads_waitq; + target_show_update_logs_retrievers_t + tdtd_show_update_logs_retrievers; + void *tdtd_show_retrievers_cbdata; +}; + +struct tg_grants_data { + /* grants: all values in bytes */ + /* grant lock to protect all grant counters */ + spinlock_t tgd_grant_lock; + /* total amount of dirty data reported by clients in incoming obdo */ + u64 tgd_tot_dirty; + /* sum of 
filesystem space granted to clients for async writes */ + u64 tgd_tot_granted; + /* grant used by I/Os in progress (between prepare and commit) */ + u64 tgd_tot_pending; + /* amount of available space in percentage that is never used for + * grants, used on MDT to always keep space for metadata. */ + u64 tgd_reserved_pcnt; + /* number of clients using grants */ + int tgd_tot_granted_clients; + /* shall we grant space to clients not + * supporting OBD_CONNECT_GRANT_PARAM? */ + int tgd_grant_compat_disable; + /* protect all statfs-related counters */ + spinlock_t tgd_osfs_lock; + time64_t tgd_osfs_age; + int tgd_blockbits; + /* counters used during statfs update, protected by tgd_osfs_lock. + * record when some statfs refresh are in progress */ + int tgd_statfs_inflight; + /* writes between prep & commit which might be accounted twice in + * tgd_osfs.os_bavail */ + u64 tgd_osfs_unstable; + /* track writes completed while statfs refresh is underway. + * tracking is only effective when tgd_statfs_inflight > 1 */ + u64 tgd_osfs_inflight; + /* statfs optimization: we cache a bit */ + struct obd_statfs tgd_osfs; +}; + struct lu_target { struct obd_device *lut_obd; struct dt_device *lut_bottom; + struct dt_device_param lut_dt_conf; + + struct target_distribute_txn_data *lut_tdtd; /* supported opcodes and handlers for this target */ struct tgt_opc_slice *lut_slice; @@ -56,13 +168,12 @@ struct lu_target { rwlock_t lut_sptlrpc_lock; struct sptlrpc_rule_set lut_sptlrpc_rset; spinlock_t lut_flags_lock; - int lut_sec_level; - unsigned int lut_mds_capa:1, - lut_oss_capa:1, - lut_syncjournal:1, + unsigned int lut_syncjournal:1, lut_sync_lock_cancel:2, /* e.g. 
OST node */ - lut_no_reconstruct:1; + lut_no_reconstruct:1, + /* enforce recovery for local clients */ + lut_local_recovery:1; /** last_rcvd file */ struct dt_object *lut_last_rcvd; /* transaction callbacks */ @@ -77,6 +188,56 @@ struct lu_target { spinlock_t lut_client_bitmap_lock; /** Bitmap of known clients */ unsigned long *lut_client_bitmap; + /* Number of clients supporting multiple modify RPCs + * recorded in the bitmap */ + atomic_t lut_num_clients; + /* Client generation to identify client slot reuse */ + atomic_t lut_client_generation; + /** reply_data file */ + struct dt_object *lut_reply_data; + /** Bitmap of used slots in the reply data file */ + unsigned long **lut_reply_bitmap; + /** target sync count, used for debug & test */ + atomic_t lut_sync_count; + + /** cross MDT locks which should trigger Sync-on-Lock-Cancel */ + spinlock_t lut_slc_locks_guard; + struct list_head lut_slc_locks; + + /* target grants fields */ + struct tg_grants_data lut_tgd; + + /* target tunables */ + const struct attribute **lut_attrs; + + /* FMD (file modification data) values */ + int lut_fmd_max_num; + time64_t lut_fmd_max_age; +}; + +#define LUT_FMD_MAX_NUM_DEFAULT 128 +#define LUT_FMD_MAX_AGE_DEFAULT (obd_timeout + 10) + +/* number of slots in reply bitmap */ +#define LUT_REPLY_SLOTS_PER_CHUNK (1<<20) +#define LUT_REPLY_SLOTS_MAX_CHUNKS 16 + +#define TRD_INDEX_MEMORY -1 + +/** + * Target reply data + */ +struct tg_reply_data { + /** chain of reply data anchored in tg_export_data */ + struct list_head trd_list; + /** copy of on-disk reply data */ + struct lsd_reply_data trd_reply; + /** versions for Version Based Recovery */ + __u64 trd_pre_versions[4]; + /** slot index in reply_data file */ + int trd_index; + /** tag the client used */ + __u16 trd_tag; }; extern struct lu_context_key tgt_session_key; @@ -121,6 +282,11 @@ struct tgt_session_info { bool tsi_preprocessed; /* request JobID */ char *tsi_jobid; + + /* update replay */ + __u64 tsi_xid; + __u32 tsi_result; + 
__u32 tsi_client_gen; }; static inline struct tgt_session_info *tgt_ses_info(const struct lu_env *env) @@ -171,30 +337,22 @@ enum tgt_handler_flags { /* * struct *_body is passed in the incoming message, and object * identified by this fid exists on disk. - * * - * "habeo corpus" == "I have a body" */ - HABEO_CORPUS = (1 << 0), + HAS_BODY = BIT(0), /* * struct ldlm_request is passed in the incoming message. - * - * "habeo clavis" == "I have a key" - * */ - HABEO_CLAVIS = (1 << 1), + */ + HAS_KEY = BIT(1), /* * this request has fixed reply format, so that reply message can be * packed by generic code. - * - * "habeo refero" == "I have a reply" */ - HABEO_REFERO = (1 << 2), + HAS_REPLY = BIT(2), /* * this request will modify something, so check whether the file system * is readonly or not, then return -EROFS to client asap if necessary. - * - * "mutabor" == "I shall modify" */ - MUTABOR = (1 << 3) + IS_MUTABLE = BIT(3) }; struct tgt_handler { @@ -207,7 +365,7 @@ struct tgt_handler { /* Flags in enum tgt_handler_flags */ __u32 th_flags; /* Request version for this opcode */ - int th_version; + enum lustre_msg_version th_version; /* Handler function */ int (*th_act)(struct tgt_session_info *tsi); /* Handler function for high priority requests */ @@ -239,13 +397,23 @@ static inline int req_is_replay(struct ptlrpc_request *req) return !!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY); } +static inline bool tgt_is_multimodrpcs_client(struct obd_export *exp) +{ + return exp_connect_flags(exp) & OBD_CONNECT_MULTIMODRPCS; +} + +static inline bool tgt_is_increasing_xid_client(struct obd_export *exp) +{ + return exp_connect_flags2(exp) & OBD_CONNECT2_INC_XID; +} + /* target/tgt_handler.c */ int tgt_request_handle(struct ptlrpc_request *req); char *tgt_name(struct lu_target *tgt); void tgt_counter_incr(struct obd_export *exp, int opcode); int tgt_connect_check_sptlrpc(struct ptlrpc_request *req, struct obd_export *exp); -int tgt_adapt_sptlrpc_conf(struct lu_target *tgt, int 
initial); +int tgt_adapt_sptlrpc_conf(struct lu_target *tgt); int tgt_connect(struct tgt_session_info *tsi); int tgt_disconnect(struct tgt_session_info *uti); int tgt_obd_ping(struct tgt_session_info *tsi); @@ -254,8 +422,6 @@ int tgt_convert(struct tgt_session_info *tsi); int tgt_bl_callback(struct tgt_session_info *tsi); int tgt_cp_callback(struct tgt_session_info *tsi); int tgt_llog_open(struct tgt_session_info *tsi); -int tgt_llog_close(struct tgt_session_info *tsi); -int tgt_llog_destroy(struct tgt_session_info *tsi); int tgt_llog_read_header(struct tgt_session_info *tsi); int tgt_llog_next_block(struct tgt_session_info *tsi); int tgt_llog_prev_block(struct tgt_session_info *tsi); @@ -263,6 +429,7 @@ int tgt_sec_ctx_init(struct tgt_session_info *tsi); int tgt_sec_ctx_init_cont(struct tgt_session_info *tsi); int tgt_sec_ctx_fini(struct tgt_session_info *tsi); int tgt_sendpage(struct tgt_session_info *tsi, struct lu_rdpg *rdpg, int nob); +int tgt_send_buffer(struct tgt_session_info *tsi, struct lu_rdbuf *rdbuf); int tgt_validate_obdo(struct tgt_session_info *tsi, struct obdo *oa); int tgt_sync(const struct lu_env *env, struct lu_target *tgt, struct dt_object *obj, __u64 start, __u64 end); @@ -270,24 +437,29 @@ int tgt_sync(const struct lu_env *env, struct lu_target *tgt, int tgt_io_thread_init(struct ptlrpc_thread *thread); void tgt_io_thread_done(struct ptlrpc_thread *thread); -int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, - __u64 start, __u64 end, struct lustre_handle *lh, - int mode, __u64 *flags); -void tgt_extent_unlock(struct lustre_handle *lh, ldlm_mode_t mode); -int tgt_brw_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, - struct obd_ioobj *obj, struct niobuf_remote *nb, - struct lustre_handle *lh, int mode); -void tgt_brw_unlock(struct obd_ioobj *obj, struct niobuf_remote *niob, - struct lustre_handle *lh, int mode); +int tgt_mdt_data_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, + struct 
lustre_handle *lh, int mode, __u64 *flags); +void tgt_mdt_data_unlock(struct lustre_handle *lh, enum ldlm_mode mode); +int tgt_extent_lock(const struct lu_env *env, struct ldlm_namespace *ns, + struct ldlm_res_id *res_id, __u64 start, __u64 end, + struct lustre_handle *lh, int mode, __u64 *flags); +void tgt_extent_unlock(struct lustre_handle *lh, enum ldlm_mode mode); int tgt_brw_read(struct tgt_session_info *tsi); int tgt_brw_write(struct tgt_session_info *tsi); int tgt_hpreq_handler(struct ptlrpc_request *req); +void tgt_register_lfsck_in_notify_local(int (*notify)(const struct lu_env *, + struct dt_device *, + struct lfsck_req_local *, + struct thandle *)); void tgt_register_lfsck_in_notify(int (*notify)(const struct lu_env *, struct dt_device *, struct lfsck_request *)); void tgt_register_lfsck_query(int (*query)(const struct lu_env *, struct dt_device *, - struct lfsck_request *)); + struct lfsck_request *, + struct lfsck_reply *, + struct lfsck_query *)); +int req_can_reconstruct(struct ptlrpc_request *req, struct tg_reply_data *trd); extern struct tgt_handler tgt_sec_ctx_handlers[]; extern struct tgt_handler tgt_lfsck_handlers[]; @@ -309,9 +481,9 @@ int tgt_hpreq_handler(struct ptlrpc_request *req); /* target/tgt_main.c */ void tgt_boot_epoch_update(struct lu_target *lut); -int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *lut, - struct obd_export *exp, __u64 transno); -int tgt_new_client_cb_add(struct thandle *th, struct obd_export *exp); +void tgt_save_slc_lock(struct lu_target *lut, struct ldlm_lock *lock, + __u64 transno); +void tgt_discard_slc_lock(struct lu_target *lut, struct ldlm_lock *lock); int tgt_init(const struct lu_env *env, struct lu_target *lut, struct obd_device *obd, struct dt_device *dt, struct tgt_opc_slice *slice, @@ -322,18 +494,104 @@ void tgt_client_free(struct obd_export *exp); int tgt_client_del(const struct lu_env *env, struct obd_export *exp); int tgt_client_add(const struct lu_env *env, struct obd_export *exp, 
int); int tgt_client_new(const struct lu_env *env, struct obd_export *exp); -int tgt_client_data_read(const struct lu_env *env, struct lu_target *tg, - struct lsd_client_data *lcd, loff_t *off, int index); -int tgt_client_data_write(const struct lu_env *env, struct lu_target *tg, - struct lsd_client_data *lcd, loff_t *off, struct thandle *th); -int tgt_server_data_read(const struct lu_env *env, struct lu_target *tg); -int tgt_server_data_write(const struct lu_env *env, struct lu_target *tg, - struct thandle *th); int tgt_server_data_update(const struct lu_env *env, struct lu_target *tg, int sync); -int tgt_truncate_last_rcvd(const struct lu_env *env, struct lu_target *tg, - loff_t off); +int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt); +int tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd); +int tgt_mk_reply_data(const struct lu_env *env, struct lu_target *tgt, + struct tg_export_data *ted, struct ptlrpc_request *req, + __u64 opdata, struct thandle *th, bool write_update, + __u64 transno); +struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted, + __u64 xid); +int tgt_tunables_init(struct lu_target *lut); +void tgt_tunables_fini(struct lu_target *lut); + +/* target/tgt_grant.c */ +static inline int exp_grant_param_supp(struct obd_export *exp) +{ + return !!(exp_connect_flags(exp) & OBD_CONNECT_GRANT_PARAM); +} +/* Blocksize used for client not supporting OBD_CONNECT_GRANT_PARAM. + * That's 4KB=2^12 which is the biggest block size known to work whatever + * the client's page size is. 
*/ +#define COMPAT_BSIZE_SHIFT 12 + +void tgt_grant_sanity_check(struct obd_device *obd, const char *func); +void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp, + struct obd_connect_data *data, bool new_conn); +void tgt_grant_discard(struct obd_export *exp); +void tgt_grant_prepare_read(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); +void tgt_grant_prepare_write(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa, struct niobuf_remote *rnb, + int niocount); +void tgt_grant_commit(struct obd_export *exp, unsigned long grant_used, int rc); +int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, + unsigned long grant); +long tgt_grant_create(const struct lu_env *env, struct obd_export *exp, + s64 *nr); +int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut, + struct obd_statfs *osfs, time64_t max_age, + int *from_cache); +ssize_t tot_dirty_show(struct kobject *kobj, struct attribute *attr, + char *buf); +ssize_t tot_granted_show(struct kobject *kobj, struct attribute *attr, + char *buf); +ssize_t tot_pending_show(struct kobject *kobj, struct attribute *attr, + char *buf); +ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr, + char *buf); +ssize_t grant_compat_disable_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count); +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0) +ssize_t sync_lock_cancel_show(struct kobject *kobj, + struct attribute *attr, char *buf); +ssize_t sync_lock_cancel_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count); +#endif + +/* FMD */ +void tgt_fmd_update(struct obd_export *exp, const struct lu_fid *fid, + __u64 xid); +bool tgt_fmd_check(struct obd_export *exp, const struct lu_fid *fid, + __u64 xid); +#ifdef DO_FMD_DROP +void tgt_fmd_drop(struct obd_export *exp, const struct lu_fid *fid); +#else +#define tgt_fmd_drop(exp, fid) do {} while (0) 
+#endif + +/* target/update_trans.c */ +int distribute_txn_init(const struct lu_env *env, + struct lu_target *lut, + struct target_distribute_txn_data *tdtd, + __u32 index); +void distribute_txn_fini(const struct lu_env *env, + struct target_distribute_txn_data *tdtd); + +/* target/update_recovery.c */ +int insert_update_records_to_replay_list(struct target_distribute_txn_data *, + struct llog_update_record *, + struct llog_cookie *, __u32); +void dtrq_list_dump(struct target_distribute_txn_data *tdtd, + unsigned int mask); +void dtrq_list_destroy(struct target_distribute_txn_data *tdtd); +int distribute_txn_replay_handle(struct lu_env *env, + struct target_distribute_txn_data *tdtd, + struct distribute_txn_replay_req *dtrq); +__u64 distribute_txn_get_next_transno(struct target_distribute_txn_data *tdtd); +struct distribute_txn_replay_req * +distribute_txn_get_next_req(struct target_distribute_txn_data *tdtd); +void dtrq_destroy(struct distribute_txn_replay_req *dtrq); +struct distribute_txn_replay_req_sub * +dtrq_sub_lookup(struct distribute_txn_replay_req *dtrq, __u32 mdt_index); +struct distribute_txn_replay_req * +distribute_txn_lookup_finish_list(struct target_distribute_txn_data *tdtd, + __u64 transno); +bool is_req_replayed_by_update(struct ptlrpc_request *req); enum { ESERIOUS = 0x0001000 }; @@ -356,18 +614,6 @@ static inline int is_serious(int rc) return (rc < 0 && -rc & ESERIOUS); } -/** - * Do not return server-side uid/gid to remote client - */ -static inline void tgt_drop_id(struct obd_export *exp, struct obdo *oa) -{ - if (unlikely(exp_connect_rmtclient(exp))) { - oa->o_uid = -1; - oa->o_gid = -1; - oa->o_valid &= ~(OBD_MD_FLUID | OBD_MD_FLGID); - } -} - /* * Unified target generic handers macros and generic functions. */