X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_internal.h;h=dc21cd5cc9bdf2c65a116a79468c8a6632dde455;hp=2ba6f9acb379d4c26bc778b4df7e237e0dd0e7b2;hb=f543cd02f604cd154e29271b2ef3ca6592337f40;hpb=9e5ac2b0a3c3623698ed2dbb5487bccd927a6144 diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 2ba6f9a..dc21cd5 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2016, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -65,15 +65,17 @@ struct mdt_object; /* file data for open files on MDS */ struct mdt_file_data { /** portals handle must be first */ - struct portals_handle mfd_handle; + struct portals_handle mfd_open_handle; + /* export data of portals_handle */ + const struct mdt_export_data *mfd_owner; /** open mode provided by client */ - __u64 mfd_mode; + u64 mfd_open_flags; /** protected by med_open_lock */ struct list_head mfd_list; /** xid of the open request */ __u64 mfd_xid; /** old handle in replay case */ - struct lustre_handle mfd_old_handle; + struct lustre_handle mfd_open_handle_old; /** point to opened object */ struct mdt_object *mfd_object; }; @@ -132,12 +134,12 @@ struct coordinator { struct lu_env cdt_env; /**< coordinator lustre * env */ struct lu_context cdt_session; /** session for lu_ucred */ - struct proc_dir_entry *cdt_proc_dir; /**< cdt /proc directory */ + struct dentry *cdt_debugfs_dir; /**< cdt debugfs directory */ + struct completion cdt_kobj_unregister; + struct kobject cdt_hsm_kobj; /* hsm sysfs object */ __u64 cdt_policy; /**< policy flags */ enum cdt_states cdt_state; /**< state */ - spinlock_t cdt_state_lock; /**< cdt_state lock */ - atomic_t cdt_compound_id; /**< compound id - * counter */ + struct mutex cdt_state_lock; /**< cdt_state lock */ __u64 cdt_last_cookie; /**< last cookie * allocated */ struct rw_semaphore cdt_llog_lock; /**< protect llog @@ -147,17 +149,21 @@ struct coordinator { * list */ struct mutex cdt_restore_lock; /**< protect restore * list */ - cfs_time_t cdt_loop_period; /**< llog scan period */ - cfs_time_t cdt_grace_delay; /**< request grace + time_t cdt_loop_period; /**< llog scan period */ + time_t cdt_grace_delay; /**< request grace * delay */ - cfs_time_t cdt_active_req_timeout; /**< request timeout */ + time_t cdt_active_req_timeout; /**< request timeout */ __u32 cdt_default_archive_id; /**< archive id used * when none are * specified */ - __u64 cdt_max_requests; /**< max count of started + u64 cdt_max_requests; /**< max count of started * requests */ - atomic_t cdt_request_count; /**< current count of - * started requests */ + /** Current count of active requests */ + atomic_t cdt_request_count; /** total */ + atomic_t cdt_archive_count; + atomic_t cdt_restore_count; + atomic_t cdt_remove_count; + /* started requests (struct cdt_agent_req:car_cookie_hash) * indexed by cookie */ struct cfs_hash *cdt_request_cookie_hash; @@ -165,8 +171,7 @@ struct coordinator { struct list_head cdt_request_list; struct list_head cdt_agents; /**< list of register * agents */ - struct list_head cdt_restore_hdl; /**< list of restore lock - * handles */ + struct list_head cdt_restore_handle_list; /* Hash of cookies to locations of record locations in agent * request log. */ @@ -187,6 +192,19 @@ struct coordinator { #define MDT_FL_CFGLOG 0 #define MDT_FL_SYNCED 1 +/* possible values for mo_dom_lock */ +enum { + NO_DOM_LOCK_ON_OPEN = 0, + TRYLOCK_DOM_ON_OPEN = 1, + ALWAYS_DOM_LOCK_ON_OPEN = 2, + NUM_DOM_LOCK_ON_OPEN_MODES +}; + +struct mdt_statfs_cache { + struct obd_statfs msf_osfs; + __u64 msf_age; +}; + struct mdt_device { /* super-class */ struct lu_device mdt_lu_dev; @@ -200,6 +218,7 @@ struct mdt_device { struct md_device *mdt_child; struct dt_device *mdt_bottom; struct obd_export *mdt_bottom_exp; + struct local_oid_storage *mdt_los; /** target device */ struct lu_target mdt_lut; /* @@ -209,7 +228,10 @@ struct mdt_device { unsigned int mo_user_xattr:1, mo_acl:1, mo_cos:1, - mo_evict_tgt_nids:1; + mo_evict_tgt_nids:1, + mo_dom_read_open:1, + mo_migrate_hsm_allowed:1; + unsigned int mo_dom_lock; } mdt_opts; /* mdt state flags */ unsigned long mdt_state; @@ -223,21 +245,30 @@ struct mdt_device { int mdt_max_ea_size; + /* preferred BRW size, decided by storage type and capability */ + __u32 mdt_brw_size; + struct upcall_cache *mdt_identity_cache; unsigned int mdt_capa_conf:1, /* Enable remote dir on non-MDT0 */ mdt_enable_remote_dir:1, - mdt_skip_lfsck:1; - + mdt_enable_striped_dir:1, + mdt_enable_dir_migration:1, + mdt_enable_remote_rename:1, + mdt_skip_lfsck:1, + mdt_readonly:1; + + /* user with gid can create remote/striped + * dir, and set default dir stripe */ gid_t mdt_enable_remote_dir_gid; /* lock for osfs and md_root */ spinlock_t mdt_lock; /* statfs optimization: we cache a bit */ - struct obd_statfs mdt_osfs; - __u64 mdt_osfs_age; + struct mdt_statfs_cache mdt_sum_osfs; + struct mdt_statfs_cache mdt_osfs; /* root squash */ struct root_squash_info mdt_squash; @@ -264,6 +295,8 @@ struct mdt_device { #define MDT_SERVICE_WATCHDOG_FACTOR (2) #define MDT_COS_DEFAULT (0) +#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */ + struct mdt_object { struct lu_object_header mot_header; struct lu_object mot_obj; @@ -272,8 +305,12 @@ struct mdt_object { * attribute cache */ int mot_write_count; spinlock_t mot_write_lock; + /* Lock to protect object's SOM update. */ + struct mutex mot_som_mutex; /* Lock to protect create_data */ struct mutex mot_lov_mutex; + /* lock to protect read/write stages for Data-on-MDT files */ + struct rw_semaphore mot_dom_sem; /* Lock to protect lease open. * Lease open acquires write lock; normal open acquires read lock */ struct rw_semaphore mot_open_sem; @@ -324,7 +361,8 @@ enum { struct mdt_reint_record { enum mds_reint_op rr_opcode; - const struct lustre_handle *rr_handle; + const struct lustre_handle *rr_open_handle; + const struct lustre_handle *rr_lease_handle; const struct lu_fid *rr_fid1; const struct lu_fid *rr_fid2; struct lu_name rr_name; @@ -332,6 +370,7 @@ struct mdt_reint_record { void *rr_eadata; int rr_eadatalen; __u32 rr_flags; + __u16 rr_mirror_id; }; enum mdt_reint_flag { @@ -343,91 +382,92 @@ enum mdt_reint_flag { * reduce stack consumption. */ struct mdt_thread_info { - /* - * XXX: Part One: - * The following members will be filled explicitly - * with specific data in mdt_thread_info_init(). - */ - /* TODO: move this into mdt_session_key(with LCT_SESSION), because - * request handling may migrate from one server thread to another. - */ - struct req_capsule *mti_pill; - - /* although we have export in req, there are cases when it is not - * available, e.g. closing files upon export destroy */ - struct obd_export *mti_exp; - /* - * A couple of lock handles. - */ - struct mdt_lock_handle mti_lh[MDT_LH_NR]; - - struct mdt_device *mti_mdt; - const struct lu_env *mti_env; - - /* transaction number of current request */ - __u64 mti_transno; - - - /* - * XXX: Part Two: - * The following members will be filled expilictly - * with zero in mdt_thread_info_init(). These members may be used - * by all requests. - */ - - /* - * Object attributes. - */ - struct md_attr mti_attr; - /* - * Body for "habeo corpus" operations. - */ - const struct mdt_body *mti_body; - /* - * Host object. This is released at the end of mdt_handler(). - */ - struct mdt_object *mti_object; - /* - * Lock request for "habeo clavis" operations. - */ - const struct ldlm_request *mti_dlm_req; - - __u32 mti_has_trans:1, /* has txn already? */ + /* + * XXX: Part One: + * The following members will be filled explicitly + * with specific data in mdt_thread_info_init(). + */ + /* TODO: move this into mdt_session_key(with LCT_SESSION), because + * request handling may migrate from one server thread to another. + */ + struct req_capsule *mti_pill; + + /* although we have export in req, there are cases when it is not + * available, e.g. closing files upon export destroy */ + struct obd_export *mti_exp; + /* + * A couple of lock handles. + */ + struct mdt_lock_handle mti_lh[MDT_LH_NR]; + + struct mdt_device *mti_mdt; + const struct lu_env *mti_env; + + /* transaction number of current request */ + __u64 mti_transno; + + /* + * XXX: Part Two: + * The following members will be filled expilictly + * with zero in mdt_thread_info_init(). These members may be used + * by all requests. + */ + + /* + * Object attributes. + */ + struct md_attr mti_attr; + struct md_attr mti_attr2; /* mdt_lvb.c */ + /* + * Body for "habeo corpus" operations. + */ + const struct mdt_body *mti_body; + /* + * Host object. This is released at the end of mdt_handler(). + */ + struct mdt_object *mti_object; + /* + * Lock request for "habeo clavis" operations. + */ + const struct ldlm_request *mti_dlm_req; + + __u32 mti_has_trans:1, /* has txn already? */ mti_cross_ref:1, /* big_lmm buffer was used and must be used in reply */ mti_big_lmm_used:1, - mti_big_acl_used:1; - - /* opdata for mdt_reint_open(), has the same as - * ldlm_reply:lock_policy_res1. mdt_update_last_rcvd() stores this - * value onto disk for recovery when mdt_trans_stop_cb() is called. - */ - __u64 mti_opdata; - - /* - * XXX: Part Three: - * The following members will be filled explicitly - * with zero in mdt_reint_unpack(), because they are only used - * by reint requests (including mdt_reint_open()). - */ - - /* - * reint record. contains information for reint operations. - */ - struct mdt_reint_record mti_rr; - - __u64 mti_ver[PTLRPC_NUM_VERSIONS]; - /* - * Operation specification (currently create and lookup) - */ - struct md_op_spec mti_spec; - - /* - * XXX: Part Four: - * The following members will _NOT_ be initialized at all. - * DO NOT expect them to contain any valid value. - * They should be initialized explicitly by the user themselves. - */ + mti_big_acl_used:1, + mti_som_valid:1; + + /* opdata for mdt_reint_open(), has the same as + * ldlm_reply:lock_policy_res1. mdt_update_last_rcvd() stores this + * value onto disk for recovery when mdt_trans_stop_cb() is called. + */ + __u64 mti_opdata; + + /* + * XXX: Part Three: + * The following members will be filled explicitly + * with zero in mdt_reint_unpack(), because they are only used + * by reint requests (including mdt_reint_open()). + */ + + /* + * reint record. contains information for reint operations. + */ + struct mdt_reint_record mti_rr; + + __u64 mti_ver[PTLRPC_NUM_VERSIONS]; + /* + * Operation specification (currently create and lookup) + */ + struct md_op_spec mti_spec; + + /* + * XXX: Part Four: + * The following members will _NOT_ be initialized at all. + * DO NOT expect them to contain any valid value. + * They should be initialized explicitly by the user themselves. + */ /* XXX: If something is in a union, make sure they do not conflict */ struct lu_fid mti_tmp_fid1; @@ -439,24 +479,28 @@ struct mdt_thread_info { char ns_name[48];/* for mdt_init0() */ struct lustre_cfg_bufs bufs; /* for mdt_stack_fini() */ struct obd_statfs osfs; /* for mdt_statfs() */ - struct { - /* for mdt_readpage() */ - struct lu_rdpg mti_rdpg; - /* for mdt_sendpage() */ - struct l_wait_info mti_wait_info; - } rdpg; + struct { + /* for mdt_readpage() */ + struct lu_rdpg mti_rdpg; + /* for mdt_sendpage() */ + struct l_wait_info mti_wait_info; + } rdpg; struct { struct md_attr attr; } hsm; - } mti_u; + struct { + struct md_attr attr; + } som; + } mti_u; - struct lustre_handle mti_close_handle; - loff_t mti_off; - struct lu_buf mti_buf; - struct lu_buf mti_big_buf; + struct lustre_handle mti_open_handle; + loff_t mti_off; + struct lu_buf mti_buf; + struct lu_buf mti_big_buf; - /* Ops object filename */ - struct lu_name mti_name; + /* Ops object filename */ + struct lu_name mti_name; + char mti_filename[NAME_MAX + 1]; /* per-thread values, can be re-used, may be vmalloc'd */ void *mti_big_lmm; void *mti_big_acl; @@ -464,8 +508,13 @@ struct mdt_thread_info { int mti_big_aclsize; /* should be enough to fit lustre_mdt_attrs */ char mti_xattr_buf[128]; - struct ldlm_enqueue_info mti_einfo; + struct ldlm_enqueue_info mti_einfo[2]; + /* einfo used by mdt_remote_object_lock_try() */ + struct ldlm_enqueue_info mti_remote_einfo; struct tg_reply_data *mti_reply_data; + + /* FLR: layout change API */ + struct md_layout_change mti_layout; }; extern struct lu_context_key mdt_thread_key; @@ -495,13 +544,12 @@ struct cdt_agent_req { struct hlist_node car_cookie_hash; /**< find req by cookie */ struct list_head car_request_list; /**< to chain all the req. */ atomic_t car_refcount; /**< reference counter */ - __u64 car_compound_id; /**< compound id */ __u64 car_flags; /**< request original flags */ struct obd_uuid car_uuid; /**< agent doing the req. */ __u32 car_archive_id; /**< archive id */ int car_canceled; /**< request was canceled */ - cfs_time_t car_req_start; /**< start time */ - cfs_time_t car_req_update; /**< last update time */ + time64_t car_req_start; /**< start time */ + time64_t car_req_update; /**< last update time */ struct hsm_action_item *car_hai; /**< req. to the agent */ struct cdt_req_progress car_progress; /**< track data mvt * progress */ @@ -615,6 +663,104 @@ static inline bool mdt_is_striped_client(struct obd_export *exp) return exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE; } +enum { + LMM_NO_DOM, + LMM_DOM_ONLY, + LMM_DOM_OST +}; + +/* XXX Look into layout in MDT layer. This must be done in LOD. */ +static inline int mdt_lmm_dom_entry(struct lov_mds_md *lmm) +{ + struct lov_comp_md_v1 *comp_v1; + struct lov_mds_md *v1; + __u32 off; + bool has_dom = true; + int i; + + if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_COMP_V1) + return LMM_NO_DOM; + + comp_v1 = (struct lov_comp_md_v1 *)lmm; + off = le32_to_cpu(comp_v1->lcm_entries[0].lcme_offset); + v1 = (struct lov_mds_md *)((char *)comp_v1 + off); + + /* DoM entry is the first entry always */ + if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) != LOV_PATTERN_MDT && + le16_to_cpu(comp_v1->lcm_mirror_count) == 0) + return LMM_NO_DOM; + + for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) { + int j; + + off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset); + v1 = (struct lov_mds_md *)((char *)comp_v1 + off); + + if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) == + LOV_PATTERN_MDT) + has_dom = true; + + for (j = 0; j < le16_to_cpu(v1->lmm_stripe_count); j++) { + /* if there is any object on OST */ + if (le32_to_cpu(v1->lmm_objects[j].l_ost_idx) != + (__u32)-1UL) + return LMM_DOM_OST; + } + } + return has_dom ? LMM_DOM_ONLY : LMM_NO_DOM; +} + +static inline bool mdt_lmm_is_flr(struct lov_mds_md *lmm) +{ + struct lov_comp_md_v1 *lcm = (typeof(lcm))lmm; + + return le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_COMP_V1 && + le16_to_cpu(lcm->lcm_mirror_count) > 0; +} + +static inline bool lmm_is_overstriping(struct lov_mds_md *lmm) +{ + if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1 || + le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V3) + return le16_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_OVERSTRIPING; + + return false; +} + +static inline bool mdt_lmm_comp_overstriping(struct lov_mds_md *lmm) +{ + struct lov_comp_md_v1 *comp_v1; + struct lov_mds_md *v1; + __u32 off; + int i; + + comp_v1 = (struct lov_comp_md_v1 *)lmm; + + for (i = 1; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) { + off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset); + v1 = (struct lov_mds_md *)((char *)comp_v1 + off); + + if (lmm_is_overstriping(v1)) + return true; + } + + return false; +} + +static inline bool mdt_lmm_is_overstriping(struct lov_mds_md *lmm) +{ + if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_COMP_V1) + return mdt_lmm_comp_overstriping(lmm); + + return lmm_is_overstriping(lmm); +} + +static inline bool mdt_is_sum_statfs_client(struct obd_export *exp) +{ + return exp_connect_flags(exp) & OBD_CONNECT_FLAGS2 && + exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS; +} + __u64 mdt_get_disposition(struct ldlm_reply *rep, __u64 op_flag); void mdt_set_disposition(struct mdt_thread_info *info, struct ldlm_reply *rep, __u64 op_flag); @@ -645,6 +791,8 @@ int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *mo, void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *mo, struct mdt_lock_handle *lh, int decref); +void mdt_save_lock(struct mdt_thread_info *info, struct lustre_handle *h, + enum ldlm_mode mode, int decref); struct mdt_object *mdt_object_new(const struct lu_env *env, struct mdt_device *, @@ -667,6 +815,16 @@ int mdt_remote_object_lock(struct mdt_thread_info *mti, struct mdt_object *o, const struct lu_fid *fid, struct lustre_handle *lh, enum ldlm_mode mode, __u64 ibits, bool cache); +int mdt_reint_striped_lock(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh, + __u64 ibits, + struct ldlm_enqueue_info *einfo, + bool cos_incompat); +void mdt_reint_striped_unlock(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh, + struct ldlm_enqueue_info *einfo, int decref); enum mdt_name_flags { MNF_FIX_ANON = 1, @@ -685,8 +843,9 @@ int mdt_pack_acl2body(struct mdt_thread_info *info, struct mdt_body *repbody, struct mdt_object *o, struct lu_nodemap *nodemap); #endif void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, - const struct lu_attr *attr, const struct lu_fid *fid); - + const struct lu_attr *attr, const struct lu_fid *fid); +int mdt_pack_size2body(struct mdt_thread_info *info, + const struct lu_fid *fid, struct lustre_handle *lh); int mdt_getxattr(struct mdt_thread_info *info); int mdt_reint_setxattr(struct mdt_thread_info *info, struct mdt_lock_handle *lh); @@ -710,18 +869,16 @@ int mdt_export_stats_init(struct obd_device *obd, void *client_nid); int mdt_lock_new_child(struct mdt_thread_info *info, - struct mdt_object *o, - struct mdt_lock_handle *child_lockh); - -void mdt_mfd_set_mode(struct mdt_file_data *mfd, - __u64 mode); - -int mdt_reint_open(struct mdt_thread_info *info, - struct mdt_lock_handle *lhc); - -struct mdt_file_data *mdt_handle2mfd(struct mdt_export_data *med, - const struct lustre_handle *handle, - bool is_replay); + struct mdt_object *o, + struct mdt_lock_handle *child_lockh); +void mdt_mfd_set_mode(struct mdt_file_data *mfd, u64 open_flags); +int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc); +struct mdt_file_data *mdt_open_handle2mfd(struct mdt_export_data *med, + const struct lustre_handle *open_handle, + bool is_replay); +int mdt_revoke_remote_lookup_lock(struct mdt_thread_info *info, + struct mdt_object *pobj, + struct mdt_object *obj); int mdt_get_info(struct tgt_session_info *tsi); int mdt_attr_get_complex(struct mdt_thread_info *info, @@ -730,6 +887,8 @@ int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o, const char *name); int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o, struct md_attr *ma, const char *name); +int mdt_attr_get_pfid(struct mdt_thread_info *info, struct mdt_object *o, + struct lu_fid *pfid); int mdt_write_get(struct mdt_object *o); void mdt_write_put(struct mdt_object *o); int mdt_write_read(struct mdt_object *o); @@ -743,6 +902,9 @@ int mdt_fix_reply(struct mdt_thread_info *info); int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *, struct md_attr *); void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *); +int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj, + struct md_layout_change *spec); +int mdt_device_sync(const struct lu_env *env, struct mdt_device *mdt); struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len); const struct lu_buf *mdt_buf_const(const struct lu_env *env, @@ -765,7 +927,15 @@ void mdt_thread_info_init(struct ptlrpc_request *req, struct mdt_thread_info *mti); void mdt_thread_info_fini(struct mdt_thread_info *mti); struct mdt_thread_info *tsi2mdt_info(struct tgt_session_info *tsi); - +void mdt_intent_fixup_resent(struct mdt_thread_info *info, + struct ldlm_lock *new_lock, + struct mdt_lock_handle *lh, __u64 flags); +int mdt_intent_lock_replace(struct mdt_thread_info *info, + struct ldlm_lock **lockp, + struct mdt_lock_handle *lh, + __u64 flags, int result); + +int hsm_init_ucred(struct lu_ucred *uc); int mdt_hsm_attr_set(struct mdt_thread_info *info, struct mdt_object *obj, const struct md_hsm *mh); @@ -776,6 +946,8 @@ int mdt_links_read(struct mdt_thread_info *info, struct linkea_data *ldata); int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req, struct mdt_body *repbody); +void mdt_pack_secctx_in_reply(struct mdt_thread_info *info, + struct mdt_object *child); static inline struct mdt_device *mdt_dev(struct lu_device *d) { @@ -837,8 +1009,8 @@ int cdt_llog_process(const struct lu_env *env, struct mdt_device *mdt, llog_cb_t cb, void *data, u32 start_cat_idx, u32 start_rec_idx, int rw); int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt, - __u64 compound_id, __u32 archive_id, - __u64 flags, struct hsm_action_item *hai); + __u32 archive_id, __u64 flags, + struct hsm_action_item *hai); int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, struct hsm_record_update *updates, unsigned int updates_count); @@ -865,13 +1037,14 @@ int mdt_hsm_find_best_agent(struct coordinator *cdt, __u32 archive, struct obd_uuid *uuid); int mdt_hsm_agent_send(struct mdt_thread_info *mti, struct hsm_action_list *hal, bool purge); -int mdt_hsm_coordinator_update(struct mdt_thread_info *mti, - struct hsm_progress_kernel *pgs); /* mdt/mdt_hsm_cdt_client.c */ int mdt_hsm_add_actions(struct mdt_thread_info *info, struct hsm_action_list *hal); -int mdt_hsm_get_actions(struct mdt_thread_info *mti, - struct hsm_action_list *hal); +int mdt_hsm_get_action(struct mdt_thread_info *mti, + const struct lu_fid *fid, + enum hsm_copytool_action *action, + enum agent_req_status *status, + struct hsm_extent *extent); bool mdt_hsm_restore_is_running(struct mdt_thread_info *mti, const struct lu_fid *fid); /* mdt/mdt_hsm_cdt_requests.c */ @@ -879,8 +1052,8 @@ extern struct cfs_hash_ops cdt_request_cookie_hash_ops; extern struct cfs_hash_ops cdt_agent_record_hash_ops; extern const struct file_operations mdt_hsm_active_requests_fops; void dump_requests(char *prefix, struct coordinator *cdt); -struct cdt_agent_req *mdt_cdt_alloc_request(__u64 compound_id, __u32 archive_id, - __u64 flags, struct obd_uuid *uuid, +struct cdt_agent_req *mdt_cdt_alloc_request(__u32 archive_id, __u64 flags, + struct obd_uuid *uuid, struct hsm_action_item *hai); void mdt_cdt_free_request(struct cdt_agent_req *car); int mdt_cdt_add_request(struct coordinator *cdt, struct cdt_agent_req *new_car); @@ -894,8 +1067,13 @@ int mdt_cdt_remove_request(struct coordinator *cdt, __u64 cookie); /* mdt/mdt_coordinator.c */ void mdt_hsm_dump_hal(int level, const char *prefix, struct hsm_action_list *hal); -struct cdt_restore_handle *mdt_hsm_restore_hdl_find(struct coordinator *cdt, - const struct lu_fid *fid); +int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt, + const struct lu_fid *fid, + const struct hsm_extent *he); +struct cdt_restore_handle *cdt_restore_handle_find(struct coordinator *cdt, + const struct lu_fid *fid); +void cdt_restore_handle_del(struct mdt_thread_info *mti, + struct coordinator *cdt, const struct lu_fid *fid); /* coordinator management */ int mdt_hsm_cdt_init(struct mdt_device *mdt); int mdt_hsm_cdt_stop(struct mdt_device *mdt); @@ -910,14 +1088,13 @@ static inline void mdt_hsm_cdt_event(struct coordinator *cdt) cdt->cdt_event = true; } -/* coordinator control /proc interface */ -ssize_t mdt_hsm_cdt_control_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off); -int mdt_hsm_cdt_control_seq_show(struct seq_file *m, void *data); -int hsm_cdt_procfs_init(struct mdt_device *mdt); -void hsm_cdt_procfs_fini(struct mdt_device *mdt); -struct lprocfs_vars *hsm_cdt_get_proc_vars(void); +/* coordinator control sysfs interface */ +ssize_t hsm_control_show(struct kobject *kobj, struct attribute *attr, + char *buf); +ssize_t hsm_control_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count); +int hsm_cdt_tunables_init(struct mdt_device *mdt); +void hsm_cdt_tunables_fini(struct mdt_device *mdt); /* md_hsm helpers */ struct mdt_object *mdt_hsm_get_md_hsm(struct mdt_thread_info *mti, const struct lu_fid *fid, @@ -929,8 +1106,7 @@ bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai, u32 archive_id, u64 rq_flags, const struct md_hsm *hsm); int mdt_hsm_update_request_state(struct mdt_thread_info *mti, - struct hsm_progress_kernel *pgs, - const int update_record); + struct hsm_progress_kernel *pgs); int mdt_close_swap_layouts(struct mdt_thread_info *info, struct mdt_object *o, struct md_attr *ma); @@ -963,10 +1139,9 @@ static inline struct mdt_device *mdt_exp2dev(struct obd_export *exp) static inline bool mdt_rdonly(struct obd_export *exp) { - if (exp_connect_flags(exp) & OBD_CONNECT_RDONLY || - mdt_exp2dev(exp)->mdt_bottom->dd_rdonly) - return true; - return false; + return (exp_connect_flags(exp) & OBD_CONNECT_RDONLY || + mdt_exp2dev(exp)->mdt_bottom->dd_rdonly || + mdt_exp2dev(exp)->mdt_readonly); } typedef void (*mdt_reconstruct_t)(struct mdt_thread_info *mti, @@ -1008,22 +1183,30 @@ static inline int is_identity_get_disabled(struct upcall_cache *cache) int mdt_blocking_ast(struct ldlm_lock*, struct ldlm_lock_desc*, void*, int); +static int mdt_dom_glimpse_ast(struct ldlm_lock *lock, void *reqp) +{ + return -ELDLM_NO_LOCK_DATA; +} + /* Issues dlm lock on passed @ns, @f stores it lock handle into @lh. */ -static inline int mdt_fid_lock(struct ldlm_namespace *ns, +static inline int mdt_fid_lock(const struct lu_env *env, + struct ldlm_namespace *ns, struct lustre_handle *lh, enum ldlm_mode mode, union ldlm_policy_data *policy, const struct ldlm_res_id *res_id, __u64 flags, const __u64 *client_cookie) { int rc; + bool glimpse = policy->l_inodebits.bits & MDS_INODELOCK_DOM; LASSERT(ns != NULL); LASSERT(lh != NULL); - rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, policy, + rc = ldlm_cli_enqueue_local(env, ns, res_id, LDLM_IBITS, policy, mode, &flags, mdt_blocking_ast, - ldlm_completion_ast, NULL, NULL, 0, - LVB_T_NONE, client_cookie, lh); + ldlm_completion_ast, + glimpse ? mdt_dom_glimpse_ast : NULL, + NULL, 0, LVB_T_NONE, client_cookie, lh); return rc == ELDLM_OK ? 0 : -EIO; } @@ -1054,15 +1237,27 @@ static inline enum ldlm_mode mdt_mdl_mode2dlm_mode(mdl_mode_t mode) return mdt_dlm_lock_modes[mode]; } +/* mdt_som.c */ +int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj, + enum lustre_som_flags flag, __u64 size, __u64 blocks); +int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj, + struct md_attr *ma); +int mdt_lsom_downgrade(struct mdt_thread_info *info, struct mdt_object *obj); +int mdt_lsom_update(struct mdt_thread_info *info, struct mdt_object *obj, + bool truncate); + /* mdt_lvb.c */ extern struct ldlm_valblock_ops mdt_lvbo; +int mdt_dom_lvb_is_valid(struct ldlm_resource *res); +int mdt_dom_lvbo_update(struct ldlm_resource *res, struct ldlm_lock *lock, + struct ptlrpc_request *req, bool increase_only); -void mdt_enable_cos(struct mdt_device *, int); +void mdt_enable_cos(struct mdt_device *dev, bool enable); int mdt_cos_is_enabled(struct mdt_device *); /* lprocfs stuff */ -enum { - LPROC_MDT_OPEN = 0, +enum mdt_stat_idx { + LPROC_MDT_OPEN, LPROC_MDT_CLOSE, LPROC_MDT_MKNOD, LPROC_MDT_LINK, @@ -1076,14 +1271,18 @@ enum { LPROC_MDT_SETXATTR, LPROC_MDT_STATFS, LPROC_MDT_SYNC, - LPROC_MDT_SAMEDIR_RENAME, - LPROC_MDT_CROSSDIR_RENAME, - LPROC_MDT_LAST, + LPROC_MDT_SAMEDIR_RENAME, + LPROC_MDT_CROSSDIR_RENAME, + LPROC_MDT_IO_READ, + LPROC_MDT_IO_WRITE, + LPROC_MDT_IO_PUNCH, + LPROC_MDT_LAST, }; + void mdt_counter_incr(struct ptlrpc_request *req, int opcode); void mdt_stats_counter_init(struct lprocfs_stats *stats); -int mdt_procfs_init(struct mdt_device *mdt, const char *name); -void mdt_procfs_fini(struct mdt_device *mdt); +int mdt_tunables_init(struct mdt_device *mdt, const char *name); +void mdt_tunables_fini(struct mdt_device *mdt); /* lustre/mdt_mdt_lproc.c */ int lprocfs_mdt_open_files_seq_open(struct inode *inode, @@ -1119,4 +1318,53 @@ static inline char *mdt_req_get_jobid(struct ptlrpc_request *req) return jobid; } +/* MDT IO */ + +#define VALID_FLAGS (LA_TYPE | LA_MODE | LA_SIZE | LA_BLOCKS | \ + LA_BLKSIZE | LA_ATIME | LA_MTIME | LA_CTIME) + +int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, + struct obdo *oa, int objcount, struct obd_ioobj *obj, + struct niobuf_remote *rnb, int *nr_local, + struct niobuf_local *lnb); + +int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, + struct obdo *oa, int objcount, struct obd_ioobj *obj, + struct niobuf_remote *rnb, int npages, + struct niobuf_local *lnb, int old_rc); +int mdt_punch_hdl(struct tgt_session_info *tsi); +int mdt_glimpse_enqueue(struct mdt_thread_info *mti, struct ldlm_namespace *ns, + struct ldlm_lock **lockp, __u64 flags); +int mdt_brw_enqueue(struct mdt_thread_info *info, struct ldlm_namespace *ns, + struct ldlm_lock **lockp, __u64 flags); +int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, + struct lustre_handle *lh); +void mdt_dom_discard_data(struct mdt_thread_info *info, struct mdt_object *mo); +int mdt_dom_disk_lvbo_update(const struct lu_env *env, struct mdt_object *mo, + struct ldlm_resource *res, bool increase_only); +void mdt_dom_obj_lvb_update(const struct lu_env *env, struct mdt_object *mo, + bool increase_only); +int mdt_dom_lvb_alloc(struct ldlm_resource *res); + +static inline bool mdt_dom_check_for_discard(struct mdt_thread_info *mti, + struct mdt_object *mo) +{ + return lu_object_is_dying(&mo->mot_header) && + S_ISREG(lu_object_attr(&mo->mot_obj)); +} + +int mdt_dom_object_size(const struct lu_env *env, struct mdt_device *mdt, + const struct lu_fid *fid, struct mdt_body *mb, + bool dom_lock); +bool mdt_dom_client_has_lock(struct mdt_thread_info *info, + const struct lu_fid *fid); +void mdt_hp_brw(struct tgt_session_info *tsi); +void mdt_hp_punch(struct tgt_session_info *tsi); +int mdt_data_version_get(struct tgt_session_info *tsi); + +/* grants */ +long mdt_grant_connect(const struct lu_env *env, struct obd_export *exp, + u64 want, bool conservative); +extern struct kmem_cache *ldlm_glimpse_work_kmem; + #endif /* _MDT_INTERNAL_H */