X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fofd%2Fofd_internal.h;h=c0f17c4c27e405f6484f9158536a8356c28004dc;hp=9285a1f877c34d58a94ae87d8b438ed32e220375;hb=2b905746ee3b5d9dbafcdb1af5930aea18120a7b;hpb=9761d5c52aeef31a8c3112c3fb3ec9e24b37c800 diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 9285a1f..c0f17c4 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2013, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,13 +38,8 @@ #include #include #include -#include -#include #define OFD_INIT_OBJID 0 -#define OFD_ROCOMPAT_SUPP (0) -#define OFD_INCOMPAT_SUPP (OBD_INCOMPAT_GROUPS | OBD_INCOMPAT_OST | \ - OBD_INCOMPAT_COMMON_LR) #define OFD_PRECREATE_BATCH_DEFAULT (OBJ_SUBDIR_COUNT * 4) /* on small filesystems we should not precreate too many objects in @@ -60,18 +51,6 @@ #define OFD_VALID_FLAGS (LA_TYPE | LA_MODE | LA_SIZE | LA_BLOCKS | \ LA_BLKSIZE | LA_ATIME | LA_MTIME | LA_CTIME) -/* per-client-per-object persistent state (LRU) */ -struct ofd_mod_data { - cfs_list_t fmd_list; /* linked to fed_mod_list */ - struct lu_fid fmd_fid; /* FID being written to */ - __u64 fmd_mactime_xid; /* xid highest {m,a,c}time setattr */ - cfs_time_t fmd_expire; /* time when the fmd should expire */ - int fmd_refcount; /* reference counter - list holds 1 */ -}; - -#define OFD_FMD_MAX_NUM_DEFAULT 128 -#define OFD_FMD_MAX_AGE_DEFAULT ((obd_timeout + 10) * HZ) - #define OFD_SOFT_SYNC_LIMIT_DEFAULT 16 /* request stats */ @@ -109,82 +88,67 @@ static inline void ofd_counter_incr(struct obd_export *exp, int opcode, } struct ofd_seq { - cfs_list_t os_list; + struct list_head os_list; struct ost_id os_oi; spinlock_t os_last_oid_lock; struct mutex os_create_lock; - cfs_atomic_t os_refc; + atomic_t os_refc; + atomic_t os_precreate_in_progress; struct dt_object *os_lastid_obj; - unsigned long os_destroys_in_progress:1; + unsigned long os_destroys_in_progress:1, + os_last_id_synced:1; }; struct ofd_device { struct dt_device ofd_dt_dev; struct dt_device *ofd_osd; struct obd_export *ofd_osd_exp; - struct dt_device_param ofd_dt_conf; /* DLM name-space for meta-data locks maintained by this server */ struct ldlm_namespace *ofd_namespace; /* last_rcvd file */ struct lu_target ofd_lut; struct dt_object *ofd_health_check_file; + struct local_oid_storage *ofd_los; - int ofd_subdir_count; + __u64 ofd_inconsistency_self_detected; + __u64 ofd_inconsistency_self_repaired; - cfs_list_t ofd_seq_list; + struct list_head ofd_seq_list; rwlock_t ofd_seq_list_lock; int ofd_seq_count; int ofd_precreate_batch; spinlock_t ofd_batch_lock; - /* protect all statfs-related counters */ - spinlock_t ofd_osfs_lock; - /* statfs optimization: we cache a bit */ - struct obd_statfs ofd_osfs; - __u64 ofd_osfs_age; - int ofd_blockbits; - /* writes which might be be accounted twice in ofd_osfs.os_bavail */ - obd_size ofd_osfs_unstable; - - /* counters used during statfs update, protected by ofd_osfs_lock. - * record when some statfs refresh are in progress */ - int ofd_statfs_inflight; - /* track writes completed while statfs refresh is underway. - * tracking is only effective when ofd_statfs_inflight > 1 */ - obd_size ofd_osfs_inflight; - - /* grants: all values in bytes */ - /* grant lock to protect all grant counters */ - spinlock_t ofd_grant_lock; - /* total amount of dirty data reported by clients in incoming obdo */ - obd_size ofd_tot_dirty; - /* sum of filesystem space granted to clients for async writes */ - obd_size ofd_tot_granted; - /* grant used by I/Os in progress (between prepare and commit) */ - obd_size ofd_tot_pending; - /* free space threshold over which we stop granting space to clients - * ofd_grant_ratio is stored as a fixed-point fraction using - * OFD_GRANT_RATIO_SHIFT of the remaining free space, not in percentage - * values */ - int ofd_grant_ratio; - /* number of clients using grants */ - int ofd_tot_granted_clients; - - /* ofd mod data: ofd_device wide values */ - int ofd_fmd_max_num; /* per ofd ofd_mod_data */ - cfs_duration_t ofd_fmd_max_age; /* time to fmd expiry */ + /* preferred BRW size, decided by storage type and capability */ + __u32 ofd_brw_size; + /* checksum types supported on this node */ + enum cksum_types ofd_cksum_types_supported; spinlock_t ofd_flags_lock; unsigned long ofd_raid_degraded:1, /* sync journal on writes */ - ofd_syncjournal:1, - /* shall we grant space to clients not - * supporting OBD_CONNECT_GRANT_PARAM? */ - ofd_grant_compat_disable:1; + ofd_sync_journal:1, + /* Protected by ofd_lastid_rwsem. */ + ofd_lastid_rebuilding:1, + ofd_record_fid_accessed:1, + ofd_lfsck_verify_pfid:1, + ofd_skip_lfsck:1, + /* Whether to enforce T10PI checksum of RPC */ + ofd_checksum_t10pi_enforce:1; struct seq_server_site ofd_seq_site; /* the limit of SOFT_SYNC RPCs that will trigger a soft sync */ unsigned int ofd_soft_sync_limit; + /* Protect ::ofd_lastid_rebuilding */ + struct rw_semaphore ofd_lastid_rwsem; + __u64 ofd_lastid_gen; + struct ptlrpc_thread ofd_inconsistency_thread; + struct list_head ofd_inconsistency_list; + spinlock_t ofd_inconsistency_lock; + /* Backwards compatibility */ + struct attribute *ofd_read_cache_enable; + struct attribute *ofd_read_cache_max_filesize; + struct attribute *ofd_write_cache_enable; }; static inline struct ofd_device *ofd_dev(struct lu_device *d) @@ -210,7 +174,9 @@ static inline char *ofd_name(struct ofd_device *ofd) struct ofd_object { struct lu_object_header ofo_header; struct dt_object ofo_obj; - int ofo_ff_exists; + struct filter_fid ofo_ff; + unsigned int ofo_pfid_checking:1, + ofo_pfid_verified:1; }; static inline struct ofd_object *ofd_obj(struct lu_object *o) @@ -243,13 +209,6 @@ static inline struct ofd_device *ofd_obj2dev(const struct ofd_object *fo) return ofd_dev(fo->ofo_obj.do_lu.lo_dev); } -static inline struct lustre_capa *ofd_object_capa(const struct lu_env *env, - const struct ofd_object *obj) -{ - /* TODO: see mdd_object_capa() */ - return BYPASS_CAPA; -} - static inline void ofd_read_lock(const struct lu_env *env, struct ofd_object *fo) { @@ -311,43 +270,37 @@ struct ofd_thread_info { struct lu_buf fti_buf; loff_t fti_off; - /* Space used by the I/O, used by grant code */ - unsigned long fti_used; struct ost_lvb fti_lvb; + union { + struct lfsck_req_local fti_lrl; + struct obd_connect_data fti_ocd; + }; }; extern void target_recovery_fini(struct obd_device *obd); extern void target_recovery_init(struct lu_target *lut, svc_handler_t handler); -/* ofd_capa.c */ -int ofd_update_capa_key(struct ofd_device *ofd, struct lustre_capa_key *key); -int ofd_auth_capa(struct obd_export *exp, struct lu_fid *fid, obd_seq seq, - struct lustre_capa *capa, __u64 opc); -void ofd_free_capa_keys(struct ofd_device *ofd); - /* ofd_dev.c */ extern struct lu_context_key ofd_thread_key; int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd); +int ofd_fiemap_get(const struct lu_env *env, struct ofd_device *ofd, + struct lu_fid *fid, struct fiemap *fiemap); /* ofd_obd.c */ extern struct obd_ops ofd_obd_ops; -int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd, - struct obd_statfs *osfs, __u64 max_age, - int *from_cache); -int ofd_orphans_destroy(const struct lu_env *env, struct obd_export *exp, - struct ofd_device *ofd, struct obdo *oa); int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd, const struct lu_fid *fid, int orphan); int ofd_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags); + struct obd_statfs *osfs, time64_t max_age, __u32 flags); +int ofd_obd_disconnect(struct obd_export *exp); /* ofd_fs.c */ -obd_id ofd_seq_last_oid(struct ofd_seq *oseq); -void ofd_seq_last_oid_set(struct ofd_seq *oseq, obd_id id); +u64 ofd_seq_last_oid(struct ofd_seq *oseq); +void ofd_seq_last_oid_set(struct ofd_seq *oseq, u64 id); int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd, struct ofd_seq *oseq); int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd); -struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, obd_seq seq); +struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, u64 seq); void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq); int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd, @@ -355,20 +308,25 @@ int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd, void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd); int ofd_precreate_batch(struct ofd_device *ofd, int batch); struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd, - obd_seq seq); + u64 seq); void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd); +void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd); /* ofd_io.c */ +int ofd_start_inconsistency_verification_thread(struct ofd_device *ofd); +int ofd_stop_inconsistency_verification_thread(struct ofd_device *ofd); +int ofd_verify_ff(const struct lu_env *env, struct ofd_object *fo, + struct obdo *oa); +int ofd_verify_layout_version(const struct lu_env *env, + struct ofd_object *fo, const struct obdo *oa); int ofd_preprw(const struct lu_env *env,int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, - struct niobuf_local *lnb, struct obd_trans_info *oti, - struct lustre_capa *capa); + struct niobuf_local *lnb); int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int npages, - struct niobuf_local *lnb, struct obd_trans_info *oti, - int old_rc); + struct niobuf_local *lnb, int old_rc); /* ofd_trans.c */ struct thandle *ofd_trans_create(const struct lu_env *env, @@ -376,20 +334,16 @@ struct thandle *ofd_trans_create(const struct lu_env *env, int ofd_trans_start(const struct lu_env *env, struct ofd_device *ofd, struct ofd_object *fo, struct thandle *th); -void ofd_trans_stop(const struct lu_env *env, struct ofd_device *ofd, +int ofd_trans_stop(const struct lu_env *env, struct ofd_device *ofd, struct thandle *th, int rc); int ofd_txn_stop_cb(const struct lu_env *env, struct thandle *txn, void *cookie); /* lproc_ofd.c */ -#ifdef LPROCFS -void lprocfs_ofd_init_vars(struct lprocfs_static_vars *lvars); +int ofd_tunables_init(struct ofd_device *ofd); +#ifdef CONFIG_PROC_FS void ofd_stats_counter_init(struct lprocfs_stats *stats); #else -static void lprocfs_ofd_init_vars(struct lprocfs_static_vars *lvars) -{ - memset(lvars, 0, sizeof(*lvars)); -} static inline void ofd_stats_counter_init(struct lprocfs_stats *stats) {} #endif @@ -397,30 +351,32 @@ static inline void ofd_stats_counter_init(struct lprocfs_stats *stats) {} struct ofd_object *ofd_object_find(const struct lu_env *env, struct ofd_device *ofd, const struct lu_fid *fid); -struct ofd_object *ofd_object_find_or_create(const struct lu_env *env, - struct ofd_device *ofd, - const struct lu_fid *fid, - struct lu_attr *attr); -int ofd_object_ff_check(const struct lu_env *env, struct ofd_object *fo); +int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo); +int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo, + const struct obdo *oa, struct filter_fid *ff); int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, - obd_id id, struct ofd_seq *oseq, int nr, int sync); + u64 id, struct ofd_seq *oseq, int nr, int sync); -void ofd_object_put(const struct lu_env *env, struct ofd_object *fo); +static inline void ofd_object_put(const struct lu_env *env, + struct ofd_object *fo) +{ + dt_object_put(env, &fo->ofo_obj); +} int ofd_attr_set(const struct lu_env *env, struct ofd_object *fo, - struct lu_attr *la, struct filter_fid *ff); + struct lu_attr *la, struct obdo *oa); int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo, __u64 start, __u64 end, struct lu_attr *la, - struct filter_fid *ff); -int ofd_object_destroy(const struct lu_env *, struct ofd_object *, int); + struct obdo *oa); +int ofd_destroy(const struct lu_env *, struct ofd_object *, int); int ofd_attr_get(const struct lu_env *env, struct ofd_object *fo, struct lu_attr *la); -int ofd_attr_handle_ugid(const struct lu_env *env, struct ofd_object *fo, +int ofd_attr_handle_id(const struct lu_env *env, struct ofd_object *fo, struct lu_attr *la, int is_setattr); static inline struct ofd_object *ofd_object_find_exists(const struct lu_env *env, struct ofd_device *ofd, - struct lu_fid *fid) + const struct lu_fid *fid) { struct ofd_object *fo; @@ -432,76 +388,6 @@ struct ofd_object *ofd_object_find_exists(const struct lu_env *env, return fo; } -/* ofd_grants.c */ -#define OFD_GRANT_RATIO_SHIFT 8 -static inline __u64 ofd_grant_reserved(struct ofd_device *ofd, obd_size bavail) -{ - return (bavail * ofd->ofd_grant_ratio) >> OFD_GRANT_RATIO_SHIFT; -} - -static inline int ofd_grant_ratio_conv(int percentage) -{ - return (percentage << OFD_GRANT_RATIO_SHIFT) / 100; -} - -static inline int ofd_grant_param_supp(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_GRANT_PARAM); -} - -/* Blocksize used for client not supporting OBD_CONNECT_GRANT_PARAM. - * That's 4KB=2^12 which is the biggest block size known to work whatever - * the client's page size is. */ -#define COMPAT_BSIZE_SHIFT 12 -static inline int ofd_grant_compat(struct obd_export *exp, - struct ofd_device *ofd) -{ - /* Clients which don't support OBD_CONNECT_GRANT_PARAM cannot handle - * a block size > page size and consume PAGE_CACHE_SIZE of grant when - * dirtying a page regardless of the block size */ - return !!(ofd_obd(ofd)->obd_self_export != exp && - ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT && - !ofd_grant_param_supp(exp)); -} - -static inline int ofd_grant_prohibit(struct obd_export *exp, - struct ofd_device *ofd) -{ - /* When ofd_grant_compat_disable is set, we don't grant any space to - * clients not supporting OBD_CONNECT_GRANT_PARAM. - * Otherwise, space granted to such a client is inflated since it - * consumes PAGE_CACHE_SIZE of grant space per block */ - return !!(ofd_grant_compat(exp, ofd) && ofd->ofd_grant_compat_disable); -} - -void ofd_grant_sanity_check(struct obd_device *obd, const char *func); -long ofd_grant_connect(const struct lu_env *env, struct obd_export *exp, - obd_size want, bool conservative); -void ofd_grant_discard(struct obd_export *exp); -void ofd_grant_prepare_read(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); -void ofd_grant_prepare_write(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct niobuf_remote *rnb, - int niocount); -void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp, int rc); -int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr); - -/* ofd_fmd.c */ -int ofd_fmd_init(void); -void ofd_fmd_exit(void); -struct ofd_mod_data *ofd_fmd_find(struct obd_export *exp, - struct lu_fid *fid); -struct ofd_mod_data *ofd_fmd_get(struct obd_export *exp, - struct lu_fid *fid); -void ofd_fmd_put(struct obd_export *exp, struct ofd_mod_data *fmd); -void ofd_fmd_expire(struct obd_export *exp); -void ofd_fmd_cleanup(struct obd_export *exp); -#ifdef DO_FMD_DROP -void ofd_fmd_drop(struct obd_export *exp, struct lu_fid *fid); -#else -#define ofd_fmd_drop(exp, fid) do {} while (0) -#endif - /* ofd_dev.c */ int ofd_fid_set_index(const struct lu_env *env, struct ofd_device *ofd, int index); @@ -512,9 +398,11 @@ int ofd_fid_fini(const struct lu_env *env, struct ofd_device *ofd); extern struct ldlm_valblock_ops ofd_lvbo; /* ofd_dlm.c */ -int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp, - void *req_cookie, ldlm_mode_t mode, __u64 flags, - void *data); +extern struct kmem_cache *ldlm_glimpse_work_kmem; + +int ofd_intent_policy(const struct lu_env *env, struct ldlm_namespace *ns, + struct ldlm_lock **lockp, void *req_cookie, + enum ldlm_mode mode, __u64 flags, void *data); static inline struct ofd_thread_info *ofd_info(const struct lu_env *env) { @@ -566,50 +454,29 @@ static inline struct ofd_thread_info *tsi2ofd_info(struct tgt_session_info *tsi) return info; } -static inline void ofd_oti2info(struct ofd_thread_info *info, - struct obd_trans_info *oti) -{ - info->fti_xid = oti->oti_xid; - info->fti_pre_version = oti->oti_pre_version; -} - -static inline void ofd_info2oti(struct ofd_thread_info *info, - struct obd_trans_info *oti) -{ - oti->oti_xid = info->fti_xid; - oti->oti_pre_version = info->fti_pre_version; -} - /* sync on lock cancel is useless when we force a journal flush, * and if we enable async journal commit, we should also turn on * sync on lock cancel if it is not enabled already. */ static inline void ofd_slc_set(struct ofd_device *ofd) { - if (ofd->ofd_syncjournal == 1) + if (ofd->ofd_sync_journal == 1) ofd->ofd_lut.lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL; else if (ofd->ofd_lut.lut_sync_lock_cancel == NEVER_SYNC_ON_CANCEL) ofd->ofd_lut.lut_sync_lock_cancel = ALWAYS_SYNC_ON_CANCEL; } -static inline void ofd_prepare_fidea(struct filter_fid *ff, - const struct obdo *oa) +static inline int ofd_validate_seq(struct obd_export *exp, __u64 seq) { - /* packing fid and converting it to LE for storing into EA. - * Here ->o_stripe_idx should be filled by LOV and rest of - * fields - by client. */ - ff->ff_parent.f_seq = cpu_to_le64(oa->o_parent_seq); - ff->ff_parent.f_oid = cpu_to_le32(oa->o_parent_oid); - /* XXX: we are ignoring o_parent_ver here, since this should - * be the same for all objects in this fileset. */ - ff->ff_parent.f_ver = cpu_to_le32(oa->o_stripe_idx); -} + struct filter_export_data *fed = &exp->exp_filter_data; -/* niobuf_remote has no rnb_ prefix in master */ -#define rnb_offset offset -#define rnb_flags flags -#define rnb_len len -/* the same for niobuf_local */ -#define lnb_flags flags -#define lnb_rc rc + if (unlikely(seq == FID_SEQ_OST_MDT0 && fed->fed_group != 0)) { + /* IDIF request only operates on MDT0 group */ + CERROR("%s: Invalid sequence %#llx for group %u\n", + exp->exp_obd->obd_name, seq, fed->fed_group); + RETURN(-EINVAL); + } + + return 0; +} #endif /* _OFD_INTERNAL_H */