*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2014, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef _OFD_INTERNAL_H
#define OFD_VALID_FLAGS (LA_TYPE | LA_MODE | LA_SIZE | LA_BLOCKS | \
LA_BLKSIZE | LA_ATIME | LA_MTIME | LA_CTIME)
-/* per-client-per-object persistent state (LRU) */
-struct ofd_mod_data {
- struct list_head fmd_list; /* linked to fed_mod_list */
- struct lu_fid fmd_fid; /* FID being written to */
- __u64 fmd_mactime_xid; /* xid highest {m,a,c}time setattr */
- cfs_time_t fmd_expire; /* time when the fmd should expire */
- int fmd_refcount; /* reference counter - list holds 1 */
-};
-
-#define OFD_FMD_MAX_NUM_DEFAULT 128
-#define OFD_FMD_MAX_AGE_DEFAULT msecs_to_jiffies((obd_timeout+10)*MSEC_PER_SEC)
-
#define OFD_SOFT_SYNC_LIMIT_DEFAULT 16
+/*
+ * Update atime if the on-disk value is older than the client's one
+ * by OFD_ATIME_DIFF or more.
+ */
+#define OFD_DEF_ATIME_DIFF 0 /* disabled */
+
/* request stats */
enum {
- LPROC_OFD_STATS_READ = 0,
+ LPROC_OFD_STATS_READ_BYTES = 0,
+ LPROC_OFD_STATS_WRITE_BYTES,
+ LPROC_OFD_STATS_READ,
LPROC_OFD_STATS_WRITE,
LPROC_OFD_STATS_GETATTR,
LPROC_OFD_STATS_SETATTR,
LPROC_OFD_STATS_GET_INFO,
LPROC_OFD_STATS_SET_INFO,
LPROC_OFD_STATS_QUOTACTL,
+ LPROC_OFD_STATS_PREALLOC,
LPROC_OFD_STATS_LAST,
};
if (exp->exp_obd && exp->exp_obd->obd_stats)
lprocfs_counter_add(exp->exp_obd->obd_stats, opcode, amount);
- if (exp->exp_obd && exp->exp_obd->u.obt.obt_jobstats.ojs_hash &&
+ if (exp->exp_obd && obd2obt(exp->exp_obd)->obt_jobstats.ojs_hash &&
(exp_connect_flags(exp) & OBD_CONNECT_JOBSTATS))
lprocfs_job_stats_log(exp->exp_obd, jobid, opcode, amount);
struct ost_id os_oi;
spinlock_t os_last_oid_lock;
struct mutex os_create_lock;
- atomic_t os_refc;
+ refcount_t os_refc;
+ atomic_t os_precreate_in_progress;
struct dt_object *os_lastid_obj;
- unsigned long os_destroys_in_progress:1;
+ unsigned long os_destroys_in_progress:1,
+ os_last_id_synced:1;
};
struct ofd_device {
struct dt_device ofd_dt_dev;
struct dt_device *ofd_osd;
struct obd_export *ofd_osd_exp;
- struct dt_device_param ofd_dt_conf;
/* DLM name-space for meta-data locks maintained by this server */
struct ldlm_namespace *ofd_namespace;
/* last_rcvd file */
struct lu_target ofd_lut;
struct dt_object *ofd_health_check_file;
+ struct local_oid_storage *ofd_los;
- int ofd_subdir_count;
__u64 ofd_inconsistency_self_detected;
__u64 ofd_inconsistency_self_repaired;
+ struct ofd_access_log *ofd_access_log;
+ unsigned int ofd_access_log_size;
+ unsigned int ofd_access_log_mask;
+
struct list_head ofd_seq_list;
rwlock_t ofd_seq_list_lock;
int ofd_seq_count;
int ofd_precreate_batch;
spinlock_t ofd_batch_lock;
- /* protect all statfs-related counters */
- spinlock_t ofd_osfs_lock;
- /* statfs optimization: we cache a bit */
- struct obd_statfs ofd_osfs;
- __u64 ofd_osfs_age;
- int ofd_blockbits;
- /* writes which might be be accounted twice in ofd_osfs.os_bavail */
- u64 ofd_osfs_unstable;
-
- /* counters used during statfs update, protected by ofd_osfs_lock.
- * record when some statfs refresh are in progress */
- int ofd_statfs_inflight;
- /* track writes completed while statfs refresh is underway.
- * tracking is only effective when ofd_statfs_inflight > 1 */
- u64 ofd_osfs_inflight;
-
- /* grants: all values in bytes */
- /* grant lock to protect all grant counters */
- spinlock_t ofd_grant_lock;
- /* total amount of dirty data reported by clients in incoming obdo */
- u64 ofd_tot_dirty;
- /* sum of filesystem space granted to clients for async writes */
- u64 ofd_tot_granted;
- /* grant used by I/Os in progress (between prepare and commit) */
- u64 ofd_tot_pending;
- /* free space threshold over which we stop granting space to clients
- * ofd_grant_ratio is stored as a fixed-point fraction using
- * OFD_GRANT_RATIO_SHIFT of the remaining free space, not in percentage
- * values */
- int ofd_grant_ratio;
- /* number of clients using grants */
- int ofd_tot_granted_clients;
-
- /* ofd mod data: ofd_device wide values */
- int ofd_fmd_max_num; /* per ofd ofd_mod_data */
- cfs_duration_t ofd_fmd_max_age; /* time to fmd expiry */
-
+ /* preferred BRW size, decided by storage type and capability */
+ __u32 ofd_brw_size;
spinlock_t ofd_flags_lock;
unsigned long ofd_raid_degraded:1,
/* sync journal on writes */
- ofd_syncjournal:1,
- /* shall we grant space to clients not
- * supporting OBD_CONNECT_GRANT_PARAM? */
- ofd_grant_compat_disable:1,
+ ofd_sync_journal:1,
/* Protected by ofd_lastid_rwsem. */
ofd_lastid_rebuilding:1,
ofd_record_fid_accessed:1,
- ofd_lfsck_verify_pfid:1;
+ ofd_lfsck_verify_pfid:1,
+ ofd_skip_lfsck:1;
struct seq_server_site ofd_seq_site;
/* the limit of SOFT_SYNC RPCs that will trigger a soft sync */
unsigned int ofd_soft_sync_limit;
/* Protect ::ofd_lastid_rebuilding */
struct rw_semaphore ofd_lastid_rwsem;
__u64 ofd_lastid_gen;
- struct ptlrpc_thread ofd_inconsistency_thread;
+ struct task_struct *ofd_inconsistency_task;
struct list_head ofd_inconsistency_list;
spinlock_t ofd_inconsistency_lock;
+ /* Backwards compatibility */
+ struct attribute *ofd_read_cache_enable;
+ struct attribute *ofd_read_cache_max_filesize;
+ struct attribute *ofd_write_cache_enable;
+ time64_t ofd_atime_diff;
};
static inline struct ofd_device *ofd_dev(struct lu_device *d)
{
- return container_of0(d, struct ofd_device, ofd_dt_dev.dd_lu_dev);
+ return container_of_safe(d, struct ofd_device, ofd_dt_dev.dd_lu_dev);
}
static inline struct obd_device *ofd_obd(struct ofd_device *ofd)
return ofd->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name;
}
+/**
+ * For compatibility: filter_fid could occupy more space in a newer version,
+ * and a downgraded Lustre would then fail to read it with -ERANGE. With this
+ * extra size it can read it again with more space to hold it.
+ */
+#define FILTER_FID_EXTRA_SIZE 32
+
struct ofd_object {
struct lu_object_header ofo_header;
struct dt_object ofo_obj;
- struct lu_fid ofo_pfid;
+ struct filter_fid ofo_ff;
+ time64_t ofo_atime_ondisk;
unsigned int ofo_pfid_checking:1,
ofo_pfid_verified:1;
};
static inline struct ofd_object *ofd_obj(struct lu_object *o)
{
- return container_of0(o, struct ofd_object, ofo_obj.do_lu);
+ return container_of_safe(o, struct ofd_object, ofo_obj.do_lu);
}
static inline int ofd_object_exists(struct ofd_object *obj)
{
struct lu_object *lu = &(_obj)->ofo_obj.do_lu;
- return container_of0(lu_object_next(lu), struct dt_object, do_lu);
+ return container_of(lu_object_next(lu), struct dt_object, do_lu);
}
static inline struct ofd_device *ofd_obj2dev(const struct ofd_object *fo)
struct lu_attr fti_attr;
struct lu_attr fti_attr2;
struct ldlm_res_id fti_resid;
- union {
- struct filter_fid fti_mds_fid;
- struct filter_fid_old fti_mds_fid_old;
- };
+ struct filter_fid fti_mds_fid;
struct ost_id fti_ostid;
struct ofd_object *fti_obj;
union {
struct lu_buf fti_buf;
loff_t fti_off;
- /* Space used by the I/O, used by grant code */
- unsigned long fti_used;
struct ost_lvb fti_lvb;
- struct lfsck_request fti_lr;
+ union {
+ struct lfsck_req_local fti_lrl;
+ struct obd_connect_data fti_ocd;
+ };
};
extern void target_recovery_fini(struct obd_device *obd);
extern void target_recovery_init(struct lu_target *lut, svc_handler_t handler);
+/* ofd_access_log.c */
+bool ofd_access_log_size_is_valid(unsigned int size);
+int ofd_access_log_module_init(void);
+void ofd_access_log_module_exit(void);
+
+struct ofd_access_log;
+struct ofd_access_log *ofd_access_log_create(const char *ofd_name, size_t size);
+void ofd_access_log_delete(struct ofd_access_log *oal);
+void ofd_access(const struct lu_env *env, struct ofd_device *m,
+ const struct lu_fid *parent_fid, __u64 begin, __u64 end,
+ unsigned int size, unsigned int segment_count, int rw);
+
/* ofd_dev.c */
extern struct lu_context_key ofd_thread_key;
int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd);
struct lu_fid *fid, struct fiemap *fiemap);
/* ofd_obd.c */
-extern struct obd_ops ofd_obd_ops;
-int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd,
- struct obd_statfs *osfs, __u64 max_age,
- int *from_cache);
+extern const struct obd_ops ofd_obd_ops;
int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd,
const struct lu_fid *fid, int orphan);
int ofd_statfs(const struct lu_env *env, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age, __u32 flags);
+ struct obd_statfs *osfs, time64_t max_age, __u32 flags);
int ofd_obd_disconnect(struct obd_export *exp);
/* ofd_fs.c */
int ofd_stop_inconsistency_verification_thread(struct ofd_device *ofd);
int ofd_verify_ff(const struct lu_env *env, struct ofd_object *fo,
struct obdo *oa);
+int ofd_verify_layout_version(const struct lu_env *env,
+ struct ofd_object *fo, const struct obdo *oa);
int ofd_preprw(const struct lu_env *env,int cmd, struct obd_export *exp,
struct obdo *oa, int objcount, struct obd_ioobj *obj,
struct niobuf_remote *rnb, int *nr_local,
int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
struct obdo *oa, int objcount, struct obd_ioobj *obj,
struct niobuf_remote *rnb, int npages,
- struct niobuf_local *lnb, int old_rc);
+ struct niobuf_local *lnb, int old_rc, int nob,
+ ktime_t kstart);
/* ofd_trans.c */
struct thandle *ofd_trans_create(const struct lu_env *env,
int ofd_trans_start(const struct lu_env *env,
struct ofd_device *ofd, struct ofd_object *fo,
struct thandle *th);
-void ofd_trans_stop(const struct lu_env *env, struct ofd_device *ofd,
+int ofd_trans_stop(const struct lu_env *env, struct ofd_device *ofd,
struct thandle *th, int rc);
int ofd_txn_stop_cb(const struct lu_env *env, struct thandle *txn,
void *cookie);
/* lproc_ofd.c */
+int ofd_tunables_init(struct ofd_device *ofd);
#ifdef CONFIG_PROC_FS
-extern struct lprocfs_vars lprocfs_ofd_obd_vars[];
-void ofd_stats_counter_init(struct lprocfs_stats *stats);
+void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset,
+ enum lprocfs_counter_config cntr_umask);
#else
-static inline void ofd_stats_counter_init(struct lprocfs_stats *stats) {}
+static inline void
+ofd_stats_counter_init(struct lprocfs_stats *stats,
+ unsigned int offset,
+ enum lprocfs_counter_config cntr_umask) {}
#endif
/* ofd_objects.c */
struct ofd_device *ofd,
const struct lu_fid *fid);
int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo);
+int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo,
+ const struct obdo *oa, struct filter_fid *ff);
int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
- u64 id, struct ofd_seq *oseq, int nr, int sync);
+ u64 id, struct ofd_seq *oseq, int nr, int sync,
+ bool trans_local);
-void ofd_object_put(const struct lu_env *env, struct ofd_object *fo);
+/* Hand the dt_object embedded in @fo (ofo_obj) to dt_object_put(). */
+static inline void ofd_object_put(const struct lu_env *env,
+				  struct ofd_object *fo)
+{
+	struct dt_object *dt = &fo->ofo_obj;
+
+	dt_object_put(env, dt);
+}
int ofd_attr_set(const struct lu_env *env, struct ofd_object *fo,
- struct lu_attr *la, struct filter_fid *ff);
+ struct lu_attr *la, struct obdo *oa);
int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo,
__u64 start, __u64 end, struct lu_attr *la,
- struct filter_fid *ff, struct obdo *oa);
-int ofd_object_destroy(const struct lu_env *, struct ofd_object *, int);
+ struct obdo *oa);
+int ofd_object_fallocate(const struct lu_env *env, struct ofd_object *fo,
+ __u64 start, __u64 end, int mode, struct lu_attr *la,
+ struct obdo *oa);
+int ofd_destroy(const struct lu_env *, struct ofd_object *, int);
int ofd_attr_get(const struct lu_env *env, struct ofd_object *fo,
struct lu_attr *la);
-int ofd_attr_handle_ugid(const struct lu_env *env, struct ofd_object *fo,
+int ofd_attr_handle_id(const struct lu_env *env, struct ofd_object *fo,
struct lu_attr *la, int is_setattr);
static inline
return fo;
}
-/* ofd_grants.c */
-#define OFD_GRANT_RATIO_SHIFT 8
-static inline u64 ofd_grant_reserved(struct ofd_device *ofd, u64 bavail)
-{
- return (bavail * ofd->ofd_grant_ratio) >> OFD_GRANT_RATIO_SHIFT;
-}
-
-static inline int ofd_grant_ratio_conv(int percentage)
-{
- return (percentage << OFD_GRANT_RATIO_SHIFT) / 100;
-}
-
-static inline int ofd_grant_param_supp(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_GRANT_PARAM);
-}
-
-/* Blocksize used for client not supporting OBD_CONNECT_GRANT_PARAM.
- * That's 4KB=2^12 which is the biggest block size known to work whatever
- * the client's page size is. */
-#define COMPAT_BSIZE_SHIFT 12
-static inline int ofd_grant_compat(struct obd_export *exp,
- struct ofd_device *ofd)
-{
- /* Clients which don't support OBD_CONNECT_GRANT_PARAM cannot handle
- * a block size > page size and consume PAGE_CACHE_SIZE of grant when
- * dirtying a page regardless of the block size */
- return !!(ofd_obd(ofd)->obd_self_export != exp &&
- ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT &&
- !ofd_grant_param_supp(exp));
-}
-
-static inline int ofd_grant_prohibit(struct obd_export *exp,
- struct ofd_device *ofd)
-{
- /* When ofd_grant_compat_disable is set, we don't grant any space to
- * clients not supporting OBD_CONNECT_GRANT_PARAM.
- * Otherwise, space granted to such a client is inflated since it
- * consumes PAGE_CACHE_SIZE of grant space per block */
- return !!(ofd_grant_compat(exp, ofd) && ofd->ofd_grant_compat_disable);
-}
-
-void ofd_grant_sanity_check(struct obd_device *obd, const char *func);
-long ofd_grant_connect(const struct lu_env *env, struct obd_export *exp,
- u64 want, bool new_conn);
-void ofd_grant_discard(struct obd_export *exp);
-void ofd_grant_prepare_read(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa);
-void ofd_grant_prepare_write(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct niobuf_remote *rnb,
- int niocount);
-void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp, int rc);
-int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr);
-
-/* ofd_fmd.c */
-int ofd_fmd_init(void);
-void ofd_fmd_exit(void);
-struct ofd_mod_data *ofd_fmd_find(struct obd_export *exp,
- const struct lu_fid *fid);
-struct ofd_mod_data *ofd_fmd_get(struct obd_export *exp,
- const struct lu_fid *fid);
-void ofd_fmd_put(struct obd_export *exp, struct ofd_mod_data *fmd);
-void ofd_fmd_expire(struct obd_export *exp);
-void ofd_fmd_cleanup(struct obd_export *exp);
-#ifdef DO_FMD_DROP
-void ofd_fmd_drop(struct obd_export *exp, const struct lu_fid *fid);
-#else
-#define ofd_fmd_drop(exp, fid) do {} while (0)
-#endif
-
/* ofd_dev.c */
int ofd_fid_set_index(const struct lu_env *env, struct ofd_device *ofd,
int index);
extern struct ldlm_valblock_ops ofd_lvbo;
/* ofd_dlm.c */
-int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
- void *req_cookie, ldlm_mode_t mode, __u64 flags,
- void *data);
+extern struct kmem_cache *ldlm_glimpse_work_kmem;
+
+int ofd_intent_policy(const struct lu_env *env, struct ldlm_namespace *ns,
+ struct ldlm_lock **lockp, void *req_cookie,
+ enum ldlm_mode mode, __u64 flags, void *data);
static inline struct ofd_thread_info *ofd_info(const struct lu_env *env)
{
- struct ofd_thread_info *info;
-
- lu_env_refill((void *)env);
- info = lu_context_key_get(&env->le_ctx, &ofd_thread_key);
- LASSERT(info);
- return info;
+ return lu_env_info(env, &ofd_thread_key);
}
static inline struct ofd_thread_info *ofd_info_init(const struct lu_env *env,
* sync on lock cancel if it is not enabled already. */
static inline void ofd_slc_set(struct ofd_device *ofd)
{
- if (ofd->ofd_syncjournal == 1)
- ofd->ofd_lut.lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL;
- else if (ofd->ofd_lut.lut_sync_lock_cancel == NEVER_SYNC_ON_CANCEL)
- ofd->ofd_lut.lut_sync_lock_cancel = ALWAYS_SYNC_ON_CANCEL;
+ if (ofd->ofd_sync_journal == 1)
+ ofd->ofd_lut.lut_sync_lock_cancel = SYNC_LOCK_CANCEL_NEVER;
+ else if (ofd->ofd_lut.lut_sync_lock_cancel == SYNC_LOCK_CANCEL_NEVER)
+ ofd->ofd_lut.lut_sync_lock_cancel = SYNC_LOCK_CANCEL_ALWAYS;
+}
+
+/**
+ * Check that sequence @seq is acceptable for export @exp.
+ *
+ * FID_SEQ_OST_MDT0 is rejected when the export's filter data records a
+ * non-zero group (fed_group); every other combination is accepted.
+ *
+ * \param[in] exp	export whose filter data (fed_group) is checked
+ * \param[in] seq	sequence to validate
+ *
+ * \retval 0		sequence is acceptable
+ * \retval -EINVAL	FID_SEQ_OST_MDT0 used with a non-zero group
+ */
+static inline int ofd_validate_seq(struct obd_export *exp, __u64 seq)
+{
+	struct filter_export_data *fed = &exp->exp_filter_data;
+
+	if (unlikely(seq == FID_SEQ_OST_MDT0 && fed->fed_group != 0)) {
+		/* IDIF request only operates on MDT0 group */
+		CERROR("%s: Invalid sequence %#llx for group %u\n",
+		       exp->exp_obd->obd_name, seq, fed->fed_group);
+		/*
+		 * Plain return: this function has no ENTRY, and RETURN() is
+		 * meant to be paired with it.
+		 */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * Check whether the requested IO carries an older layout version than the
+ * one on disk.
+ *
+ * The LU_LAYOUT_RESYNC bit is masked out of both versions before they are
+ * compared. Returns true only when the request version is numerically
+ * smaller AND both versions have the same LU_LAYOUT_HIGEN bit.
+ */
+static inline bool ofd_layout_version_less(__u32 req_version,
+ __u32 ondisk_version)
+{
+ __u32 req = req_version & ~LU_LAYOUT_RESYNC;
+ __u32 ondisk = ondisk_version & ~LU_LAYOUT_RESYNC;
+
+ /**
+ * The request layout version may have wrapped around to the smallest
+ * value; in that case @req < @ondisk, but @req does not have the
+ * high-end bit set while @ondisk does, so it is not treated as older.
+ */
+ return (req < ondisk) &&
+ ((req & LU_LAYOUT_HIGEN) == (ondisk & LU_LAYOUT_HIGEN));
}
-static inline void ofd_prepare_fidea(struct filter_fid *ff,
- const struct obdo *oa)
+static inline int ofd_seq_is_exhausted(struct ofd_device *ofd,
+ const struct obdo *oa)
{
- /* packing fid and converting it to LE for storing into EA.
- * Here ->o_stripe_idx should be filled by LOV and rest of
- * fields - by client. */
- ff->ff_parent.f_seq = cpu_to_le64(oa->o_parent_seq);
- ff->ff_parent.f_oid = cpu_to_le32(oa->o_parent_oid);
- /* XXX: we are ignoring o_parent_ver here, since this should
- * be the same for all objects in this fileset. */
- ff->ff_parent.f_ver = cpu_to_le32(oa->o_stripe_idx);
+ struct seq_server_site *ss = &ofd->ofd_seq_site;
+ __u64 seq_width = ss->ss_client_seq->lcs_width;
+ __u64 seq = ostid_seq(&oa->o_oi);
+ __u64 oid = ostid_id(&oa->o_oi);
+
+ if (fid_seq_is_norm(seq))
+ return oid >= min(seq_width, OBIF_MAX_OID);
+ if (fid_seq_is_idif(seq) || fid_seq_is_mdt0(seq))
+ return oid >= min(seq_width, IDIF_MAX_OID);
+ CERROR("%s : invalid o_seq "DOSTID"\n",
+ ofd_name(ofd), POSTID(&oa->o_oi));
+ return -EINVAL;
}
#endif /* _OFD_INTERNAL_H */