*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
/* struct dirent64 */
#include <linux/dirent.h>
#include <linux/statfs.h>
+#include <linux/bio.h>
#include <ldiskfs/ldiskfs.h>
#include <ldiskfs/ldiskfs_jbd2.h>
/* LUSTRE_OSD_NAME */
#include <obd.h>
-/* class_register_type(), class_unregister_type(), class_get_type() */
+/* class_register_type(), class_unregister_type() */
#include <obd_class.h>
#include <lustre_disk.h>
#include <dt_object.h>
/* OI scrub should skip this inode. */
#define LDISKFS_STATE_LUSTRE_NOSCRUB 31
+#define LDISKFS_STATE_LUSTRE_DESTROY 30
/** Enable thandle usage statistics */
#define OSD_THANDLE_STATS (0)
#define ADMIN_USR "admin_quotafile_v2.usr"
#define ADMIN_GRP "admin_quotafile_v2.grp"
+/* Statfs space reservation for fragmentation and local objects */
+#define OSD_STATFS_RESERVED (1ULL << 23) /* 8MB */
+#define OSD_STATFS_RESERVED_SHIFT (7) /* reserve 0.78% of all space */
+
+/* check whether ldiskfs supports project quota */
+#ifndef LDISKFS_IOC_FSSETXATTR
+#undef HAVE_PROJECT_QUOTA
+#endif
+
+#define OBD_BRW_MAPPED OBD_BRW_LOCAL1
+
struct osd_directory {
struct iam_container od_container;
struct iam_descr od_descr;
extern const int osd_dto_credits_noquota[];
struct osd_object {
- struct dt_object oo_dt;
- /**
- * Inode for file system object represented by this osd_object. This
- * inode is pinned for the whole duration of lu_object life.
- *
- * Not modified concurrently (either setup early during object
- * creation, or assigned by osd_object_create() under write lock).
- */
- struct inode *oo_inode;
- /**
- * to protect index ops.
- */
- struct htree_lock_head *oo_hl_head;
+ struct dt_object oo_dt;
+ /**
+ * Inode for file system object represented by this osd_object. This
+ * inode is pinned for the whole duration of lu_object life.
+ *
+ * Not modified concurrently (either setup early during object
+ * creation, or assigned by osd_create() under write lock).
+ */
+ struct inode *oo_inode;
+ /**
+ * to protect index ops.
+ */
+ struct htree_lock_head *oo_hl_head;
struct rw_semaphore oo_ext_idx_sem;
struct rw_semaphore oo_sem;
struct osd_directory *oo_dir;
/** protects inode attributes. */
spinlock_t oo_guard;
- /**
- * Following two members are used to indicate the presence of dot and
- * dotdot in the given directory. This is required for interop mode
- * (b11826).
- */
- int oo_compat_dot_created;
- int oo_compat_dotdot_created;
+
+ /**
+ * Following two members *compat_dot* are used to indicate
+ * the presence of dot and dotdot in the given directory.
+ * This is required for interop mode (b11826).
+ */
+ __u32 oo_destroyed:1,
+ oo_pfid_in_lma:1,
+ oo_compat_dot_created:1,
+ oo_compat_dotdot_created:1;
+
+ /* the i_flags in LMA */
+ __u32 oo_lma_flags;
const struct lu_env *oo_owner;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map oo_dep_map;
-#endif
+
+ struct list_head oo_xattr_list;
+ struct lu_object_header *oo_header;
};
struct osd_obj_seq {
struct osd_mdobj_map {
struct dentry *omm_remote_parent;
};
-
-#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
- ldiskfs_add_entry(handle, child, cinode, hlock)
+int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd,
+ handle_t *handle, struct dentry *child,
+ struct inode *inode, struct htree_lock *hlock);
#define OSD_OTABLE_IT_CACHE_SIZE 64
#define OSD_OTABLE_IT_CACHE_MASK (~(OSD_OTABLE_IT_CACHE_SIZE - 1))
struct osd_inconsistent_item {
- /* link into osd_scrub::os_inconsistent_items,
- * protected by osd_scrub::os_lock. */
+ /* link into lustre_scrub::os_inconsistent_items,
+ * protected by lustre_scrub::os_lock. */
struct list_head oii_list;
/* The right FID <=> ino#/gen mapping. */
int ooc_consumer_idx;
/* How many items in ooc_cache. */
- int ooc_cached_items;
+ __u64 ooc_cached_items;
/* Position for up layer LFSCK iteration pre-loading. */
- __u32 ooc_pos_preload;
+ __u64 ooc_pos_preload;
};
struct osd_otable_it {
struct osd_device *ooi_dev;
struct osd_otable_cache ooi_cache;
+ struct osd_iit_param ooi_iit_param;
/* The following bits can be updated/checked w/o lock protection.
* If more bits will be introduced in the future and need lock to
ooi_waiting:1; /* it::next is waiting. */
};
+struct osd_obj_orphan {
+ struct list_head oor_list; /* presumably links into od_orphan_list */
+ struct lu_env *oor_env; /* to identify "own" records */
+ __u32 oor_ino; /* presumably the orphan's inode number */
+};
+
+enum osd_t10_type {
+ OSD_T10_TYPE_UNKNOWN = 0, /* zero: T10PI not supported */
+ OSD_T10_TYPE1_CRC, /* presumably T10PI Type 1, CRC guard */
+ OSD_T10_TYPE3_CRC, /* presumably T10PI Type 3, CRC guard */
+ OSD_T10_TYPE1_IP, /* presumably T10PI Type 1, IP checksum guard */
+ OSD_T10_TYPE3_IP /* presumably T10PI Type 3, IP checksum guard */
+};
+
/*
* osd device.
*/
*/
unsigned int od_fl_capa:1,
od_maybe_new:1,
- od_noscrub:1,
od_igif_inoi:1,
od_check_ff:1,
od_is_ost:1,
- od_lma_self_repair:1;
+ od_in_init:1,
+ od_index_in_idif:1,
+ /* Other flags */
+ od_nonrotational:1;
- unsigned long od_capa_timeout;
- __u32 od_capa_alg;
+ __s64 od_auto_scrub_interval;
__u32 od_dirent_journal;
- struct lustre_capa_key *od_capa_keys;
- struct hlist_head *od_capa_hash;
-
+ int od_index;
struct proc_dir_entry *od_proc_entry;
struct lprocfs_stats *od_stats;
/* service name associated with the osd device */
char od_svname[MAX_OBD_NAME];
char od_mntdev[MAX_OBD_NAME];
+ uuid_t od_uuid;
- /* quota slave instance */
- struct qsd_instance *od_quota_slave;
+ /* quota slave instance for inode */
+ struct qsd_instance *od_quota_slave_md;
+
+ /* quota slave instance for block */
+ struct qsd_instance *od_quota_slave_dt;
/* osd seq instance */
struct lu_client_seq *od_cl_seq;
* exceeds the osd_device::od_full_scrub_threshold_rate,
* then trigger OI scrub to scan the whole device. */
__u64 od_full_scrub_threshold_rate;
+
+ /* a list of orphaned agent inodes, protected with od_osfs_lock */
+ struct list_head od_orphan_list;
+ struct list_head od_index_backup_list;
+ struct list_head od_index_restore_list;
+ spinlock_t od_lock;
+ struct inode *od_index_backup_inode;
+ enum lustre_index_backup_policy od_index_backup_policy;
+ int od_index_backup_stop;
+ /* T10PI type, zero if not supported */
+ enum osd_t10_type od_t10_type;
};
+/**
+ * Select the quota slave instance matching the role of \a osd: the block
+ * instance (od_quota_slave_dt) when the device is flagged as an OST,
+ * otherwise the inode instance (od_quota_slave_md).
+ */
+static inline struct qsd_instance *osd_def_qsd(struct osd_device *osd)
+{
+	return osd->od_is_ost ? osd->od_quota_slave_dt :
+				osd->od_quota_slave_md;
+}
+
enum osd_full_scrub_ratio {
/* Trigger OI scrub to scan the whole device directly. */
OFSR_DIRECTLY = 0,
#define FULL_SCRUB_THRESHOLD_RATE_DEFAULT 60
-/* There are at most 10 uid/gids are affected in a transaction, and
+/* At most 15 uid/gid/projids are affected in a transaction, and
* that's rename case:
- * - 2 for source parent uid & gid;
- * - 2 for source child uid & gid ('..' entry update when child is directory);
- * - 2 for target parent uid & gid;
- * - 2 for target child uid & gid (if the target child exists);
- * - 2 for root uid & gid (last_rcvd, llog, etc);
+ * - 3 for source parent uid & gid & projid;
+ * - 3 for source child uid & gid & projid ('..' entry update when
+ * child is directory);
+ * - 3 for target parent uid & gid & projid;
+ * - 3 for target child uid & gid & projid (if the target child exists);
+ * - 3 for root uid & gid & projid (last_rcvd, llog, etc);
*
- * The 0 to (OSD_MAX_UGID_CNT - 1) bits of ot_id_type is for indicating
- * the id type of each id in the ot_id_array.
*/
-#define OSD_MAX_UGID_CNT 10
+#define OSD_MAX_UGID_CNT 15
-enum {
+enum osd_op_type {
OSD_OT_ATTR_SET = 0,
OSD_OT_PUNCH = 1,
OSD_OT_XATTR_SET = 2,
OSD_OT_WRITE = 7,
OSD_OT_INSERT = 8,
OSD_OT_DELETE = 9,
- OSD_OT_UPDATE = 10,
- OSD_OT_QUOTA = 11,
- OSD_OT_MAX = 12
+ OSD_OT_QUOTA = 10,
+ OSD_OT_MAX = 11
+};
+
+struct osd_access_lock {
+ struct list_head tl_list;
+ struct osd_object *tl_obj;
+ bool tl_shared;
+ bool tl_truncate;
};
struct osd_thandle {
struct thandle ot_super;
handle_t *ot_handle;
struct ldiskfs_journal_cb_entry ot_jcb;
- struct list_head ot_dcb_list;
+ struct list_head ot_commit_dcb_list;
+ struct list_head ot_stop_dcb_list;
/* Link to the device, for debugging. */
struct lu_ref_link ot_dev_link;
- unsigned short ot_credits;
- unsigned short ot_id_cnt;
- unsigned short ot_id_type;
- uid_t ot_id_array[OSD_MAX_UGID_CNT];
+ unsigned int ot_credits;
+
+ /* quota IDs related to the transaction */
+ unsigned short ot_id_cnt;
+ __u8 ot_id_res[OSD_MAX_UGID_CNT];
+ __u8 ot_id_types[OSD_MAX_UGID_CNT];
+ uid_t ot_id_array[OSD_MAX_UGID_CNT];
struct lquota_trans *ot_quota_trans;
+
+ unsigned int ot_remove_agents:1;
#if OSD_THANDLE_STATS
/** time when this handle was allocated */
- cfs_time_t oth_alloced;
+ ktime_t oth_alloced;
/** time when this thanle was started */
- cfs_time_t oth_started;
+ ktime_t oth_started;
#endif
+ struct list_head ot_trunc_locks;
};
/**
* there would be one ext3 readdir for every mdd readdir page.
*/
-#define OSD_IT_EA_BUFSIZE (PAGE_CACHE_SIZE + PAGE_CACHE_SIZE/4)
+#define OSD_IT_EA_BUFSIZE (PAGE_SIZE + PAGE_SIZE/4)
/**
* This is iterator's in-memory data structure in interoperability
* mode (i.e. iterator over ldiskfs style directory)
*/
struct osd_it_ea {
- struct osd_object *oie_obj;
- /** used in ldiskfs iterator, to stored file pointer */
- struct file oie_file;
- /** how many entries have been read-cached from storage */
- int oie_rd_dirent;
- /** current entry is being iterated by caller */
- int oie_it_dirent;
- /** current processing entry */
- struct osd_it_ea_dirent *oie_dirent;
- /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */
- void *oie_buf;
+ struct osd_object *oie_obj;
+ /** used in ldiskfs iterator, to stored file pointer */
+ struct file oie_file;
+ /** how many entries have been read-cached from storage */
+ int oie_rd_dirent;
+ /** current entry is being iterated by caller */
+ int oie_it_dirent;
+ /** current processing entry */
+ struct osd_it_ea_dirent *oie_dirent;
+ /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */
+ void *oie_buf;
+ struct dentry oie_dentry;
};
/**
struct list_head oiq_list;
};
-#define MAX_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / 512)
+#define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
struct osd_iobuf {
wait_queue_head_t dr_wait;
int dr_npages;
int dr_error;
int dr_frags;
- unsigned int dr_ignore_quota:1;
unsigned int dr_elapsed_valid:1; /* we really did count time */
unsigned int dr_rw:1;
struct lu_buf dr_pg_buf;
struct page **dr_pages;
+ struct niobuf_local **dr_lnbs;
struct lu_buf dr_bl_buf;
- unsigned long *dr_blocks;
- unsigned long dr_start_time;
- unsigned long dr_elapsed; /* how long io took */
+ struct lu_buf dr_lnb_buf;
+ sector_t *dr_blocks;
+ ktime_t dr_start_time;
+ ktime_t dr_elapsed; /* how long io took */
struct osd_device *dr_dev;
unsigned int dr_init_at; /* the line iobuf was initialized */
};
+#ifdef HAVE_INODE_TIMESPEC64
+# define osd_timespec timespec64
+# define osd_timespec_trunc(ts, gran) timespec64_trunc((ts), (gran))
+#else
+# define osd_timespec timespec
+# define osd_timespec_trunc(ts, gran) timespec_trunc((ts), (gran))
+#endif
+
+/**
+ * Build a timestamp holding \a seconds whole seconds (every other field
+ * zero) and truncate it to the time granularity of \a inode's superblock.
+ */
+static inline struct osd_timespec osd_inode_time(struct inode *inode,
+						 s64 seconds)
+{
+	struct super_block *sb = inode->i_sb;
+	struct osd_timespec ts = { 0 };
+
+	ts.tv_sec = seconds;
+
+	return osd_timespec_trunc(ts, sb->s_time_gran);
+}
+
+#define OSD_INS_CACHE_SIZE 8
+
struct osd_thread_info {
- const struct lu_env *oti_env;
- /**
- * used for index operations.
- */
- struct dentry oti_obj_dentry;
- struct dentry oti_child_dentry;
+ const struct lu_env *oti_env;
+ /**
+ * used for index operations.
+ */
+ struct dentry oti_obj_dentry;
+ struct dentry oti_child_dentry;
+
+ /** dentry for Iterator context. */
+ struct dentry oti_it_dentry;
- /** dentry for Iterator context. */
- struct dentry oti_it_dentry;
- struct htree_lock *oti_hlock;
+ union {
+ /* fake struct file for osd_object_sync */
+ struct file oti_file;
+ /* osd_statfs() */
+ struct kstatfs oti_ksfs;
+ };
- struct lu_fid oti_fid;
+ struct htree_lock *oti_hlock;
+
+ struct lu_fid oti_fid;
struct lu_fid oti_fid2;
struct lu_fid oti_fid3;
struct osd_inode_id oti_id;
struct osd_inode_id oti_id3;
struct ost_id oti_ostid;
- /*
- * XXX temporary: for ->i_op calls.
- */
- struct timespec oti_time;
- /*
- * XXX temporary: fake struct file for osd_object_sync
- */
- struct file oti_file;
- /*
- * XXX temporary: for capa operations.
- */
- struct lustre_capa_key oti_capa_key;
- struct lustre_capa oti_capa;
-
- /** osd_device reference, initialized in osd_trans_start() and
- used in osd_trans_stop() */
- struct osd_device *oti_dev;
-
/**
* following ipd and it structures are used for osd_index_iam_lookup()
* these are defined separately as we might do index operation
* in open iterator session.
*/
- /** osd iterator context used for iterator session */
-
- union {
- struct osd_it_iam oti_it;
- /* ldiskfs iterator data structure,
- * see osd_it_ea_{init, fini} */
- struct osd_it_ea oti_it_ea;
- struct osd_it_quota oti_it_quota;
- };
-
/** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */
void *oti_it_ea_buf;
+ unsigned int oti_it_ea_buf_used:1;
- struct kstatfs oti_ksfs;
-
- /** IAM iterator for index operation. */
- struct iam_iterator oti_idx_it;
+ /* IAM iterator for index operation. */
+ struct iam_iterator oti_idx_it;
/** union to guarantee that ->oti_ipd[] has proper alignment. */
union {
+ char oti_name[48];
char oti_it_ipd[DX_IPD_MAX_SIZE];
long long oti_alignment_lieutenant;
};
struct osd_idmap_cache oti_cache;
- unsigned int oti_it_inline:1;
- int oti_r_locks;
- int oti_w_locks;
- int oti_txns;
- /** used in osd_fid_set() to put xattr */
- struct lu_buf oti_buf;
- struct lu_buf oti_big_buf;
- /** used in osd_ea_fid_set() to set fid into common ea */
+ /* dedicated OI cache for insert (which needs inum) */
+ struct osd_idmap_cache *oti_ins_cache;
+ int oti_ins_cache_size;
+ int oti_ins_cache_used;
+ /* inc by osd_trans_create and dec by osd_trans_stop */
+ int oti_ins_cache_depth;
+
+ int oti_r_locks;
+ int oti_w_locks;
+ int oti_txns;
+ /** used in osd_fid_set() to put xattr */
+ struct lu_buf oti_buf;
+ struct lu_buf oti_big_buf;
+ /** used in osd_ea_fid_set() to set fid into common ea */
union {
- struct lustre_mdt_attrs oti_mdt_attrs;
- /* old LMA for compatibility */
- char oti_mdt_attrs_old[LMA_OLD_SIZE];
+ struct lustre_ost_attrs oti_ost_attrs;
+ struct filter_fid_18_23 oti_ff_old;
+ struct filter_fid oti_ff;
};
/** 0-copy IO */
- struct osd_iobuf oti_iobuf;
- struct inode oti_inode;
+ struct osd_iobuf oti_iobuf;
+ /* used to access objects in /O */
+ struct inode *oti_inode;
#define OSD_FID_REC_SZ 32
- char oti_ldp[OSD_FID_REC_SZ];
- char oti_ldp2[OSD_FID_REC_SZ];
+ char oti_ldp[OSD_FID_REC_SZ];
+ char oti_ldp2[OSD_FID_REC_SZ];
/* used by quota code */
union {
-#ifdef HAVE_DQUOT_FS_DISK_QUOTA
- struct fs_disk_quota oti_fdq;
+#if defined(HAVE_DQUOT_QC_DQBLK)
+ struct qc_dqblk oti_qdq;
#else
- struct if_dqblk oti_dqblk;
+ struct fs_disk_quota oti_fdq;
#endif
struct if_dqinfo oti_dqinfo;
};
/* Tracking for transaction credits, to allow debugging and optimizing
* cases where a large number of credits are being allocated for
* single transaction. */
- unsigned short oti_declare_ops[OSD_OT_MAX];
- unsigned short oti_declare_ops_rb[OSD_OT_MAX];
- unsigned short oti_declare_ops_cred[OSD_OT_MAX];
- bool oti_rollback;
-
- char oti_name[48];
- union {
- struct filter_fid_old oti_ff;
- struct filter_fid oti_ff_new;
- };
+ unsigned int oti_credits_before;
+ unsigned int oti_declare_ops[OSD_OT_MAX];
+ unsigned int oti_declare_ops_cred[OSD_OT_MAX];
+ unsigned int oti_declare_ops_used[OSD_OT_MAX];
+ struct osd_directory oti_iam;
+
+ struct page **oti_dio_pages;
+ int oti_dio_pages_used;
};
extern int ldiskfs_pdo;
+#ifdef HAVE_BVEC_ITER_ALL
+#define DECLARE_BVEC_ITER_ALL(iter) struct bvec_iter_all iter
+#else
+#define DECLARE_BVEC_ITER_ALL(iter) int iter
+#endif
+
+#ifndef HAVE_VFS_SETXATTR
+#define osd_setxattr(dentry, inode, name, buf, len, flag) \
+ ((inode)->i_op->setxattr(dentry, name, buf, len, flag))
+#define osd_getxattr(dentry, inode, name, buf, len) \
+ ((inode)->i_op->getxattr(dentry, name, buf, len))
+#define osd_removexattr(dentry, inode, name) \
+ ((inode)->i_op->removexattr(dentry, name))
+#else /* HAVE_VFS_SETXATTR */
+#define osd_setxattr(dentry, inode, name, buf, len, flag) \
+ __vfs_setxattr(dentry, inode, name, buf, len, flag)
+#define osd_getxattr(dentry, inode, name, buf, len) \
+ __vfs_getxattr(dentry, inode, name, buf, len)
+#define osd_removexattr(dentry, inode, name) \
+ __vfs_removexattr(dentry, name)
+#endif /* !HAVE_VFS_SETXATTR */
+
static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
const char *name, void *buf, int len)
{
dentry->d_inode = inode;
dentry->d_sb = inode->i_sb;
- return inode->i_op->getxattr(dentry, name, buf, len);
+ return osd_getxattr(dentry, inode, name, buf, len);
}
static inline int __osd_xattr_set(struct osd_thread_info *info,
{
struct dentry *dentry = &info->oti_child_dentry;
- ll_vfs_dq_init(inode);
+ dquot_initialize(inode);
dentry->d_inode = inode;
dentry->d_sb = inode->i_sb;
- return inode->i_op->setxattr(dentry, name, buf, buflen, fl);
+ return osd_setxattr(dentry, inode, name, buf, buflen, fl);
}
#ifdef CONFIG_PROC_FS
/* osd_lproc.c */
extern struct lprocfs_vars lprocfs_osd_obd_vars[];
-extern struct lprocfs_vars lprocfs_osd_module_vars[];
int osd_procfs_init(struct osd_device *osd, const char *name);
int osd_procfs_fini(struct osd_device *osd);
void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
+int osd_register_proc_index_in_idif(struct osd_device *osd);
+#endif
#endif
int osd_statfs(const struct lu_env *env, struct dt_device *dev,
- struct obd_statfs *sfs);
-int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
- struct lustre_capa *capa, __u64 opc);
+ struct obd_statfs *sfs, struct obd_statfs_info *info);
struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
struct osd_inode_id *id);
+struct inode *
+osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
+ struct osd_inode_id *id, struct lu_fid *fid);
int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
const struct lu_fid *fid, __u32 compat, __u32 incompat);
int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
- struct dentry *dentry, struct lustre_mdt_attrs *lma);
+ struct dentry *dentry, struct lustre_ost_attrs *loa);
void osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd,
struct osd_inode_id *id, const struct lu_fid *fid);
int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
const struct lu_fid *fid, const struct osd_inode_id *id,
handle_t *th);
-void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags);
-int osd_scrub_file_store(struct osd_scrub *scrub);
char *osd_lf_fid2name(const struct lu_fid *fid);
-int osd_scrub_start(struct osd_device *dev, __u32 flags);
+int osd_scrub_start(const struct lu_env *env, struct osd_device *dev,
+ __u32 flags);
+void osd_scrub_stop(struct osd_device *dev);
int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev);
void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev);
int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic,
int insert);
int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
struct osd_inode_id *id);
-int osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
+void osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
+
+struct dentry *osd_lookup_one_len_unlocked(struct osd_device *dev,
+ const char *name,
+ struct dentry *base, int len);
+struct dentry *osd_lookup_one_len(struct osd_device *dev, const char *name,
+ struct dentry *base, int len);
int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
u64 seq, struct lu_seq_range *range);
int osd_delete_from_remote_parent(const struct lu_env *env,
struct osd_device *osd,
struct osd_object *obj,
- struct osd_thandle *oh);
+ struct osd_thandle *oh, bool destroy);
int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
struct osd_object *obj, struct osd_thandle *oh);
int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
struct lquota_id_info *qi, struct osd_object *obj,
bool enforce, int *flags);
int osd_declare_inode_qid(const struct lu_env *env, qid_t uid, qid_t gid,
- long long space, struct osd_thandle *oh,
- struct osd_object *obj, bool is_blk, int *flags,
- bool force);
+ __u32 projid, long long space, struct osd_thandle *oh,
+ struct osd_object *obj, int *flags,
+ enum osd_qid_declare_flags);
const struct dt_rec *osd_quota_pack(struct osd_object *obj,
const struct dt_rec *rec,
union lquota_rec *quota_rec);
void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec);
-int osd_quota_migration(const struct lu_env *env, struct dt_object *dt,
- const struct dt_index_features *feat);
-
-static inline bool is_quota_glb_feat(const struct dt_index_features *feat)
-{
- return (feat == &dt_quota_iusr_features ||
- feat == &dt_quota_busr_features ||
- feat == &dt_quota_igrp_features ||
- feat == &dt_quota_bgrp_features) ? true : false;
-}
#ifndef HAVE_I_UID_READ
static inline uid_t i_uid_read(const struct inode *inode)
}
#endif
+#ifdef HAVE_PROJECT_QUOTA
+/** Return the project ID stored in \a inode, mapped through init_user_ns. */
+static inline __u32 i_projid_read(struct inode *inode)
+{
+	kprojid_t kprojid = LDISKFS_I(inode)->i_projid;
+
+	return (__u32)from_kprojid(&init_user_ns, kprojid);
+}
+
+/** Store \a projid, mapped through init_user_ns, as \a inode's project ID. */
+static inline void i_projid_write(struct inode *inode, __u32 projid)
+{
+	LDISKFS_I(inode)->i_projid = make_kprojid(&init_user_ns,
+						  (projid_t)projid);
+}
+#else
+/*
+ * Stub used when HAVE_PROJECT_QUOTA is not defined: every inode reports
+ * project ID 0.  The return type is __u32, matching the
+ * HAVE_PROJECT_QUOTA variant, so that the signature does not depend on
+ * the build configuration.
+ */
+static inline __u32 i_projid_read(struct inode *inode)
+{
+	return 0;
+}
+/* No-op stand-in used when HAVE_PROJECT_QUOTA is not defined. */
+static inline void i_projid_write(struct inode *inode, __u32 projid)
+{
+}
+#endif
+
+#ifdef HAVE_LDISKFS_IGET_WITH_FLAGS
+# define osd_ldiskfs_iget(sb, ino) \
+ ldiskfs_iget((sb), (ino), LDISKFS_IGET_HANDLE)
+#else
+# define osd_ldiskfs_iget(sb, ino) ldiskfs_iget((sb), (ino))
+#endif
+
+#ifdef HAVE_LDISKFS_INFO_JINODE
+# define osd_attach_jinode(inode) ldiskfs_inode_attach_jinode(inode)
+#else /* HAVE_LDISKFS_INFO_JINODE */
+# define osd_attach_jinode(inode) 0
+#endif /* HAVE_LDISKFS_INFO_JINODE */
+
+#ifdef LDISKFS_HT_MISC
+# define osd_journal_start_sb(sb, type, nblock) \
+ ldiskfs_journal_start_sb(sb, type, nblock)
+/*
+ * Call osd_attach_jinode() on \a inode and, if that yields zero, hand over
+ * to ldiskfs_append().  A non-zero attach result is returned as ERR_PTR().
+ */
+static inline struct buffer_head *osd_ldiskfs_append(handle_t *handle,
+						      struct inode *inode,
+						      ldiskfs_lblk_t *nblock)
+{
+	int err = osd_attach_jinode(inode);
+
+	if (err != 0)
+		return ERR_PTR(err);
+
+	return ldiskfs_append(handle, inode, nblock);
+}
+# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
+ (__ldiskfs_find_entry(dir, name, de, inlined, lock) ?: \
+ ERR_PTR(-ENOENT))
+# define osd_journal_start(inode, type, nblocks) \
+ ldiskfs_journal_start(inode, type, nblocks)
+# define osd_transaction_size(dev) \
+ (osd_journal(dev)->j_max_transaction_buffers / 2)
+#else /* ! defined LDISKFS_HT_MISC */
+# define LDISKFS_HT_MISC 0
+# define osd_journal_start_sb(sb, type, nblock) \
+ ldiskfs_journal_start_sb(sb, nblock)
+
+/*
+ * Variant for an ldiskfs_append() that reports failure through an error
+ * out-parameter: a NULL buffer head is converted into ERR_PTR(err).
+ */
+static inline struct buffer_head *osd_ldiskfs_append(handle_t *handle,
+						      struct inode *inode,
+						      ldiskfs_lblk_t *nblock)
+{
+	int rc = 0;
+	struct buffer_head *bh = ldiskfs_append(handle, inode, nblock, &rc);
+
+	return bh != NULL ? bh : ERR_PTR(rc);
+}
+
+# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
+ (__ldiskfs_find_entry(dir, name, de, lock) ?: \
+ ERR_PTR(-ENOENT))
+# define osd_journal_start(inode, type, nblocks) \
+ ldiskfs_journal_start(inode, nblocks)
+# define osd_transaction_size(dev) \
+ (osd_journal(dev)->j_max_transaction_buffers)
+#endif
+
/*
* Invariants, assertions.
*/
return dev->od_mnt->mnt_sb;
}
+/** Name of \a dev: the s_id string of the superblock returned by osd_sb(). */
+static inline const char *osd_dev2name(const struct osd_device *dev)
+{
+	struct super_block *sb = osd_sb(dev);
+
+	return sb->s_id;
+}
+
+/** Name for \a inode: the s_id string of the superblock it belongs to. */
+static inline const char *osd_ino2name(const struct inode *inode)
+{
+	const struct super_block *sb = inode->i_sb;
+
+	return sb->s_id;
+}
+
+/**
+ * Put the osd object once done with it.
+ *
+ * Calls dt_object_put() on the dt_object embedded in \a obj (oo_dt).
+ *
+ * \param env execution environment, passed through to dt_object_put()
+ * \param obj osd object that needs to be put
+ */
+static inline void osd_object_put(const struct lu_env *env,
+ struct osd_object *obj)
+{
+ dt_object_put(env, &obj->oo_dt);
+}
+
static inline int osd_object_is_root(const struct osd_object *obj)
{
return osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode;
static inline char *osd_name(struct osd_device *osd)
{
- return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name;
+ return osd->od_svname;
+}
+
+/**
+ * Tell whether \a inode has LDISKFS_EA_INODE_FL set in its ldiskfs
+ * inode flags.
+ */
+static inline bool osd_is_ea_inode(struct inode *inode)
+{
+	return (LDISKFS_I(inode)->i_flags & LDISKFS_EA_INODE_FL) != 0;
}
extern const struct dt_body_operations osd_body_ops;
bag->ic_descr->id_ops->id_ipd_free(ipd);
}
+int osd_calc_bkmap_credits(struct super_block *sb, struct inode *inode,
+ const loff_t size, const loff_t pos,
+ const int blocks);
+
int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
int write_NUL, loff_t *offs, handle_t *handle);
extern int osd_trans_declare_op2rb[];
extern int ldiskfs_track_declares_assert;
+void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th);
static inline void osd_trans_declare_op(const struct lu_env *env,
struct osd_thandle *oh,
- unsigned int op, int credits)
+ enum osd_op_type op, int credits)
{
struct osd_thread_info *oti = osd_oti_get(env);
LASSERT(op < OSD_OT_MAX);
} else {
CWARN("%s: Invalid operation index %d\n",
- osd_name(oti->oti_dev), op);
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op);
libcfs_debug_dumpstack(NULL);
}
} else {
}
+/**
+ * Sanity-check the credit declarations before operation \a op is executed
+ * in transaction \a th, and record the handle's remaining buffer credits
+ * in oti_credits_before.
+ *
+ * Warns (and may assert when ldiskfs_track_declares_assert is set) if no
+ * credits were declared for \a op or for its rollback operation, or if the
+ * handle holds fewer credits than \a op still has declared but unused.
+ *
+ * \param env execution environment providing the osd_thread_info
+ * \param th transaction handle; its ot_handle must already be set
+ * \param op type of the operation about to be executed
+ */
static inline void osd_trans_exec_op(const struct lu_env *env,
- struct thandle *th, unsigned int op)
+ struct thandle *th,
+ enum osd_op_type op)
{
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_thandle *oh = container_of(th, struct osd_thandle,
ot_super);
- unsigned int rb;
+ unsigned int rb, left;
LASSERT(oh->ot_handle != NULL);
if (unlikely(op >= OSD_OT_MAX)) {
if (unlikely(ldiskfs_track_declares_assert))
LASSERT(op < OSD_OT_MAX);
else {
- CWARN("%s: Invalid operation index %d\n",
- osd_name(oti->oti_dev), op);
+ CWARN("%s: opcode %u: invalid value >= %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)),
+ op, OSD_OT_MAX);
libcfs_debug_dumpstack(NULL);
return;
}
}
- if (likely(!oti->oti_rollback && oti->oti_declare_ops[op] > 0)) {
- oti->oti_declare_ops[op]--;
- oti->oti_declare_ops_rb[op]++;
- } else {
- /* all future updates are considered rollback */
- oti->oti_rollback = true;
- rb = osd_trans_declare_op2rb[op];
- if (unlikely(rb >= OSD_OT_MAX)) {
- if (unlikely(ldiskfs_track_declares_assert))
- LASSERTF(rb < OSD_OT_MAX, "rb = %u\n", rb);
- else {
- CWARN("%s: Invalid rollback index %d\n",
- osd_name(oti->oti_dev), rb);
- libcfs_debug_dumpstack(NULL);
- return;
- }
- }
- if (unlikely(oti->oti_declare_ops_rb[rb] == 0)) {
- if (unlikely(ldiskfs_track_declares_assert))
- LASSERTF(oti->oti_declare_ops_rb[rb] > 0,
- "rb = %u\n", rb);
- else {
- CWARN("%s: Overflow in tracking declares for "
- "index, rb = %d\n",
- osd_name(oti->oti_dev), rb);
- libcfs_debug_dumpstack(NULL);
- return;
- }
- }
- oti->oti_declare_ops_rb[rb]--;
+ /* find the rollback (or reverse) operation for the given one;
+ * such an operation doesn't require additional credits
+ * as the same set of blocks is modified */
+ rb = osd_trans_declare_op2rb[op];
+
+ /* check whether credits for this operation were reserved at all.
+ * rb comes from a lookup table and is only used as an index when it
+ * is in range: the code this replaces range-checked it, so a value
+ * >= OSD_OT_MAX is treated as "no rollback credits declared" */
+ if (unlikely(oti->oti_declare_ops_cred[op] == 0 &&
+ (rb >= OSD_OT_MAX ||
+ oti->oti_declare_ops_cred[rb] == 0))) {
+ /* the API is not perfect yet: CREATE does REF_ADD internally
+ * while DESTROY does not. To rollback CREATE the caller
+ * needs to call REF_DEL+DESTROY which is hard to detect using
+ * a simple table of rollback operations */
+ if (op == OSD_OT_REF_DEL &&
+ oti->oti_declare_ops_cred[OSD_OT_CREATE] > 0)
+ goto proceed;
+ if (op == OSD_OT_REF_ADD &&
+ oti->oti_declare_ops_cred[OSD_OT_DESTROY] > 0)
+ goto proceed;
+ CWARN("%s: opcode %u: credits = 0, rollback = %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb);
+ osd_trans_dump_creds(env, th);
+ LASSERT(!ldiskfs_track_declares_assert);
+ }
+
+proceed:
+ /* remember how many credits we have unused before the operation */
+ oti->oti_credits_before = oh->ot_handle->h_buffer_credits;
+ left = oti->oti_declare_ops_cred[op] - oti->oti_declare_ops_used[op];
+ if (unlikely(oti->oti_credits_before < left)) {
+ CWARN("%s: opcode %u: before %u < left %u, rollback = %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op,
+ oti->oti_credits_before, left, rb);
+ osd_trans_dump_creds(env, th);
+ /* on a very small fs (testing?) it's possible that
+ * the transaction can't fit 1/4 of journal, so we
+ * just request less credits (see osd_trans_start()).
+ * ignore the same case here; rb is reused to hold the
+ * transaction size from here on */
+ rb = osd_transaction_size(osd_dt_dev(th->th_dev));
+ if (unlikely(oh->ot_credits < rb))
+ LASSERT(!ldiskfs_track_declares_assert);
}
}
-static inline void osd_trans_declare_rb(const struct lu_env *env,
- struct thandle *th, unsigned int op)
+static inline void osd_trans_exec_check(const struct lu_env *env,
+ struct thandle *th,
+ enum osd_op_type op)
{
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_thandle *oh = container_of(th, struct osd_thandle,
ot_super);
+ int used, over, quota;
- LASSERT(oh->ot_handle != NULL);
- if (unlikely(op >= OSD_OT_MAX)) {
- if (unlikely(ldiskfs_track_declares_assert))
- LASSERT(op < OSD_OT_MAX);
- else {
- CWARN("%s: Invalid operation index %d\n",
- osd_name(oti->oti_dev), op);
- libcfs_debug_dumpstack(NULL);
- }
+ /* how many credits have been used by the operation, i.e. consumed
+ * from the handle since oti_credits_before was last recorded */
+ used = oti->oti_credits_before - oh->ot_handle->h_buffer_credits;
+
+ if (unlikely(used < 0)) {
+ /* if some block was allocated and released in the same
+ * transaction, then it won't be a part of the transaction
+ * and delta can be negative */
+ return;
+ }
+
+ if (used == 0) {
+ /* rollback operations (e.g. when we destroy a just created
+ * object) should not consume any credits. there is no point
+ * in confusing the checks below */
+ return;
+ }
+ oti->oti_declare_ops_used[op] += used;
+ if (oti->oti_declare_ops_used[op] <= oti->oti_declare_ops_cred[op])
+ return;
+
+ /* we account quota for a whole transaction and any operation can
+ * consume the corresponding credits: "over" is the excess of this
+ * operation, "quota" is what is still unused of the quota credits */
+ over = oti->oti_declare_ops_used[op] -
+ oti->oti_declare_ops_cred[op];
+ quota = oti->oti_declare_ops_cred[OSD_OT_QUOTA] -
+ oti->oti_declare_ops_used[OSD_OT_QUOTA];
+ if (over <= quota) {
+ /* those credits were probably consumed by
+ * quota indirectly (in the depths of ldiskfs), so
+ * charge the excess to OSD_OT_QUOTA instead of op */
+ oti->oti_declare_ops_used[OSD_OT_QUOTA] += over;
+ oti->oti_declare_ops_used[op] -= over;
} else {
- oti->oti_declare_ops_rb[op]++;
+ CWARN("%s: opcode %d: used %u, used now %u, reserved %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op,
+ oti->oti_declare_ops_used[op], used,
+ oti->oti_declare_ops_cred[op]);
+ osd_trans_dump_creds(env, th);
+ if (unlikely(ldiskfs_track_declares_assert))
+ LBUG();
}
}
static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
- return is_compat_task();
+ return in_compat_syscall();
#else
return (BITS_PER_LONG == 32);
#endif
return (!fid_is_namespace_visible(fid) && !fid_is_idif(fid));
}
-static inline unsigned long osd_remote_parent_ino(struct osd_device *dev)
+static inline bool is_remote_parent_ino(struct osd_device *o, unsigned long ino)
{
- return dev->od_mdt_map->omm_remote_parent->d_inode->i_ino;
+ if (o->od_is_ost) /* od_is_ost set: always false, od_mdt_map is not consulted */
+ return false;
+ /* od_mdt_map is dereferenced below, so it has to be set at this point */
+ LASSERT(o->od_mdt_map != NULL);
+ /*
+ * True only when @ino equals the inode number behind the
+ * omm_remote_parent dentry of this device's od_mdt_map.
+ */
+ return ino == o->od_mdt_map->omm_remote_parent->d_inode->i_ino;
}
-#ifdef JOURNAL_START_HAS_3ARGS
-# define osd_journal_start_sb(sb, type, nblock) \
- ldiskfs_journal_start_sb(sb, type, nblock)
-# define osd_ldiskfs_append(handle, inode, nblock, err) \
- ldiskfs_append(handle, inode, nblock)
-# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
- ldiskfs_find_entry(dir, name, de, inlined, lock)
-# define osd_journal_start(inode, type, nblocks) \
- ldiskfs_journal_start(inode, type, nblocks)
-# define osd_transaction_size(dev) \
- (osd_journal(dev)->j_max_transaction_buffers / 2)
+/**
+ * ext4_bread/ldiskfs_bread has either 5 or 4 parameters. The error
+ * return code has been removed and integrated into the pointer in the
+ * kernel 3.18.
+ *
+ * This wrapper gives callers a single 4-argument form for both variants.
+ *
+ * \param handle passed through to ldiskfs_bread()
+ * \param inode inode to read from; also handed to osd_attach_jinode()
+ * when \a create is non-zero
+ * \param block logical block number, passed through to ldiskfs_bread()
+ * \param create passed through to ldiskfs_bread(); when non-zero,
+ * osd_attach_jinode() is called first
+ *
+ * \retval ERR_PTR(rc) if osd_attach_jinode() returned non-zero rc, or if
+ * the 5-argument ldiskfs_bread() returned NULL and
+ * stored a non-zero rc
+ * \retval otherwise whatever ldiskfs_bread() returned; in the
+ * 5-argument variant this may be NULL with rc == 0
+ */
+static inline struct buffer_head *__ldiskfs_bread(handle_t *handle,
+ struct inode *inode,
+ ldiskfs_lblk_t block,
+ int create)
+{
+ int rc = 0;
+ struct buffer_head *bh;
+
+ if (create) { /* osd_attach_jinode() runs only on the create path */
+ rc = osd_attach_jinode(inode);
+ if (rc)
+ return ERR_PTR(rc);
+ }
+#ifdef HAVE_EXT4_BREAD_4ARGS /* 4-argument form: result is returned as is */
+ bh = ldiskfs_bread(handle, inode, block, create);
#else
-# define LDISKFS_HT_MISC 0
-# define osd_journal_start_sb(sb, type, nblock) \
- ldiskfs_journal_start_sb(sb, nblock)
-# define osd_ldiskfs_append(handle, inode, nblock, err) \
- ldiskfs_append(handle, inode, nblock, err)
-# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
- ldiskfs_find_entry(dir, name, de, lock)
-# define osd_journal_start(inode, type, nblocks) \
- ldiskfs_journal_start(inode, nblocks)
-# define osd_transaction_size(dev) \
- (osd_journal(dev)->j_max_transaction_buffers)
+ /*
+ * 5-argument form: the error code comes back through rc, so a NULL
+ * result with a non-zero rc is converted into ERR_PTR(rc) here.
+ */
+ bh = ldiskfs_bread(handle, inode, block, create, &rc);
+ if (bh == NULL && rc != 0)
+ bh = ERR_PTR(rc);
+#endif /* HAVE_EXT4_BREAD_4ARGS */
+ return bh;
+}
+
+#ifndef HAVE_BIO_INTEGRITY_ENABLED /* declaration is emitted only when this is undefined */
+bool bio_integrity_enabled(struct bio *bio); /* NOTE(review): definition is not in this part of the header - confirm where it lives */
+#endif /* !HAVE_BIO_INTEGRITY_ENABLED */
+
+#ifdef HAVE_BI_BDEV /* device, disk and queue are all reached through (bio)->bi_bdev */
+# define bio_get_dev(bio) ((bio)->bi_bdev)
+# define bio_get_disk(bio) (bio_get_dev(bio)->bd_disk)
+# define bio_get_queue(bio) bdev_get_queue(bio_get_dev(bio))
+# define bio_set_dev(bio, bdev) (bio_get_dev(bio) = (bdev)) /* plain assignment to bi_bdev */
+#else /*
+ * !HAVE_BI_BDEV: disk and queue are reached through (bio)->bi_disk.
+ * bio_get_dev() and bio_set_dev() are not defined in this branch.
+ * NOTE(review): presumably bio_set_dev() then comes from the kernel
+ * headers - confirm.
+ */
+# define bio_get_disk(bio) ((bio)->bi_disk)
+# define bio_get_queue(bio) (bio_get_disk(bio)->queue)
#endif
void ldiskfs_inc_count(handle_t *handle, struct inode *inode);
void osd_fini_iobuf(struct osd_device *d, struct osd_iobuf *iobuf);
+static inline int
+osd_index_register(struct osd_device *osd, const struct lu_fid *fid,
+ __u32 keysize, __u32 recsize)
+{ /*
+ * Thin wrapper around lustre_index_register(): forwards @fid, @keysize
+ * and @recsize together with this device's od_dt_dev, its name,
+ * od_index_backup_list, od_lock and od_index_backup_stop, and returns
+ * the callee's result unchanged.
+ */
+ return lustre_index_register(&osd->od_dt_dev, osd_name(osd),
+ &osd->od_index_backup_list, &osd->od_lock,
+ &osd->od_index_backup_stop,
+ fid, keysize, recsize);
+}
+
+static inline void
+osd_index_backup(const struct lu_env *env, struct osd_device *osd, bool backup)
+{
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lu_fid *fid = &info->oti_fid3; /* scratch FID taken from the thread info */
+ struct osd_inode_id *id = &info->oti_id3; /* scratch inode id taken from the thread info */
+ /*
+ * Build the local-object FID for INDEX_BACKUP_OID and an inode id from
+ * od_index_backup_inode (i_ino, i_generation), then pass the pair to
+ * osd_add_oi_cache().
+ * NOTE(review): od_index_backup_inode is dereferenced without a NULL
+ * check - presumably callers guarantee it is set; confirm.
+ */
+ lu_local_obj_fid(fid, INDEX_BACKUP_OID);
+ osd_id_gen(id, osd->od_index_backup_inode->i_ino,
+ osd->od_index_backup_inode->i_generation);
+ osd_add_oi_cache(info, osd, id, fid);
+ /* hand over to lustre_index_backup() with this device's list, lock and stop field; @backup is forwarded as is */
+ lustre_index_backup(env, &osd->od_dt_dev, osd_name(osd),
+ &osd->od_index_backup_list, &osd->od_lock,
+ &osd->od_index_backup_stop, backup);
+}
+
+#ifdef LDISKFS_HAS_INCOMPAT_FEATURE
+/*
+ * ldiskfs_has_feature_*(sb) helpers expressed through the
+ * LDISKFS_HAS_{INCOMPAT,COMPAT,RO_COMPAT}_FEATURE() macros. Each helper is
+ * defined only when the matching LDISKFS_FEATURE_* flag macro exists.
+ * NOTE(review): the outer guard tests only LDISKFS_HAS_INCOMPAT_FEATURE,
+ * while the journal, quota and project helpers rely on the COMPAT and
+ * RO_COMPAT variants - presumably these macros always come together;
+ * confirm.
+ */
+# ifdef LDISKFS_FEATURE_INCOMPAT_EXTENTS
+# define ldiskfs_has_feature_extents(sb) \
+ LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EXTENTS)
+# endif /* LDISKFS_FEATURE_INCOMPAT_EXTENTS */
+# ifdef LDISKFS_FEATURE_INCOMPAT_EA_INODE
+# define ldiskfs_has_feature_ea_inode(sb) \
+ LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EA_INODE)
+# endif /* LDISKFS_FEATURE_INCOMPAT_EA_INODE */
+# ifdef LDISKFS_FEATURE_INCOMPAT_DIRDATA
+# define ldiskfs_has_feature_dirdata(sb) \
+ LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_DIRDATA)
+# endif /* LDISKFS_FEATURE_INCOMPAT_DIRDATA */
+# ifdef LDISKFS_FEATURE_COMPAT_HAS_JOURNAL
+# define ldiskfs_has_feature_journal(sb) \
+ LDISKFS_HAS_COMPAT_FEATURE(sb, LDISKFS_FEATURE_COMPAT_HAS_JOURNAL)
+# endif /* LDISKFS_FEATURE_COMPAT_HAS_JOURNAL */
+# ifdef LDISKFS_FEATURE_RO_COMPAT_QUOTA
+# define ldiskfs_has_feature_quota(sb) \
+ LDISKFS_HAS_RO_COMPAT_FEATURE(sb, LDISKFS_FEATURE_RO_COMPAT_QUOTA)
+# endif /* LDISKFS_FEATURE_RO_COMPAT_QUOTA */
+# ifdef LDISKFS_FEATURE_RO_COMPAT_PROJECT
+# define ldiskfs_has_feature_project(sb) \
+ LDISKFS_HAS_RO_COMPAT_FEATURE(sb, LDISKFS_FEATURE_RO_COMPAT_PROJECT)
+# endif /* LDISKFS_FEATURE_RO_COMPAT_PROJECT */
+
+#endif /* LDISKFS_HAS_INCOMPAT_FEATURE */
+
+int osd_trunc_lock(struct osd_object *obj, struct osd_thandle *oh,
+ bool shared);
+void osd_trunc_unlock_all(struct list_head *list);
+void osd_process_truncates(struct list_head *list);
+void osd_execute_truncate(struct osd_object *obj);
+
+#ifdef HAVE_BIO_ENDIO_USES_ONE_ARG /* one-argument form: the error argument is dropped */
+#define osd_dio_complete_routine(bio, error) dio_complete_routine(bio)
+#else /* !HAVE_BIO_ENDIO_USES_ONE_ARG: both arguments are forwarded */
+#define osd_dio_complete_routine(bio, error) dio_complete_routine(bio, error)
+#endif /* HAVE_BIO_ENDIO_USES_ONE_ARG */
+
+#ifndef HAVE___BI_CNT /* without HAVE___BI_CNT, __bi_cnt is spelled bi_cnt */
+#define __bi_cnt bi_cnt
+#endif /* !HAVE___BI_CNT */
+
+#ifndef HAVE_BI_OPF /* without HAVE_BI_OPF, bi_opf is spelled bi_rw */
+#define bi_opf bi_rw
+#endif /* !HAVE_BI_OPF */
+
+#ifndef HAVE_CLEAN_BDEV_ALIASES /* fallback drops len and calls unmap_underlying_metadata(bdev, block) */
+#define clean_bdev_aliases(bdev, block, len) \
+ unmap_underlying_metadata((bdev), (block))
+#endif /* !HAVE_CLEAN_BDEV_ALIASES */
+
+#ifndef HAVE_BI_STATUS /* without HAVE_BI_STATUS, bi_status is spelled bi_error */
+#define bi_status bi_error
+#endif /* !HAVE_BI_STATUS */
+
+/*
+ * Maximum size of an xattr for FEATURE_INCOMPAT_EA_INODE: 1 MiB.
+ * This limit is arbitrary, but is reasonable for the xattr API.
+ */
+#define LDISKFS_XATTR_MAX_LARGE_EA_SIZE (1024 * 1024)
+
+struct osd_bio_private { /* NOTE(review): presumably hung off a bio as its private data - confirm at the use site */
+ struct osd_iobuf *obp_iobuf; /* the iobuf that obp_start_page_idx indexes into */
+ /* Start page index in the obp_iobuf for the bio */
+ int obp_start_page_idx;
+};
+
+#ifdef HAVE_BIO_INTEGRITY_PREP_FN /* only the prototype is given here; the definition is outside this header section */
+int osd_get_integrity_profile(struct osd_device *osd,
+ integrity_gen_fn **generate_fn,
+ integrity_vrfy_fn **verify_fn);
+#else /*
+ * !HAVE_BIO_INTEGRITY_PREP_FN: integrity_gen_fn and integrity_vrfy_fn are
+ * defined as placeholder types (void and int) so the signatures below
+ * still compile.
+ */
+#define integrity_gen_fn void
+#define integrity_vrfy_fn int
+static inline int osd_get_integrity_profile(struct osd_device *osd,
+ integrity_gen_fn **generate_fn,
+ integrity_vrfy_fn **verify_fn)
+{
+ return 0; /* stub: always 0; *generate_fn and *verify_fn are left untouched */
+}
+ /* Stand-in that ignores both function arguments and forwards to bio_integrity_prep(). */
+static inline bool bio_integrity_prep_fn(struct bio *bio,
+ integrity_gen_fn *generate_fn,
+ integrity_vrfy_fn *verify_fn)
+{
+ return bio_integrity_prep(bio);
+}
+#endif /* HAVE_BIO_INTEGRITY_PREP_FN */
+
+#ifdef HAVE_BIO_BI_PHYS_SEGMENTS /* read the count from the bi_phys_segments field */
+#define osd_bio_nr_segs(bio) ((bio)->bi_phys_segments)
+#else /* !HAVE_BIO_BI_PHYS_SEGMENTS: obtain the count from bio_segments() */
+#define osd_bio_nr_segs(bio) bio_segments((bio))
+#endif /* HAVE_BIO_BI_PHYS_SEGMENTS */
+
#endif /* _OSD_INTERNAL_H */