*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
/* OI scrub should skip this inode. */
#define LDISKFS_STATE_LUSTRE_NOSCRUB 31
+#define LDISKFS_STATE_LUSTRE_DESTROY 30
/** Enable thandle usage statistics */
#define OSD_THANDLE_STATS (0)
#define ADMIN_USR "admin_quotafile_v2.usr"
#define ADMIN_GRP "admin_quotafile_v2.grp"
+/*
+ * Statfs space reservation for fragmentation and local objects.
+ *
+ * OSD_STATFS_RESERVED is an absolute amount: 1 << 23 = 8388608, i.e. 8MB.
+ * OSD_STATFS_RESERVED_SHIFT describes a fraction of the total space:
+ * 1 / 2^7 = 1/128, roughly 0.78%.
+ * NOTE(review): how the two values are combined is decided by the statfs
+ * code, which is not visible in this header - confirm there.
+ */
+#define OSD_STATFS_RESERVED (1ULL << 23) /* 8MB */
+#define OSD_STATFS_RESERVED_SHIFT (7) /* reserve 0.78% of all space */
+
struct osd_directory {
struct iam_container od_container;
struct iam_descr od_descr;
struct osd_directory *oo_dir;
/** protects inode attributes. */
spinlock_t oo_guard;
+
+ __u32 oo_destroyed:1;
+
+ /* the i_flags in LMA */
+ __u32 oo_lma_flags;
/**
* Following two members are used to indicate the presence of dot and
* dotdot in the given directory. This is required for interop mode
#ifdef CONFIG_LOCKDEP
struct lockdep_map oo_dep_map;
#endif
+
+ struct list_head oo_xattr_list;
};
struct osd_obj_seq {
struct osd_mdobj_map {
struct dentry *omm_remote_parent;
};
-
-#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
- __ldiskfs_add_entry(handle, child, cinode, hlock)
+int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd,
+ handle_t *handle, struct dentry *child,
+ struct inode *inode, struct htree_lock *hlock);
#define OSD_OTABLE_IT_CACHE_SIZE 64
#define OSD_OTABLE_IT_CACHE_MASK (~(OSD_OTABLE_IT_CACHE_SIZE - 1))
ooi_waiting:1; /* it::next is waiting. */
};
+struct osd_obj_orphan { /* presumably one record on osd_device::od_orphan_list - TODO confirm */
+ struct list_head oor_list; /* list linkage for this record */
+ struct lu_env *oor_env; /* to identify "own" records */
+ __u32 oor_ino; /* presumably the inode number of the orphan - TODO confirm */
+};
+
/*
* osd device.
*/
* exceeds the osd_device::od_full_scrub_threshold_rate,
* then trigger OI scrub to scan the whole device. */
__u64 od_full_scrub_threshold_rate;
+
+ /* a list of orphaned agent inodes, protected with od_osfs_lock */
+ struct list_head od_orphan_list;
};
enum osd_full_scrub_ratio {
*/
#define OSD_MAX_UGID_CNT 10
-enum {
+enum osd_op_type {
OSD_OT_ATTR_SET = 0,
OSD_OT_PUNCH = 1,
OSD_OT_XATTR_SET = 2,
struct list_head ot_stop_dcb_list;
/* Link to the device, for debugging. */
struct lu_ref_link ot_dev_link;
- unsigned short ot_credits;
- unsigned short ot_id_cnt;
- unsigned short ot_id_type;
- uid_t ot_id_array[OSD_MAX_UGID_CNT];
+ unsigned int ot_credits;
+ unsigned short ot_id_cnt;
+ unsigned short ot_id_type;
+ unsigned int ot_remove_agents:1;
+ uid_t ot_id_array[OSD_MAX_UGID_CNT];
struct lquota_trans *ot_quota_trans;
#if OSD_THANDLE_STATS
/** time when this handle was allocated */
- cfs_time_t oth_alloced;
+ ktime_t oth_alloced;
/** time when this thanle was started */
- cfs_time_t oth_started;
+ ktime_t oth_started;
#endif
};
* there would be one ext3 readdir for every mdd readdir page.
*/
-#define OSD_IT_EA_BUFSIZE (PAGE_CACHE_SIZE + PAGE_CACHE_SIZE/4)
+#define OSD_IT_EA_BUFSIZE (PAGE_SIZE + PAGE_SIZE/4)
/**
* This is iterator's in-memory data structure in interoperability
struct list_head oiq_list;
};
-#define MAX_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / 512)
+#define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
struct osd_iobuf {
wait_queue_head_t dr_wait;
unsigned int dr_init_at; /* the line iobuf was initialized */
};
+#define OSD_INS_CACHE_SIZE 8
+
struct osd_thread_info {
const struct lu_env *oti_env;
/**
*/
struct timespec oti_time;
- /** osd_device reference, initialized in osd_trans_start() and
- used in osd_trans_stop() */
- struct osd_device *oti_dev;
-
/**
* following ipd and it structures are used for osd_index_iam_lookup()
* these are defined separately as we might do index operation
struct osd_idmap_cache oti_cache;
+ /* dedicated OI cache for insert (which needs inum) */
+ struct osd_idmap_cache *oti_ins_cache;
+ int oti_ins_cache_size;
+ int oti_ins_cache_used;
+
int oti_r_locks;
int oti_w_locks;
int oti_txns;
/* used by quota code */
union {
-#ifdef HAVE_DQUOT_FS_DISK_QUOTA
+#if defined(HAVE_DQUOT_QC_DQBLK)
+ struct qc_dqblk oti_qdq;
+#elif defined(HAVE_DQUOT_FS_DISK_QUOTA)
struct fs_disk_quota oti_fdq;
#else
struct if_dqblk oti_dqblk;
* cases where a large number of credits are being allocated for
* single transaction. */
unsigned int oti_credits_before;
- unsigned short oti_declare_ops[OSD_OT_MAX];
- unsigned short oti_declare_ops_cred[OSD_OT_MAX];
- unsigned short oti_declare_ops_used[OSD_OT_MAX];
+ unsigned int oti_declare_ops[OSD_OT_MAX];
+ unsigned int oti_declare_ops_cred[OSD_OT_MAX];
+ unsigned int oti_declare_ops_used[OSD_OT_MAX];
};
extern int ldiskfs_pdo;
}
#endif
+#ifdef HAVE_LDISKFS_INFO_JINODE /* forward to ldiskfs_inode_attach_jinode() */
+# define osd_attach_jinode(inode) ldiskfs_inode_attach_jinode(inode)
+#else /* !HAVE_LDISKFS_INFO_JINODE: expands to 0, which callers treat as "no error" */
+# define osd_attach_jinode(inode) 0
+#endif /* HAVE_LDISKFS_INFO_JINODE */
+
#ifdef LDISKFS_HT_MISC
# define osd_journal_start_sb(sb, type, nblock) \
ldiskfs_journal_start_sb(sb, type, nblock)
-# define osd_ldiskfs_append(handle, inode, nblock, err) \
- ldiskfs_append(handle, inode, nblock)
+/*
+ * Wrapper around the three-argument ldiskfs_append().
+ *
+ * osd_attach_jinode() is invoked on \a inode first; a non-zero result is
+ * handed back to the caller as ERR_PTR() and ldiskfs_append() is not
+ * called.  Otherwise the value returned by ldiskfs_append() is passed
+ * through unchanged.
+ */
+static inline struct buffer_head *osd_ldiskfs_append(handle_t *handle,
+						     struct inode *inode,
+						     ldiskfs_lblk_t *nblock)
+{
+	int err = osd_attach_jinode(inode);
+
+	if (err != 0)
+		return ERR_PTR(err);
+
+	return ldiskfs_append(handle, inode, nblock);
+}
# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
- __ldiskfs_find_entry(dir, name, de, inlined, lock)
+ (__ldiskfs_find_entry(dir, name, de, inlined, lock) ?: \
+ ERR_PTR(-ENOENT))
# define osd_journal_start(inode, type, nblocks) \
ldiskfs_journal_start(inode, type, nblocks)
# define osd_transaction_size(dev) \
# define LDISKFS_HT_MISC 0
# define osd_journal_start_sb(sb, type, nblock) \
ldiskfs_journal_start_sb(sb, nblock)
-# define osd_ldiskfs_append(handle, inode, nblock, err) \
- ldiskfs_append(handle, inode, nblock, err)
+
+/*
+ * Wrapper around the four-argument ldiskfs_append(), which reports its
+ * error code through a separate output argument.
+ *
+ * A non-NULL buffer head is returned as is.  A NULL result is replaced by
+ * ERR_PTR() of the reported error code, so the error travels in the
+ * returned pointer.
+ */
+static inline struct buffer_head *osd_ldiskfs_append(handle_t *handle,
+						     struct inode *inode,
+						     ldiskfs_lblk_t *nblock)
+{
+	int rc = 0;
+	struct buffer_head *blk;
+
+	blk = ldiskfs_append(handle, inode, nblock, &rc);
+	if (blk != NULL)
+		return blk;
+
+	return ERR_PTR(rc);
+}
+
# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
- __ldiskfs_find_entry(dir, name, de, lock)
+ (__ldiskfs_find_entry(dir, name, de, lock) ?: \
+ ERR_PTR(-ENOENT))
# define osd_journal_start(inode, type, nblocks) \
ldiskfs_journal_start(inode, nblocks)
# define osd_transaction_size(dev) \
static inline char *osd_name(struct osd_device *osd)
{
- return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name;
+ return osd->od_svname; /* name kept in the osd_device itself, no obd dereference */
+}
+
+/* Tell whether LDISKFS_EA_INODE_FL is set in the ldiskfs i_flags of \a inode. */
+static inline bool osd_is_ea_inode(struct inode *inode)
+{
+	return (LDISKFS_I(inode)->i_flags & LDISKFS_EA_INODE_FL) != 0;
}
extern const struct dt_body_operations osd_body_ops;
bag->ic_descr->id_ops->id_ipd_free(ipd);
}
+int osd_calc_bkmap_credits(struct super_block *sb, struct inode *inode,
+ const loff_t size, const loff_t pos,
+ const int blocks);
+
int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
int write_NUL, loff_t *offs, handle_t *handle);
static inline void osd_trans_declare_op(const struct lu_env *env,
struct osd_thandle *oh,
- unsigned int op, int credits)
+ enum osd_op_type op, int credits)
{
struct osd_thread_info *oti = osd_oti_get(env);
}
static inline void osd_trans_exec_op(const struct lu_env *env,
- struct thandle *th, unsigned int op)
+ struct thandle *th,
+ enum osd_op_type op)
{
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_thandle *oh = container_of(th, struct osd_thandle,
if (unlikely(ldiskfs_track_declares_assert))
LASSERT(op < OSD_OT_MAX);
else {
- CWARN("%s: Invalid operation index %d\n",
- osd_name(osd_dt_dev(oh->ot_super.th_dev)), op);
+ CWARN("%s: opcode %u: invalid value >= %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)),
+ op, OSD_OT_MAX);
libcfs_debug_dumpstack(NULL);
return;
}
if (op == OSD_OT_REF_ADD &&
oti->oti_declare_ops_cred[OSD_OT_DESTROY] > 0)
goto proceed;
+ CWARN("%s: opcode %u: credits = 0, rollback = %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb);
osd_trans_dump_creds(env, th);
- CERROR("%s: op = %d, rb = %d\n",
- osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb);
- if (unlikely(ldiskfs_track_declares_assert))
- LBUG();
+ LASSERT(!ldiskfs_track_declares_assert);
}
proceed:
oti->oti_credits_before = oh->ot_handle->h_buffer_credits;
left = oti->oti_declare_ops_cred[op] - oti->oti_declare_ops_used[op];
if (unlikely(oti->oti_credits_before < left)) {
+ CWARN("%s: opcode %u: before %u < left %u, rollback = %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op,
+ oti->oti_credits_before, left, rb);
osd_trans_dump_creds(env, th);
- CERROR("%s: op = %d, rb = %d\n",
- osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb);
/* on a very small fs (testing?) it's possible that
* the transaction can't fit 1/4 of journal, so we
* just request less credits (see osd_trans_start()).
* ignore the same case here */
rb = osd_transaction_size(osd_dt_dev(th->th_dev));
- if (unlikely(oh->ot_credits < rb)) {
- if (unlikely(ldiskfs_track_declares_assert))
- LBUG();
- }
+ if (unlikely(oh->ot_credits < rb))
+ LASSERT(!ldiskfs_track_declares_assert);
}
}
static inline void osd_trans_exec_check(const struct lu_env *env,
struct thandle *th,
- unsigned int op)
+ enum osd_op_type op)
{
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_thandle *oh = container_of(th, struct osd_thandle,
oti->oti_declare_ops_used[OSD_OT_QUOTA] += over;
oti->oti_declare_ops_used[op] -= over;
} else {
- CWARN("op %d: used %u, used now %u, reserved %u\n",
- op, oti->oti_declare_ops_used[op], used,
+ CWARN("%s: opcode %d: used %u, used now %u, reserved %u\n",
+ osd_name(osd_dt_dev(oh->ot_super.th_dev)), op,
+ oti->oti_declare_ops_used[op], used,
oti->oti_declare_ops_cred[op]);
osd_trans_dump_creds(env, th);
if (unlikely(ldiskfs_track_declares_assert))
static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
- return is_compat_task();
+ return in_compat_syscall();
#else
return (BITS_PER_LONG == 32);
#endif
return dev->od_mdt_map->omm_remote_parent->d_inode->i_ino;
}
+/**
+ * Wrapper hiding the two calling conventions of ldiskfs_bread().
+ *
+ * ext4_bread/ldiskfs_bread takes either 5 or 4 parameters: since kernel
+ * 3.18 the separate error return argument is gone and the error code is
+ * carried by the returned pointer instead.  With the 5-argument form, a
+ * NULL result accompanied by a non-zero error code is converted to
+ * ERR_PTR(); a NULL result without an error code is returned as NULL.
+ *
+ * When \a create is non-zero, osd_attach_jinode() is called on \a inode
+ * first, and its failure is returned as ERR_PTR() without calling
+ * ldiskfs_bread() at all.
+ */
+static inline struct buffer_head *__ldiskfs_bread(handle_t *handle,
+						  struct inode *inode,
+						  ldiskfs_lblk_t block,
+						  int create)
+{
+	struct buffer_head *result;
+	int err = 0;
+
+	if (create != 0) {
+		err = osd_attach_jinode(inode);
+		if (err != 0)
+			return ERR_PTR(err);
+	}
+
+#ifdef HAVE_EXT4_BREAD_4ARGS
+	result = ldiskfs_bread(handle, inode, block, create);
+#else
+	result = ldiskfs_bread(handle, inode, block, create, &err);
+	if (result == NULL && err != 0)
+		result = ERR_PTR(err);
+#endif
+
+	return result;
+}
+
void ldiskfs_inc_count(handle_t *handle, struct inode *inode);
void ldiskfs_dec_count(handle_t *handle, struct inode *inode);
void osd_fini_iobuf(struct osd_device *d, struct osd_iobuf *iobuf);
+
#endif /* _OSD_INTERNAL_H */