* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2015, Intel Corporation.
+ * Copyright (c) 2012, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <sys/zfs_znode.h>
#include <sys/zap.h>
#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
/**
* By design including kmem.h overrides the Linux slab interfaces to provide
#undef kmem_cache_free
#endif
+#define ZFS_VERSION_CODE \
+ OBD_OCD_VERSION(ZFS_MAJOR, ZFS_MINOR, ZFS_PATCH, ZFS_FIX)
+
#define LUSTRE_ROOT_FID_SEQ 0
#define DMU_OSD_SVNAME "svname"
#define DMU_OSD_OI_NAME_BASE "oi"
/* Statfs {minimum, safe estimate, and maximum} dnodes per block */
#define OSD_DNODE_MIN_BLKSHIFT (DNODES_PER_BLOCK_SHIFT)
-#define OSD_DNODE_EST_BLKSHIFT (DNODES_PER_BLOCK_SHIFT >> 1)
-#define OSD_DNODE_EST_COUNT 1024
+#define OSD_DNODE_EST_BLKSHIFT (12) /* est 4KB/dnode */
+#define OSD_DNODE_EST_COUNT 4096
#define OSD_GRANT_FOR_LOCAL_OIDS (2ULL << 20) /* 2MB for last_rcvd, ... */
uint64_t mode;
uint64_t gid;
uint64_t uid;
+#ifdef ZFS_PROJINHERIT
+ uint64_t projid;
+#endif
uint64_t nlink;
uint64_t rdev;
uint64_t flags;
uint64_t ctime[2];
};
-/* max.number of regular attrubites the callers may ask for */
-#define OSD_MAX_IN_BULK 13
+
+#define OSD_INS_CACHE_SIZE 8
+
+/* OI cache entry: caches a FID -> dnode-number mapping for one device,
+ * so repeated lookups can avoid consulting the OI ZAP again. */
+struct osd_idmap_cache {
+ struct osd_device *oic_dev; /* device this mapping belongs to */
+ struct lu_fid oic_fid; /* FID being mapped */
+ /** max 2^48 dnodes per dataset, avoid spilling into another word */
+ uint64_t oic_dnode:DN_MAX_OBJECT_SHIFT,
+ oic_remote:1; /* FID isn't local */
+};
+
+/* max.number of regular attributes the callers may ask for */
+# define OSD_MAX_IN_BULK (sizeof(struct osa_attr)/sizeof(uint64_t))
struct osd_thread_info {
const struct lu_env *oti_env;
struct lquota_id_info oti_qi;
struct lu_seq_range oti_seq_range;
+
+ /* dedicated OI cache for insert (which needs inum) */
+ struct osd_idmap_cache *oti_ins_cache;
+ int oti_ins_cache_size;
+ int oti_ins_cache_used;
+ struct lu_buf oti_xattr_lbuf;
};
extern struct lu_context_key osd_key;
struct list_head ot_stop_dcb_list;
struct list_head ot_unlinked_list;
struct list_head ot_sa_list;
- struct semaphore ot_sa_lock;
dmu_tx_t *ot_tx;
struct lquota_trans ot_quota_trans;
__u32 ot_write_commit:1,
struct osd_oi {
char oi_name[OSD_OI_NAME_SIZE]; /* unused */
uint64_t oi_zapid;
- dmu_buf_t *oi_db;
+ dnode_t *oi_dn;
};
struct osd_seq {
/* information about underlying file system */
struct objset *od_os;
uint64_t od_rootid; /* id of root znode */
- uint64_t od_unlinkedid; /* id of unlinked zapobj */
+ dnode_t *od_unlinked; /* dnode of unlinked zapobj */
/* SA attr mapping->id,
* name is the same as in ZFS to use defines SA_ZPL_...*/
sa_attr_type_t *z_attr_table;
unsigned int od_dev_set_rdonly:1, /**< osd_ro() called */
od_prop_rdonly:1, /**< ZFS property readonly */
od_xattr_in_sa:1,
- od_quota_iused_est:1,
od_is_ost:1,
od_posix_acl:1;
+ unsigned int od_dnsize;
char od_mntdev[128];
char od_svname[128];
int od_connects;
struct lu_site od_site;
- /* object IDs of the inode accounting indexes */
- uint64_t od_iusr_oid;
- uint64_t od_igrp_oid;
+ dnode_t *od_groupused_dn;
+ dnode_t *od_userused_dn;
+#ifdef ZFS_PROJINHERIT
+ dnode_t *od_projectused_dn;
+#endif
/* quota slave instance */
struct qsd_instance *od_quota_slave;
* inode is pinned for the whole duration of lu_object life.
*
* Not modified concurrently (either setup early during object
- * creation, or assigned by osd_object_create() under write lock).
+ * creation, or assigned by osd_create() under write lock).
*/
- dmu_buf_t *oo_db;
+ dnode_t *oo_dn;
sa_handle_t *oo_sa_hdl;
nvlist_t *oo_sa_xattr;
struct list_head oo_sa_linkage;
uint64_t oo_xattr;
enum osd_destroy_type oo_destroy;
- __u32 oo_destroyed:1;
+ __u32 oo_destroyed:1,
+ oo_late_xattr:1,
+#ifdef ZFS_PROJINHERIT
+ oo_with_projid:1,
+#endif
+ oo_late_attr_set:1;
/* the i_flags in LMA */
__u32 oo_lma_flags;
unsigned char oo_recsize;
unsigned char oo_recusize; /* unit size */
};
+ uint64_t oo_parent; /* used only at object creation */
};
-
-
};
int osd_statfs(const struct lu_env *, struct dt_device *, struct obd_statfs *);
extern const struct dt_index_operations osd_acct_index_ops;
-uint64_t osd_quota_fid2dmu(const struct lu_fid *fid);
extern struct lu_device_operations osd_lu_ops;
extern struct dt_index_operations osd_dir_ops;
int osd_declare_quota(const struct lu_env *env, struct osd_device *osd,
- qid_t uid, qid_t gid, long long space,
- struct osd_thandle *oh, bool is_blk, int *flags,
- bool force);
+ qid_t uid, qid_t gid, qid_t projid, long long space,
+ struct osd_thandle *oh, int *flags,
+ enum osd_qid_declare_flags osd_qid_declare_flags);
uint64_t osd_objs_count_estimate(uint64_t refdbytes, uint64_t usedobjs,
uint64_t nrblocks, uint64_t est_maxblockshift);
+int osd_unlinked_object_free(const struct lu_env *env, struct osd_device *osd,
+ uint64_t oid);
/*
* Helpers.
return 1;
}
+/**
+ * Put the osd object once done with it.
+ *
+ * Thin wrapper that drops the dt_object reference embedded in the
+ * osd object.
+ *
+ * \param env lustre environment for the current thread
+ * \param obj osd object that needs to be put
+ */
+static inline void osd_object_put(const struct lu_env *env,
+ struct osd_object *obj)
+{
+ dt_object_put(env, &obj->oo_dt);
+}
+
static inline int osd_object_invariant(const struct lu_object *l)
{
return osd_invariant(osd_obj(l));
/* osd_object.c */
extern char *osd_obj_tag;
-void osd_object_sa_dirty_rele(struct osd_thandle *oh);
+int __osd_obj2dnode(objset_t *os, uint64_t oid, dnode_t **dnp);
+void osd_object_sa_dirty_rele(const struct lu_env *env, struct osd_thandle *oh);
+void osd_object_sa_dirty_add(struct osd_object *obj, struct osd_thandle *oh);
int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
uint64_t oid, dmu_buf_t **dbp);
struct lu_object *osd_object_alloc(const struct lu_env *env,
int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type,
void *buf, uint32_t buflen, struct osd_thandle *oh);
int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
- dmu_buf_t **zap_dbp, dmu_tx_t *tx, struct lu_attr *la,
- zap_flags_t flags);
+ dnode_t **zap_dnp, dmu_tx_t *tx, struct lu_attr *la,
+ unsigned dnsize, zap_flags_t flags);
int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
- dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la);
+ dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la);
int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
- sa_handle_t *sa_hdl, dmu_tx_t *tx,
- struct lu_attr *la, uint64_t parent);
+ struct osd_object *obj, sa_handle_t *sa_hdl, dmu_tx_t *tx,
+ struct lu_attr *la, uint64_t parent, nvlist_t *);
/* osd_oi.c */
int osd_oi_init(const struct lu_env *env, struct osd_device *o);
int osd_fid_lookup(const struct lu_env *env,
struct osd_device *, const struct lu_fid *, uint64_t *);
uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd,
- const struct lu_fid *fid, char *buf, int bufsize);
+ const struct lu_fid *fid, char *buf, int bufsize,
+ dnode_t **zdn);
int osd_options_init(void);
int osd_ost_seq_exists(const struct lu_env *env, struct osd_device *osd,
__u64 seq);
+int osd_idc_find_and_init(const struct lu_env *env, struct osd_device *osd,
+ struct osd_object *obj);
+struct osd_idmap_cache *osd_idc_find_or_init(const struct lu_env *env,
+ struct osd_device *osd,
+ const struct lu_fid *fid);
+struct osd_idmap_cache *osd_idc_find(const struct lu_env *env,
+ struct osd_device *osd,
+ const struct lu_fid *fid);
+
/* osd_index.c */
int osd_index_try(const struct lu_env *env, struct dt_object *dt,
const struct dt_index_features *feat);
uint64_t id, uint64_t dirhash);
void osd_zap_cursor_fini(zap_cursor_t *zc);
uint64_t osd_zap_cursor_serialize(zap_cursor_t *zc);
+int osd_remote_fid(const struct lu_env *env, struct osd_device *osd,
+ const struct lu_fid *fid);
/* osd_xattr.c */
-int __osd_xattr_load(struct osd_device *osd, uint64_t dnode,
- nvlist_t **sa_xattr);
+int __osd_sa_xattr_schedule_update(const struct lu_env *env,
+ struct osd_object *obj,
+ struct osd_thandle *oh);
+int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
+ struct osd_thandle *oh);
+int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
+ struct osd_thandle *oh);
+int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl,
+ nvlist_t **sa);
int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
uint64_t xattr, struct lu_buf *buf,
const char *name, int *sizep);
if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
return -ENOENT;
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
if (osd_obj2dev(obj)->od_xattr_in_sa) {
rc = __osd_sa_xattr_set(env, obj, buf, name, fl, oh);
if (rc == -EFBIG)
{
return (flags & LUSTRE_APPEND_FL ? ZFS_APPENDONLY : 0) |
(flags & LUSTRE_NODUMP_FL ? ZFS_NODUMP : 0) |
+#ifdef ZFS_PROJINHERIT
+ (flags & LUSTRE_PROJINHERIT_FL ? ZFS_PROJINHERIT : 0) |
+#endif
(flags & LUSTRE_IMMUTABLE_FL ? ZFS_IMMUTABLE : 0);
}
{
return (flags & ZFS_APPENDONLY ? LUSTRE_APPEND_FL : 0) |
(flags & ZFS_NODUMP ? LUSTRE_NODUMP_FL : 0) |
+#ifdef ZFS_PROJINHERIT
+ (flags & ZFS_PROJINHERIT ? LUSTRE_PROJINHERIT_FL : 0) |
+#endif
(flags & ZFS_IMMUTABLE ? LUSTRE_IMMUTABLE_FL : 0);
}
DMU_OT_SA, DN_BONUS_SIZE(dnodesize),
dnodesize, tx);
}
+
+/*
+ * Compute the SA bonus buffer length to use for @obj.
+ *
+ * Prefer the size implied by the already-instantiated dnode's slot
+ * count; otherwise fall back to the objset's configured dnode size,
+ * and finally to the minimum dnode size.
+ */
+static inline int
+osd_obj_bonuslen(struct osd_object *obj)
+{
+ int bonuslen = DN_BONUS_SIZE(DNODE_MIN_SIZE);
+
+ if (obj->oo_dn != NULL && obj->oo_dn->dn_num_slots != 0) {
+ bonuslen = DN_SLOTS_TO_BONUSLEN(obj->oo_dn->dn_num_slots);
+ } else {
+ objset_t *os = osd_dtobj2objset(&obj->oo_dt);
+ int dnodesize;
+
+ if (os != NULL) {
+ dnodesize = dmu_objset_dnodesize(os);
+ if (dnodesize != 0)
+ bonuslen = DN_BONUS_SIZE(dnodesize);
+ }
+ }
+
+ return bonuslen;
+}
#else
static inline uint64_t
osd_dmu_object_alloc(objset_t *os, dmu_object_type_t objtype, int blocksize,
indirect_blockshift, DMU_OT_SA,
DN_MAX_BONUSLEN, tx);
}
+
+/* Fallback when the DMU lacks dnsize support (no
+ * HAVE_DMU_OBJECT_ALLOC_DNSIZE): the bonus length is fixed. */
+static inline int
+osd_obj_bonuslen(struct osd_object *obj)
+{
+ return DN_MAX_BONUSLEN;
+}
#endif /* HAVE_DMU_OBJECT_ALLOC_DNSIZE */
#ifdef HAVE_DMU_PREFETCH_6ARG
dmu_prefetch((os), (obj), (lvl), (off))
#endif
-#ifndef DN_MAX_BONUSLEN
-#define DN_MAX_BONUSLEN DN_OLD_MAX_BONUSLEN
+/*
+ * Lazily set up the SA handle for @obj from its dnode's bonus buffer.
+ * A no-op if the handle already exists. On success an extra hold is
+ * taken on the bonus dbuf (under osd_obj_tag) so it stays pinned for
+ * the life of the handle.
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static inline int osd_sa_handle_get(struct osd_object *obj)
+{
+ struct osd_device *osd = osd_obj2dev(obj);
+ dnode_t *dn = obj->oo_dn;
+ int rc;
+
+ if (obj->oo_sa_hdl)
+ return 0;
+
+ dbuf_read(dn->dn_bonus, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH);
+ rc = -sa_handle_get_from_db(osd->od_os, &dn->dn_bonus->db, obj,
+ SA_HDL_PRIVATE, &obj->oo_sa_hdl);
+ if (rc)
+ return rc;
+ refcount_add(&dn->dn_bonus->db_holds, osd_obj_tag);
+ return 0;
+}
+
+/*
+ * Release a dnode obtained through its bonus dbuf: exit the DB_DNODE
+ * section and drop the dbuf hold taken under osd_obj_tag.
+ */
+static inline void osd_dnode_rele(dnode_t *dn)
+{
+ dmu_buf_impl_t *db;
+ LASSERT(dn);
+ LASSERT(dn->dn_bonus);
+ db = dn->dn_bonus;
+
+ DB_DNODE_EXIT(db);
+ dmu_buf_rele(&db->db, osd_obj_tag);
+}
+
+#ifdef HAVE_DMU_USEROBJ_ACCOUNTING
+
+#define OSD_DMU_USEROBJ_PREFIX DMU_OBJACCT_PREFIX
+#define OSD_DMU_USEROBJ_PREFIX_LEN DMU_OBJACCT_PREFIX_LEN
+
+/* true when the objset actually has userobj accounting data present */
+static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd)
+{
+ return dmu_objset_userobjspace_present(osd->od_os);
+}
+#else
+
+/* ZFS without userobj accounting: hardcode the prefix (presumably
+ * matching DMU_OBJACCT_PREFIX in newer ZFS — verify) and report the
+ * feature as unavailable */
+#define OSD_DMU_USEROBJ_PREFIX "obj-"
+#define OSD_DMU_USEROBJ_PREFIX_LEN 4
+
+static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd)
+{
+ return false;
+}
+#endif /* #ifdef HAVE_DMU_USEROBJ_ACCOUNTING */
+
+/*
+ * Insert @key -> @val (@int_num integers of @int_size bytes) into ZAP
+ * object @zap. Uses the by-dnode variant when @dn is given and the
+ * ZFS version provides it.
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static inline int osd_zap_add(struct osd_device *osd, uint64_t zap,
+ dnode_t *dn, const char *key,
+ int int_size, int int_num,
+ const void *val, dmu_tx_t *tx)
+{
+ LASSERT(zap != 0);
+
+#ifdef HAVE_ZAP_ADD_BY_DNODE
+ if (dn)
+ return -zap_add_by_dnode(dn, key, int_size, int_num, val, tx);
+#endif
+ return -zap_add(osd->od_os, zap, key, int_size, int_num, val, tx);
+}
+
+/*
+ * Remove @key from ZAP object @zap, preferring the by-dnode call.
+ * NOTE(review): gated by HAVE_ZAP_ADD_BY_DNODE — presumably all the
+ * by-dnode ZAP interfaces landed in ZFS together; confirm the
+ * configure check covers zap_remove_by_dnode too.
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static inline int osd_zap_remove(struct osd_device *osd, uint64_t zap,
+ dnode_t *dn, const char *key,
+ dmu_tx_t *tx)
+{
+ LASSERT(zap != 0);
+
+#ifdef HAVE_ZAP_ADD_BY_DNODE
+ if (dn)
+ return -zap_remove_by_dnode(dn, key, tx);
+#endif
+ return -zap_remove(osd->od_os, zap, key, tx);
+}
+
+
+/*
+ * Look up @key in ZAP object @zap, writing @int_num integers of
+ * @int_size bytes into @v. Prefers the by-dnode call when available
+ * (reuses the HAVE_ZAP_ADD_BY_DNODE guard — see osd_zap_remove note).
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static inline int osd_zap_lookup(struct osd_device *osd, uint64_t zap,
+ dnode_t *dn, const char *key,
+ int int_size, int int_num, void *v)
+{
+ LASSERT(zap != 0);
+
+#ifdef HAVE_ZAP_ADD_BY_DNODE
+ if (dn)
+ return -zap_lookup_by_dnode(dn, key, int_size, int_num, v);
+#endif
+ return -zap_lookup(osd->od_os, zap, key, int_size, int_num, v);
+}
+
+/*
+ * Declare a ZAP modification (@add: insertion of @name, otherwise
+ * removal) against transaction @tx, by dnode when the API allows it.
+ */
+static inline void osd_tx_hold_zap(dmu_tx_t *tx, uint64_t zap,
+ dnode_t *dn, int add, const char *name)
+{
+#ifdef HAVE_DMU_TX_HOLD_ZAP_BY_DNODE
+ if (dn) {
+ dmu_tx_hold_zap_by_dnode(tx, dn, add, name);
+ return;
+ }
+#endif
+ dmu_tx_hold_zap(tx, zap, add, name);
+}
+
+/*
+ * Declare a write of @len bytes at @off against transaction @tx.
+ * NOTE(review): the by-dnode path is gated by
+ * HAVE_DMU_TX_HOLD_ZAP_BY_DNODE — presumably both by-dnode tx-hold
+ * calls appeared in the same ZFS release; confirm the configure check.
+ */
+static inline void osd_tx_hold_write(dmu_tx_t *tx, uint64_t oid,
+ dnode_t *dn, uint64_t off, int len)
+{
+#ifdef HAVE_DMU_TX_HOLD_ZAP_BY_DNODE
+ if (dn) {
+ dmu_tx_hold_write_by_dnode(tx, dn, off, len);
+ return;
+ }
+#endif
+ dmu_tx_hold_write(tx, oid, off, len);
+}
+
+/*
+ * Write @size bytes from @buf at @offset into the object behind @dn,
+ * using the by-dnode DMU call when available. @dn must not be NULL.
+ */
+static inline void osd_dmu_write(struct osd_device *osd, dnode_t *dn,
+ uint64_t offset, uint64_t size,
+ const char *buf, dmu_tx_t *tx)
+{
+ LASSERT(dn);
+#ifdef HAVE_DMU_WRITE_BY_DNODE
+ dmu_write_by_dnode(dn, offset, size, buf, tx);
+#else
+ dmu_write(osd->od_os, dn->dn_object, offset, size, buf, tx);
+#endif
+}
+
+/*
+ * Read @size bytes at @offset from the object behind @dn into @buf,
+ * using the by-dnode DMU call when available. @dn must not be NULL.
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static inline int osd_dmu_read(struct osd_device *osd, dnode_t *dn,
+ uint64_t offset, uint64_t size,
+ char *buf, int flags)
+{
+ LASSERT(dn);
+#ifdef HAVE_DMU_READ_BY_DNODE
+ return -dmu_read_by_dnode(dn, offset, size, buf, flags);
+#else
+ return -dmu_read(osd->od_os, dn->dn_object, offset, size, buf, flags);
+#endif
+}
+
+/* Newer ZFS added a 'decrypt' argument to dmu_objset_own(); present a
+ * uniform 6-arg wrapper and drop 'decrypt' on older ZFS. */
+#ifdef HAVE_DMU_OBJSET_OWN_6ARG
+#define osd_dmu_objset_own(name, type, ronly, decrypt, tag, os) \
+ dmu_objset_own((name), (type), (ronly), (decrypt), (tag), (os))
+#else
+#define osd_dmu_objset_own(name, type, ronly, decrypt, tag, os) \
+ dmu_objset_own((name), (type), (ronly), (tag), (os))
+#endif
+
+/* Same treatment for dmu_objset_disown(): uniform 3-arg wrapper,
+ * 'decrypt' dropped when the ZFS API takes only two arguments. */
+#ifdef HAVE_DMU_OBJSET_DISOWN_3ARG
+#define osd_dmu_objset_disown(os, decrypt, tag) \
+ dmu_objset_disown((os), (decrypt), (tag))
+#else
+#define osd_dmu_objset_disown(os, decrypt, tag) \
+ dmu_objset_disown((os), (tag))
#endif
#endif /* _OSD_INTERNAL_H */