X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_internal.h;h=75a2a8a496ad7f8a921d548040210361d65936d2;hb=1ec95b2b5410286c46b95c624e1d382b1e86535a;hp=527fa5cf01a4288bdde6195cf0e3e3dac1326dbd;hpb=9fb46705ae86aa2c0ac29427f0ff24f923560eb7;p=fs%2Flustre-release.git diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 527fa5c..75a2a8a 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -52,16 +52,9 @@ /* struct dirent64 */ #include +#include #include #include -#ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD -# define journal_callback ldiskfs_journal_cb_entry -# define osd_journal_callback_set(handle, func, jcb) \ - ldiskfs_journal_callback_add(handle, func, jcb) -#else -# define osd_journal_callback_set(handle, func, jcb) \ - jbd2_journal_callback_set(handle, func, jcb) -#endif /* fsfilt_{get|put}_ops */ #include @@ -83,12 +76,24 @@ struct inode; #define OSD_COUNTERS (0) -/* Lustre special inode::i_state to indicate OI scrub skip this inode. */ -#define I_LUSTRE_NOSCRUB (1 << 31) +/* ldiskfs special inode::i_state_flags need to be accessed with + * ldiskfs_{set,clear,test}_inode_state() only */ + +/* OI scrub should skip this inode. */ +#define LDISKFS_STATE_LUSTRE_NOSCRUB 31 + +/* Do not add OI mapping for this inode. */ +#define LDISKFS_STATE_LUSTRE_NO_OI 30 /** Enable thandle usage statistics */ #define OSD_THANDLE_STATS (0) +#define MAX_OBJID_GROUP (FID_SEQ_ECHO + 1) + +#define OBJECTS "OBJECTS" +#define ADMIN_USR "admin_quotafile_v2.usr" +#define ADMIN_GRP "admin_quotafile_v2.grp" + struct osd_directory { struct iam_container od_container; struct iam_descr od_descr; @@ -140,54 +145,37 @@ struct osd_object { #endif }; -#ifdef HAVE_LDISKFS_PDO - -#define osd_ldiskfs_find_entry(dir, dentry, de, lock) \ - ll_ldiskfs_find_entry(dir, dentry, de, lock) -#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \ - ldiskfs_add_entry(handle, child, cinode, hlock) - -#else /* HAVE_LDISKFS_PDO */ - -struct htree_lock { - int dummy; +struct osd_obj_seq { + /* protects on-fly initialization */ + int oos_subdir_count; /* subdir count for each seq */ + struct dentry *oos_root; /* O/ */ + struct dentry **oos_dirs; /* O//d0-dXX */ + obd_seq oos_seq; /* seq number */ + cfs_list_t oos_seq_list; /* list to seq_list */ }; -struct htree_lock_head { - int dummy; +struct osd_obj_map { + struct dentry *om_root; /* dentry for /O */ + rwlock_t om_seq_list_lock; /* lock for seq_list */ + cfs_list_t om_seq_list; /* list head for seq */ + int om_subdir_count; + struct semaphore om_dir_init_sem; }; -#define ldiskfs_htree_lock(lock, head, inode, op) do { LBUG(); } while (0) -#define ldiskfs_htree_unlock(lock) do { LBUG(); } while (0) - -static inline struct htree_lock_head *ldiskfs_htree_lock_head_alloc(int dep) -{ - LBUG(); - return NULL; -} - -#define ldiskfs_htree_lock_head_free(lh) do { LBUG(); } while (0) - -#define LDISKFS_DUMMY_HTREE_LOCK 0xbabecafe - -static inline struct htree_lock *ldiskfs_htree_lock_alloc(void) -{ - return (struct htree_lock *)LDISKFS_DUMMY_HTREE_LOCK; -} - -static inline void ldiskfs_htree_lock_free(struct htree_lock *lk) -{ - LASSERT((unsigned long)lk == LDISKFS_DUMMY_HTREE_LOCK); -} +struct osd_mdobj { + struct dentry *om_root; /* AGENT/ */ + obd_seq om_index; /* mdt index */ + cfs_list_t om_list; /* list to omm_list */ +}; -#define HTREE_HBITS_DEF 0 +struct osd_mdobj_map { + struct dentry *omm_remote_parent; +}; #define osd_ldiskfs_find_entry(dir, dentry, de, lock) \ - ll_ldiskfs_find_entry(dir, dentry, de) -#define osd_ldiskfs_add_entry(handle, child, cinode, lock) \ - ldiskfs_add_entry(handle, child, cinode) - -#endif /* HAVE_LDISKFS_PDO */ + ll_ldiskfs_find_entry(dir, dentry, de, lock) +#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \ + ldiskfs_add_entry(handle, child, cinode, hlock) #define OSD_OTABLE_IT_CACHE_SIZE 128 #define OSD_OTABLE_IT_CACHE_MASK (~(OSD_OTABLE_IT_CACHE_SIZE - 1)) @@ -221,11 +209,9 @@ struct osd_otable_cache { struct osd_otable_it { struct osd_device *ooi_dev; + pid_t ooi_pid; struct osd_otable_cache ooi_cache; - /* For osd_otable_it_key. */ - __u8 ooi_key[16]; - /* The following bits can be updated/checked w/o lock protection. * If more bits will be introduced in the future and need lock to * protect, please add comment. */ @@ -235,7 +221,9 @@ struct osd_otable_it { * filled into cache. */ ooi_user_ready:1, /* The user out of OSD is * ready to iterate. */ - ooi_waiting:1; /* it::next is waiting. */ + ooi_waiting:1, /* it::next is waiting. */ + ooi_stopping:1; /* Someone is stopping + * the iteration. */ }; extern const int osd_dto_credits_noquota[]; @@ -271,16 +259,16 @@ struct osd_device { struct obd_statfs od_statfs; spinlock_t od_osfs_lock; - unsigned int od_noscrub:1; + unsigned int od_noscrub:1, + od_dirent_journal:1, + od_handle_nolma:1; struct fsfilt_operations *od_fsops; int od_connects; struct lu_site od_site; - /* - * mapping for legacy OST objids - */ - struct osd_compat_objid *od_ost_map; + struct osd_obj_map *od_ost_map; + struct osd_mdobj_map *od_mdt_map; unsigned long long od_readcache_max_filesize; int od_read_cache; @@ -293,34 +281,16 @@ struct osd_device { struct mutex od_otable_mutex; struct osd_otable_it *od_otable_it; struct osd_scrub od_scrub; + cfs_list_t od_ios_list; /* service name associated with the osd device */ char od_svname[MAX_OBD_NAME]; + char od_mntdev[MAX_OBD_NAME]; /* quota slave instance */ struct qsd_instance *od_quota_slave; }; -#define OSD_TRACK_DECLARES -#ifdef OSD_TRACK_DECLARES -#define OSD_DECLARE_OP(oh, op, credits) \ -do { \ - LASSERT((oh)->ot_handle == NULL); \ - ((oh)->ot_declare_ ##op)++; \ - ((oh)->ot_declare_ ##op ##_cred) += (credits); \ - (oh)->ot_credits += (credits); \ -} while (0) -#define OSD_EXEC_OP(handle, op) \ -do { \ - struct osd_thandle *oh = container_of(handle, typeof(*oh), ot_super); \ - LASSERT((oh)->ot_declare_ ##op > 0); \ - ((oh)->ot_declare_ ##op)--; \ -} while (0) -#else -#define OSD_DECLARE_OP(oh, op, credits) (oh)->ot_credits += (credits) -#define OSD_EXEC_OP(oh, op) -#endif - /* There are at most 10 uid/gids are affected in a transaction, and * that's rename case: * - 2 for source parent uid & gid; @@ -334,10 +304,29 @@ do { \ */ #define OSD_MAX_UGID_CNT 10 +enum { + OSD_OT_ATTR_SET = 0, + OSD_OT_PUNCH = 1, + OSD_OT_XATTR_SET = 2, + OSD_OT_CREATE = 3, + OSD_OT_DESTROY = 4, + OSD_OT_REF_ADD = 5, + OSD_OT_REF_DEL = 6, + OSD_OT_WRITE = 7, + OSD_OT_INSERT = 8, + OSD_OT_DELETE = 9, + OSD_OT_QUOTA = 10, + OSD_OT_MAX = 11 +}; + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 90, 0) +# define OSD_TRACK_DECLARES +#endif + struct osd_thandle { struct thandle ot_super; handle_t *ot_handle; - struct journal_callback ot_jcb; + struct ldiskfs_journal_cb_entry ot_jcb; cfs_list_t ot_dcb_list; /* Link to the device, for debugging. */ struct lu_ref_link *ot_dev_link; @@ -346,36 +335,6 @@ struct osd_thandle { unsigned short ot_id_type; uid_t ot_id_array[OSD_MAX_UGID_CNT]; struct lquota_trans *ot_quota_trans; - -#ifdef OSD_TRACK_DECLARES - /* Tracking for transaction credits, to allow debugging and optimizing - * cases where a large number of credits are being allocated for - * single transaction. */ - unsigned char ot_declare_attr_set; - unsigned char ot_declare_punch; - unsigned char ot_declare_xattr_set; - unsigned char ot_declare_create; - unsigned char ot_declare_destroy; - unsigned char ot_declare_ref_add; - unsigned char ot_declare_ref_del; - unsigned char ot_declare_write; - unsigned char ot_declare_insert; - unsigned char ot_declare_delete; - unsigned char ot_declare_quota; - - unsigned short ot_declare_attr_set_cred; - unsigned short ot_declare_punch_cred; - unsigned short ot_declare_xattr_set_cred; - unsigned short ot_declare_create_cred; - unsigned short ot_declare_destroy_cred; - unsigned short ot_declare_ref_add_cred; - unsigned short ot_declare_ref_del_cred; - unsigned short ot_declare_write_cred; - unsigned short ot_declare_insert_cred; - unsigned short ot_declare_delete_cred; - unsigned short ot_declare_quota_cred; -#endif - #if OSD_THANDLE_STATS /** time when this handle was allocated */ cfs_time_t oth_alloced; @@ -507,20 +466,22 @@ struct osd_it_quota { #define MAX_BLOCKS_PER_PAGE (CFS_PAGE_SIZE / 512) struct osd_iobuf { - cfs_waitq_t dr_wait; - cfs_atomic_t dr_numreqs; /* number of reqs being processed */ - int dr_max_pages; - int dr_npages; - int dr_error; - int dr_frags; - unsigned int dr_ignore_quota:1; - unsigned int dr_elapsed_valid:1; /* we really did count time */ - unsigned int dr_rw:1; - struct page *dr_pages[PTLRPC_MAX_BRW_PAGES]; - unsigned long dr_blocks[PTLRPC_MAX_BRW_PAGES*MAX_BLOCKS_PER_PAGE]; - unsigned long dr_start_time; - unsigned long dr_elapsed; /* how long io took */ - struct osd_device *dr_dev; + cfs_waitq_t dr_wait; + cfs_atomic_t dr_numreqs; /* number of reqs being processed */ + int dr_max_pages; + int dr_npages; + int dr_error; + int dr_frags; + unsigned int dr_ignore_quota:1; + unsigned int dr_elapsed_valid:1; /* we really did count time */ + unsigned int dr_rw:1; + struct lu_buf dr_pg_buf; + struct page **dr_pages; + struct lu_buf dr_bl_buf; + unsigned long *dr_blocks; + unsigned long dr_start_time; + unsigned long dr_elapsed; /* how long io took */ + struct osd_device *dr_dev; unsigned int dr_init_at; /* the line iobuf was initialized */ }; @@ -576,10 +537,10 @@ struct osd_thread_info { struct osd_it_quota oti_it_quota; }; - /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */ - void *oti_it_ea_buf; + /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */ + void *oti_it_ea_buf; - cfs_kstatfs_t oti_ksfs; + struct kstatfs oti_ksfs; /** IAM iterator for index operation. */ struct iam_iterator oti_idx_it; @@ -603,37 +564,64 @@ struct osd_thread_info { /** used in osd_fid_set() to put xattr */ struct lu_buf oti_buf; /** used in osd_ea_fid_set() to set fid into common ea */ - struct lustre_mdt_attrs oti_mdt_attrs; - /** 0-copy IO */ - struct osd_iobuf oti_iobuf; - struct inode oti_inode; - int oti_created[PTLRPC_MAX_BRW_PAGES]; - struct lu_env oti_obj_delete_tx_env; + union { + struct lustre_mdt_attrs oti_mdt_attrs; + /* old LMA for compatibility */ + char oti_mdt_attrs_old[LMA_OLD_SIZE]; + }; + /** 0-copy IO */ + struct osd_iobuf oti_iobuf; + struct inode oti_inode; #define OSD_FID_REC_SZ 32 - char oti_ldp[OSD_FID_REC_SZ]; - char oti_ldp2[OSD_FID_REC_SZ]; + char oti_ldp[OSD_FID_REC_SZ]; + char oti_ldp2[OSD_FID_REC_SZ]; /* used by quota code */ union { +#ifdef HAVE_DQUOT_FS_DISK_QUOTA + struct fs_disk_quota oti_fdq; +#else struct if_dqblk oti_dqblk; +#endif struct if_dqinfo oti_dqinfo; }; struct lquota_id_info oti_qi; struct lquota_trans oti_quota_trans; union lquota_rec oti_quota_rec; __u64 oti_quota_id; + struct lu_seq_range oti_seq_range; + +#ifdef OSD_TRACK_DECLARES + /* Tracking for transaction credits, to allow debugging and optimizing + * cases where a large number of credits are being allocated for + * single transaction. */ + unsigned char oti_declare_ops[OSD_OT_MAX]; + unsigned char oti_declare_ops_rb[OSD_OT_MAX]; + unsigned short oti_declare_ops_cred[OSD_OT_MAX]; + bool oti_rollback; +#endif + + char oti_name[48]; }; extern int ldiskfs_pdo; +static inline int __osd_xattr_set(struct osd_thread_info *info, + struct inode *inode, const char *name, + const void *buf, int buflen, int fl) +{ + struct dentry *dentry = &info->oti_child_dentry; + + ll_vfs_dq_init(inode); + dentry->d_inode = inode; + return inode->i_op->setxattr(dentry, name, buf, buflen, fl); +} + #ifdef LPROCFS /* osd_lproc.c */ void lprocfs_osd_init_vars(struct lprocfs_static_vars *lvars); int osd_procfs_init(struct osd_device *osd, const char *name); int osd_procfs_fini(struct osd_device *osd); -void osd_lprocfs_time_start(const struct lu_env *env); -void osd_lprocfs_time_end(const struct lu_env *env, - struct osd_device *osd, int op); void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf); #endif @@ -643,31 +631,29 @@ int osd_object_auth(const struct lu_env *env, struct dt_object *dt, struct lustre_capa *capa, __u64 opc); struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, struct osd_inode_id *id); -struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, - struct osd_inode_id *id, struct lu_fid *fid); - -int osd_compat_init(struct osd_device *dev); -void osd_compat_fini(struct osd_device *dev); -int osd_compat_objid_lookup(struct osd_thread_info *info, - struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id); -int osd_compat_objid_insert(struct osd_thread_info *info, - struct osd_device *osd, - const struct lu_fid *fid, - const struct osd_inode_id *id, struct thandle *th); -int osd_compat_objid_delete(struct osd_thread_info *info, - struct osd_device *osd, - const struct lu_fid *fid, struct thandle *th); -int osd_compat_spec_lookup(struct osd_thread_info *info, - struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id); -int osd_compat_spec_insert(struct osd_thread_info *info, - struct osd_device *osd, - const struct lu_fid *fid, - const struct osd_inode_id *id, struct thandle *th); +int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode, + const struct lu_fid *fid, __u64 flags); +int osd_get_lma(struct osd_thread_info *info, struct inode *inode, + struct dentry *dentry, struct lustre_mdt_attrs *lma); + +int osd_obj_map_init(const struct lu_env *env, struct osd_device *osd); +void osd_obj_map_fini(struct osd_device *dev); +int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_obj_map_insert(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, const struct osd_inode_id *id, + struct thandle *th); +int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct thandle *th); +int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, const struct osd_inode_id *id, + struct thandle *th); void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags); int osd_scrub_file_store(struct osd_scrub *scrub); +char *osd_lf_fid2name(const struct lu_fid *fid); int osd_scrub_start(struct osd_device *dev); int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev); void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev); @@ -677,6 +663,16 @@ int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, struct osd_inode_id *id); int osd_scrub_dump(struct osd_device *dev, char *buf, int len); +int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, + const struct lu_fid *fid, struct lu_seq_range *range); + +int osd_delete_from_remote_parent(const struct lu_env *env, + struct osd_device *osd, + struct osd_object *obj, + struct osd_thandle *oh); +int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd, + struct osd_object *obj, struct osd_thandle *oh); + /* osd_quota_fmt.c */ int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj, int type, uint blk, int depth, uint index, @@ -701,6 +697,9 @@ void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec); int osd_quota_migration(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat); +/* osd_compat.c */ +struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq); + static inline bool is_quota_glb_feat(const struct dt_index_features *feat) { return (feat == &dt_quota_iusr_features || @@ -749,7 +748,7 @@ static inline struct osd_oi *osd_fid2oi(struct osd_device *osd, const struct lu_fid *fid) { LASSERTF(!fid_is_idif(fid), DFID"\n", PFID(fid)); - LASSERTF(!fid_is_igif(fid), DFID"\n", PFID(fid)); + LASSERTF(!fid_is_last_id(fid), DFID"\n", PFID(fid)); LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1); /* It can work even od_oi_count equals to 1 although it's unexpected, * the only reason we set it to 1 is for performance measurement */ @@ -811,6 +810,17 @@ static inline journal_t *osd_journal(const struct osd_device *dev) return LDISKFS_SB(osd_sb(dev))->s_journal; } +static inline struct seq_server_site *osd_seq_site(struct osd_device *osd) +{ + return osd->od_dt_dev.dd_lu_dev.ld_site->ld_seq_site; +} + +static inline char *osd_name(struct osd_device *osd) +{ + return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name; +} + + extern const struct dt_body_operations osd_body_ops; extern struct lu_context_key osd_key; @@ -871,6 +881,78 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, return child_dentry; } +#ifdef OSD_TRACK_DECLARES +extern int osd_trans_declare_op2rb[]; + +static inline void osd_trans_declare_op(const struct lu_env *env, + struct osd_thandle *oh, + unsigned int op, int credits) +{ + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(oh->ot_handle == NULL); + LASSERT(op < OSD_OT_MAX); + + oti->oti_declare_ops[op]++; + oti->oti_declare_ops_cred[op] += credits; + oh->ot_credits += credits; +} + +static inline void osd_trans_exec_op(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct osd_thandle *oh = container_of(th, struct osd_thandle, + ot_super); + unsigned int rb; + + LASSERT(oh->ot_handle != NULL); + LASSERT(op < OSD_OT_MAX); + + if (likely(!oti->oti_rollback && oti->oti_declare_ops[op] > 0)) { + oti->oti_declare_ops[op]--; + oti->oti_declare_ops_rb[op]++; + } else { + /* all future updates are considered rollback */ + oti->oti_rollback = true; + rb = osd_trans_declare_op2rb[op]; + LASSERTF(rb < OSD_OT_MAX, "op = %u\n", op); + LASSERTF(oti->oti_declare_ops_rb[rb] > 0, "rb = %u\n", rb); + oti->oti_declare_ops_rb[rb]--; + } +} + +static inline void osd_trans_declare_rb(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct osd_thandle *oh = container_of(th, struct osd_thandle, + ot_super); + + LASSERT(oh->ot_handle != NULL); + LASSERT(op < OSD_OT_MAX); + + oti->oti_declare_ops_rb[op]++; +} +#else +static inline void osd_trans_declare_op(const struct lu_env *env, + struct osd_thandle *oh, + unsigned int op, int credits) +{ + oh->ot_credits += credits; +} + +static inline void osd_trans_exec_op(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ +} + +static inline void osd_trans_declare_rb(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ +} +#endif + /** * Helper function to pack the fid, ldiskfs stores fid in packed format. */