X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fobd.h;h=870c8447a3a91d176a89c1ee2332eae817aa0953;hp=bb561509d05a039fe9d1dc35c4181944b2efd4d5;hb=322968acf183ab16d952cd3026f6580957b31259;hpb=dfbc98ead8c357255909aafa7f81b3448337dde2 diff --git a/lustre/include/obd.h b/lustre/include/obd.h index bb56150..870c844 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -30,6 +30,9 @@ * Use is subject to license terms. */ /* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. */ @@ -129,17 +132,20 @@ struct lov_stripe_md { cfs_spinlock_t lsm_lock; pid_t lsm_lock_owner; /* debugging */ + /* maximum possible file size, might change as OSTs status changes, + * e.g. disconnected, deactivated */ + __u64 lsm_maxbytes; struct { /* Public members. */ __u64 lw_object_id; /* lov object id */ __u64 lw_object_seq; /* lov object seq */ - __u64 lw_maxbytes; /* maximum possible file size */ /* LOV-private members start here -- only for use in lov/. */ __u32 lw_magic; __u32 lw_stripe_size; /* size of the stripe */ __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ - unsigned lw_stripe_count; /* number of objects being striped over */ + __u16 lw_stripe_count; /* number of objects being striped over */ + __u16 lw_layout_gen; /* generation of the layout */ char lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */ } lsm_wire; @@ -148,8 +154,8 @@ struct lov_stripe_md { #define lsm_object_id lsm_wire.lw_object_id #define lsm_object_seq lsm_wire.lw_object_seq -#define lsm_maxbytes lsm_wire.lw_maxbytes #define lsm_magic lsm_wire.lw_magic +#define lsm_layout_gen lsm_wire.lw_layout_gen #define lsm_stripe_size lsm_wire.lw_stripe_size #define lsm_pattern lsm_wire.lw_pattern #define lsm_stripe_count lsm_wire.lw_stripe_count @@ -169,6 +175,7 @@ struct obd_info { - while lock handling, the flags obtained on the enqueue request are set here. - while stats, the flags used for control delay/resend. + - while setattr, the flags used for distinguish punch operation */ int oi_flags; /* Lock handle specific for every OSC lock. */ @@ -200,6 +207,57 @@ static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1, return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire); } +static inline int lov_lum_lsm_cmp(struct lov_user_md *lum, + struct lov_stripe_md *lsm) +{ + if (lsm->lsm_magic != lum->lmm_magic) + return 1; + if ((lsm->lsm_stripe_count != 0) && (lum->lmm_stripe_count != 0) && + (lsm->lsm_stripe_count != lum->lmm_stripe_count)) + return 2; + if ((lsm->lsm_stripe_size != 0) && (lum->lmm_stripe_size != 0) && + (lsm->lsm_stripe_size != lum->lmm_stripe_size)) + return 3; + if ((lsm->lsm_pattern != 0) && (lum->lmm_pattern != 0) && + (lsm->lsm_pattern != lum->lmm_pattern)) + return 4; + if ((lsm->lsm_magic == LOV_MAGIC_V3) && + (strncmp(lsm->lsm_pool_name, + ((struct lov_user_md_v3 *)lum)->lmm_pool_name, + LOV_MAXPOOLNAME) != 0)) + return 5; + return 0; +} + +static inline int lov_lum_swab_if_needed(struct lov_user_md_v3 *lumv3, + int *lmm_magic, + struct lov_user_md *lum) +{ + if (lum && cfs_copy_from_user(lumv3, lum,sizeof(struct lov_user_md_v1))) + return -EFAULT; + + *lmm_magic = lumv3->lmm_magic; + + if (*lmm_magic == __swab32(LOV_USER_MAGIC_V1)) { + lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lumv3); + *lmm_magic = LOV_USER_MAGIC_V1; + } else if (*lmm_magic == LOV_USER_MAGIC_V3) { + if (lum && cfs_copy_from_user(lumv3, lum, sizeof(*lumv3))) + return -EFAULT; + } else if (*lmm_magic == __swab32(LOV_USER_MAGIC_V3)) { + if (lum && cfs_copy_from_user(lumv3, lum, sizeof(*lumv3))) + return -EFAULT; + lustre_swab_lov_user_md_v3(lumv3); + *lmm_magic = LOV_USER_MAGIC_V3; + } else if (*lmm_magic != LOV_USER_MAGIC_V1) { + CDEBUG(D_IOCTL, + "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n", + *lmm_magic, LOV_USER_MAGIC_V1, LOV_USER_MAGIC_V3); + return -EINVAL; + } + return 0; +} + void lov_stripe_lock(struct lov_stripe_md *md); void lov_stripe_unlock(struct lov_stripe_md *md); @@ -225,10 +283,15 @@ struct brw_page { struct ost_server_data; +struct osd_properties { + size_t osd_max_ea_size; +}; + #define OBT_MAGIC 0xBDDECEAE /* hold common fields for "target" device */ struct obd_device_target { __u32 obt_magic; + __u32 obt_instance; struct super_block *obt_sb; /** last_rcvd file */ struct file *obt_rcvd_filp; @@ -240,6 +303,7 @@ struct obd_device_target { cfs_rw_semaphore_t obt_rwsem; struct vfsmount *obt_vfsmnt; struct file *obt_health_check_filp; + struct osd_properties obt_osd_properties; }; /* llog contexts */ @@ -300,7 +364,8 @@ struct filter_obd { int fo_tot_granted_clients; obd_size fo_readcache_max_filesize; - int fo_read_cache:1, /**< enable read-only cache */ + cfs_spinlock_t fo_flags_lock; + unsigned int fo_read_cache:1, /**< enable read-only cache */ fo_writethrough_cache:1,/**< read cache writes */ fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/ fo_raid_degraded:1;/**< RAID device degraded */ @@ -458,7 +523,6 @@ struct client_obd { cfs_waitq_t cl_destroy_waitq; struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_setattr_lock; struct mdc_rpc_lock *cl_close_lock; struct osc_creator cl_oscc; @@ -486,6 +550,9 @@ struct client_obd { struct lu_client_seq *cl_seq; cfs_atomic_t cl_resends; /* resend count */ + + /* ptlrpc work for writeback in ptlrpcd context */ + void *cl_writeback_work; }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) @@ -497,10 +564,10 @@ struct mgs_obd { struct vfsmount *mgs_vfsmnt; struct super_block *mgs_sb; struct dentry *mgs_configs_dir; - struct dentry *mgs_fid_de; cfs_list_t mgs_fs_db_list; cfs_semaphore_t mgs_sem; cfs_proc_dir_entry_t *mgs_proc_live; + cfs_time_t mgs_start_time; }; struct mds_obd { @@ -509,7 +576,6 @@ struct mds_obd { struct ptlrpc_service *mds_service; struct ptlrpc_service *mds_setattr_service; struct ptlrpc_service *mds_readpage_service; - cfs_dentry_t *mds_fid_de; int mds_max_mdsize; int mds_max_cookiesize; __u64 mds_io_epoch; @@ -578,11 +644,12 @@ struct obd_id_info { /* */ struct echo_obd { - struct obdo eo_oa; - cfs_spinlock_t eo_lock; - __u64 eo_lastino; - struct lustre_handle eo_nl_lock; - cfs_atomic_t eo_prep; + struct obd_device_target eo_obt; + struct obdo eo_oa; + cfs_spinlock_t eo_lock; + __u64 eo_lastino; + struct lustre_handle eo_nl_lock; + cfs_atomic_t eo_prep; }; struct ost_obd { @@ -863,8 +930,8 @@ static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies) if (num_cookies == 1) oti->oti_logcookies = &oti->oti_onecookie; else - OBD_ALLOC(oti->oti_logcookies, - num_cookies * sizeof(oti->oti_onecookie)); + OBD_ALLOC_LARGE(oti->oti_logcookies, + num_cookies * sizeof(oti->oti_onecookie)); oti->oti_numcookies = num_cookies; } @@ -877,8 +944,8 @@ static inline void oti_free_cookies(struct obd_trans_info *oti) if (oti->oti_logcookies == &oti->oti_onecookie) LASSERT(oti->oti_numcookies == 1); else - OBD_FREE(oti->oti_logcookies, - oti->oti_numcookies * sizeof(oti->oti_onecookie)); + OBD_FREE_LARGE(oti->oti_logcookies, + oti->oti_numcookies*sizeof(oti->oti_onecookie)); oti->oti_logcookies = NULL; oti->oti_numcookies = 0; } @@ -887,6 +954,8 @@ static inline void oti_free_cookies(struct obd_trans_info *oti) * Events signalled through obd_notify() upcall-chain. */ enum obd_notify_event { + /* target added */ + OBD_NOTIFY_CREATE, /* Device connect start */ OBD_NOTIFY_CONNECT, /* Device activated */ @@ -903,7 +972,10 @@ enum obd_notify_event { /* Configuration event */ OBD_NOTIFY_CONFIG, /* Trigger quota recovery */ - OBD_NOTIFY_QUOTA + OBD_NOTIFY_QUOTA, + /* Administratively deactivate/activate event */ + OBD_NOTIFY_DEACTIVATE, + OBD_NOTIFY_ACTIVATE }; /* bit-mask flags for config events */ @@ -974,9 +1046,9 @@ struct obd_llog_group { }; /* corresponds to one of the obd's */ -#define MAX_OBD_NAME 128 #define OBD_DEVICE_MAGIC 0XAB5CD6EF #define OBD_DEV_BY_DEVNAME 0xffffd0de + struct obd_device { struct obd_type *obd_type; __u32 obd_magic; @@ -988,16 +1060,15 @@ struct obd_device { struct lu_device *obd_lu_dev; int obd_minor; + /* bitfield modification is protected by obd_dev_lock */ unsigned long obd_attached:1, /* finished attach */ obd_set_up:1, /* finished setup */ obd_recovering:1, /* there are recoverable clients */ obd_abort_recovery:1,/* recovery expired */ obd_version_recov:1, /* obd uses version checking */ - obd_recovery_expired:1, obd_replayable:1, /* recovery is enabled; inform clients */ obd_no_transno:1, /* no committed-transno notification */ obd_no_recov:1, /* fail instead of retry messages */ - obd_req_replaying:1, /* replaying requests */ obd_stopping:1, /* started cleanup */ obd_starting:1, /* started setup */ obd_force:1, /* cleanup with > 0 obd refcount */ @@ -1006,7 +1077,11 @@ struct obd_device { obd_no_conn:1, /* deny new connections */ obd_inactive:1, /* device active/inactive * (for /proc/status only!!) */ + obd_no_ir:1, /* no imperative recovery. */ obd_process_conf:1; /* device is processing mgs config */ + /* use separate field as it is set in interrupt to don't mess with + * protection of other bits using _bh lock */ + unsigned long obd_recovery_expired:1; /* uuid-export hash body */ cfs_hash_t *obd_uuid_hash; /* nid-export hash body */ @@ -1024,7 +1099,7 @@ struct obd_device { struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ - cfs_spinlock_t obd_dev_lock; + cfs_spinlock_t obd_dev_lock; /* protects obd bitfield above */ cfs_semaphore_t obd_dev_sem; __u64 obd_last_committed; struct fsfilt_operations *obd_fsops; @@ -1042,25 +1117,30 @@ struct obd_device { time_t obd_eviction_timer; /* for ping evictor */ int obd_max_recoverable_clients; - int obd_connected_clients; + cfs_atomic_t obd_connected_clients; int obd_stale_clients; int obd_delayed_clients; - cfs_spinlock_t obd_processing_task_lock; /* BH lock (timer) */ + /* this lock protects all recovery list_heads, timer and + * obd_next_recovery_transno value */ + cfs_spinlock_t obd_recovery_task_lock; __u64 obd_next_recovery_transno; int obd_replayed_requests; int obd_requests_queued_for_recovery; cfs_waitq_t obd_next_transno_waitq; + /* protected by obd_recovery_task_lock */ cfs_timer_t obd_recovery_timer; time_t obd_recovery_start; /* seconds */ time_t obd_recovery_end; /* seconds, for lprocfs_status */ - time_t obd_recovery_time_hard; + int obd_recovery_time_hard; int obd_recovery_timeout; + int obd_recovery_ir_factor; /* new recovery stuff from CMD2 */ struct target_recovery_data obd_recovery_data; int obd_replayed_locks; cfs_atomic_t obd_req_replay_clients; cfs_atomic_t obd_lock_replay_clients; + /* all lists are protected by obd_recovery_task_lock */ cfs_list_t obd_req_replay_queue; cfs_list_t obd_lock_replay_queue; cfs_list_t obd_final_req_queue; @@ -1146,20 +1226,34 @@ enum obd_cleanup_stage { #define KEY_REGISTER_TARGET "register_target" #define KEY_REVIMP_UPD "revimp_update" #define KEY_SET_FS "set_fs" +#define KEY_TGT_COUNT "tgt_count" /* KEY_SET_INFO in lustre_idl.h */ #define KEY_SPTLRPC_CONF "sptlrpc_conf" #define KEY_CONNECT_FLAG "connect_flags" #define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel" - struct lu_context; +/* /!\ must be coherent with include/linux/namei.h on patched kernel */ +#define IT_OPEN (1 << 0) +#define IT_CREAT (1 << 1) +#define IT_READDIR (1 << 2) +#define IT_GETATTR (1 << 3) +#define IT_LOOKUP (1 << 4) +#define IT_UNLINK (1 << 5) +#define IT_TRUNC (1 << 6) +#define IT_GETXATTR (1 << 7) +#define IT_EXEC (1 << 8) +#define IT_PIN (1 << 9) +#define IT_LAYOUT (1 << 10) + static inline int it_to_lock_mode(struct lookup_intent *it) { /* CREAT needs to be tested before open (both could be set) */ if (it->it_op & IT_CREAT) return LCK_CW; - else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP)) + else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP | + IT_LAYOUT)) return LCK_CR; LASSERTF(0, "Invalid it_op: %d\n", it->it_op); @@ -1173,7 +1267,7 @@ struct md_op_data { struct lu_fid op_fid4; /* to the operation locks. */ mdsno_t op_mds; /* what mds server open will go to */ struct lustre_handle op_handle; - __u64 op_mod_time; + obd_time op_mod_time; const char *op_name; int op_namelen; __u32 op_mode; @@ -1220,11 +1314,10 @@ struct md_enqueue_info { struct md_op_data mi_data; struct lookup_intent mi_it; struct lustre_handle mi_lockh; - struct dentry *mi_dentry; struct inode *mi_dir; md_enqueue_cb_t mi_cb; + __u64 mi_cbdata; unsigned int mi_generation; - void *mi_cbdata; }; struct obd_ops { @@ -1315,9 +1408,9 @@ struct obd_ops { int (*o_punch)(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti, struct ptlrpc_request_set *rqset); - int (*o_sync)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, obd_size end, - void *capa); + int (*o_sync)(struct obd_export *exp, struct obd_info *oinfo, + obd_size start, obd_size end, + struct ptlrpc_request_set *set); int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst, struct lov_stripe_md *src, obd_size start, obd_size end, struct obd_trans_info *oti); @@ -1381,7 +1474,8 @@ struct obd_ops { struct obd_quotactl *); int (*o_quota_adjust_qunit)(struct obd_export *exp, struct quota_adjust_qunit *oqaq, - struct lustre_quota_ctxt *qctxt); + struct lustre_quota_ctxt *qctxt, + struct ptlrpc_request_set *rqset); int (*o_ping)(struct obd_export *exp); @@ -1479,8 +1573,8 @@ struct md_ops { int (*m_sync)(struct obd_export *, const struct lu_fid *, struct obd_capa *, struct ptlrpc_request **); int (*m_readpage)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, __u64, struct page *, - struct ptlrpc_request **); + struct obd_capa *, __u64, struct page **, + unsigned, struct ptlrpc_request **); int (*m_unlink)(struct obd_export *, struct md_op_data *, struct ptlrpc_request **); @@ -1508,7 +1602,7 @@ struct md_ops { struct ptlrpc_request *); int (*m_clear_open_replay_data)(struct obd_export *, struct obd_client_handle *); - int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u32 *); + int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *); ldlm_mode_t (*m_lock_match)(struct obd_export *, int, const struct lu_fid *, ldlm_type_t, @@ -1532,7 +1626,7 @@ struct md_ops { struct ldlm_enqueue_info *); int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *, - struct lu_fid *, __u32 *); + struct lu_fid *, __u64 *bits); /* * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to @@ -1550,7 +1644,7 @@ struct lsm_operations { void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, obd_off *); int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, - int *stripe_count); + __u16 *stripe_count); int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, struct lov_mds_md *lmm); }; @@ -1637,4 +1731,23 @@ static inline struct md_open_data *obd_mod_alloc(void) extern void obdo_from_inode(struct obdo *dst, struct inode *src, struct lu_fid *parent, obd_flag valid); +/* return 1 if client should be resend request */ +static inline int client_should_resend(int resend, struct client_obd *cli) +{ + return cfs_atomic_read(&cli->cl_resends) ? + cfs_atomic_read(&cli->cl_resends) > resend : 1; +} + +/** + * Return device name for this device + * + * XXX: lu_device is declared before obd_device, while a pointer pointing + * back to obd_device in lu_device, so this helper function defines here + * instead of in lu_object.h + */ +static inline const char *lu_dev_name(const struct lu_device *lu_dev) +{ + return lu_dev->ld_obd->obd_name; +} + #endif /* __OBD_H */