* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define IOC_MDC_TYPE 'i'
#define IOC_MDC_MIN_NR 20
-/* Moved to lustre_user.h
-#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_ioctl_data *)
-#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */
#define IOC_MDC_MAX_NR 50
#include <lustre/lustre_idl.h>
};
struct lov_oinfo { /* per-stripe data structure */
- __u64 loi_id; /* object ID on the target OST */
- __u64 loi_gr; /* object group on the target OST */
+ struct ost_id loi_oi; /* object ID/Sequence on the target OST */
int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */
int loi_ost_gen; /* generation of this loi_ost_idx */
struct ost_lvb loi_lvb;
struct osc_async_rc loi_ar;
};
+#define loi_id loi_oi.oi_id
+#define loi_seq loi_oi.oi_seq
static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms)
{
cfs_spinlock_t lsm_lock;
pid_t lsm_lock_owner; /* debugging */
+ /* maximum possible file size, might change as OSTs status changes,
+ * e.g. disconnected, deactivated */
+ __u64 lsm_maxbytes;
struct {
/* Public members. */
__u64 lw_object_id; /* lov object id */
- __u64 lw_object_gr; /* lov object group */
- __u64 lw_maxbytes; /* maximum possible file size */
+ __u64 lw_object_seq; /* lov object seq */
/* LOV-private members start here -- only for use in lov/. */
__u32 lw_magic;
};
#define lsm_object_id lsm_wire.lw_object_id
-#define lsm_object_gr lsm_wire.lw_object_gr
-#define lsm_maxbytes lsm_wire.lw_maxbytes
+#define lsm_object_seq lsm_wire.lw_object_seq
#define lsm_magic lsm_wire.lw_magic
#define lsm_stripe_size lsm_wire.lw_stripe_size
#define lsm_pattern lsm_wire.lw_pattern
- while lock handling, the flags obtained on the enqueue
request are set here.
- while stats, the flags used for control delay/resend.
+ - while setattr, the flags used to distinguish the punch operation
*/
int oi_flags;
/* Lock handle specific for every OSC lock. */
struct ost_server_data;
+struct osd_properties { /* embedded in obd_device_target as obt_osd_properties */
+ size_t osd_max_ea_size; /* presumably max EA size in bytes -- TODO confirm */
+};
+
+#define OBT_MAGIC 0xBDDECEAE
/* hold common fields for "target" device */
struct obd_device_target {
+ __u32 obt_magic;
+ __u32 obt_instance;
struct super_block *obt_sb;
/** last_rcvd file */
struct file *obt_rcvd_filp;
- /** server data in last_rcvd file */
- struct lr_server_data *obt_lsd;
- /** Lock protecting client bitmap */
- cfs_spinlock_t obt_client_bitmap_lock;
- /** Bitmap of known clients */
- unsigned long *obt_client_bitmap;
- /** Server last transaction number */
- __u64 obt_last_transno;
- /** Lock protecting last transaction number */
- cfs_spinlock_t obt_translock;
- /** Number of mounts */
+ struct lu_target *obt_lut;
__u64 obt_mount_count;
cfs_semaphore_t obt_quotachecking;
struct lustre_quota_ctxt obt_qctxt;
cfs_rw_semaphore_t obt_rwsem;
struct vfsmount *obt_vfsmnt;
struct file *obt_health_check_filp;
+ struct osd_properties obt_osd_properties;
};
/* llog contexts */
struct filter_obd {
/* NB this field MUST be first */
struct obd_device_target fo_obt;
- struct lu_target fo_lut;
const char *fo_fstype;
int fo_group_count;
cfs_semaphore_t fo_init_lock; /* group initialization lock */
int fo_committed_group;
-#define CLIENT_QUOTA_DEFAULT_RESENDS 10
-
cfs_spinlock_t fo_objidlock; /* protect fo_lastobjid */
unsigned long fo_destroys_in_progress;
int fo_tot_granted_clients;
obd_size fo_readcache_max_filesize;
- int fo_read_cache:1, /**< enable read-only cache */
+ cfs_spinlock_t fo_flags_lock;
+ unsigned int fo_read_cache:1, /**< enable read-only cache */
fo_writethrough_cache:1,/**< read cache writes */
fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/
fo_raid_degraded:1;/**< RAID device degraded */
int fo_fmd_max_num; /* per exp filter_mod_data */
int fo_fmd_max_age; /* jiffies to fmd expiry */
+ unsigned long fo_syncjournal:1, /* sync journal on writes */
+ fo_sync_lock_cancel:2;/* sync on lock cancel */
+
/* sptlrpc stuff */
cfs_rwlock_t fo_sptlrpc_lock;
int fo_sec_level;
};
-#define fo_translock fo_obt.obt_translock
-#define fo_rcvd_filp fo_obt.obt_rcvd_filp
-#define fo_fsd fo_obt.obt_lsd
-#define fo_last_rcvd_slots fo_obt.obt_client_bitmap
-#define fo_mount_count fo_obt.obt_mount_count
-#define fo_vfsmnt fo_obt.obt_vfsmnt
-
struct timeout_item {
enum timeout_event ti_event;
cfs_time_t ti_timeout;
#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
#define OSC_DEFAULT_RESENDS 10
+/* possible values for fo_sync_lock_cancel */
+enum { /* values must fit the 2-bit fo_sync_lock_cancel bitfield */
+ NEVER_SYNC_ON_CANCEL = 0, /* presumably: never sync on lock cancel -- confirm */
+ BLOCKING_SYNC_ON_CANCEL = 1, /* presumably: sync only on blocking cancels -- confirm */
+ ALWAYS_SYNC_ON_CANCEL = 2, /* presumably: sync on every lock cancel -- confirm */
+ NUM_SYNC_ON_CANCEL_STATES /* evaluates to 3: the count of states listed above */
+};
+
#define MDC_MAX_RIF_DEFAULT 8
#define MDC_MAX_RIF_MAX 512
cfs_waitq_t cl_destroy_waitq;
struct mdc_rpc_lock *cl_rpc_lock;
- struct mdc_rpc_lock *cl_setattr_lock;
struct mdc_rpc_lock *cl_close_lock;
struct osc_creator cl_oscc;
struct lu_client_seq *cl_seq;
cfs_atomic_t cl_resends; /* resend count */
- cfs_atomic_t cl_quota_resends; /* quota related resend count */
};
#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */
struct mgs_obd {
+ struct obd_device_target mgs_obt;
struct ptlrpc_service *mgs_service;
struct vfsmount *mgs_vfsmnt;
struct super_block *mgs_sb;
struct dentry *mgs_configs_dir;
- struct dentry *mgs_fid_de;
cfs_list_t mgs_fs_db_list;
cfs_semaphore_t mgs_sem;
cfs_proc_dir_entry_t *mgs_proc_live;
+ cfs_time_t mgs_start_time;
};
struct mds_obd {
struct ptlrpc_service *mds_service;
struct ptlrpc_service *mds_setattr_service;
struct ptlrpc_service *mds_readpage_service;
- cfs_dentry_t *mds_fid_de;
int mds_max_mdsize;
int mds_max_cookiesize;
__u64 mds_io_epoch;
cfs_dentry_t *mds_logs_dir;
cfs_dentry_t *mds_objects_dir;
struct llog_handle *mds_cfg_llh;
- struct obd_device *mds_osc_obd; /* XXX lov_obd */
+ struct obd_device *mds_lov_obd;
struct obd_uuid mds_lov_uuid;
char *mds_profile;
- struct obd_export *mds_osc_exp; /* XXX lov_exp */
+ struct obd_export *mds_lov_exp;
struct lov_desc mds_lov_desc;
__u32 mds_id;
struct lustre_quota_info mds_quota_info;
- cfs_semaphore_t mds_qonoff_sem;
+ cfs_rw_semaphore_t mds_qonoff_sem;
cfs_semaphore_t mds_health_sem;
unsigned long mds_fl_user_xattr:1,
mds_fl_acl:1,
cfs_rw_semaphore_t mds_notify_lock;
};
-#define mds_transno_lock mds_obt.obt_translock
-#define mds_rcvd_filp mds_obt.obt_rcvd_filp
-#define mds_server_data mds_obt.obt_lsd
-#define mds_client_bitmap mds_obt.obt_client_bitmap
-#define mds_mount_count mds_obt.obt_mount_count
-#define mds_last_transno mds_obt.obt_last_transno
-#define mds_vfsmnt mds_obt.obt_vfsmnt
-
/* lov objid */
extern __u32 mds_max_ost_index;
/* */
struct echo_obd {
- struct obdo eo_oa;
- cfs_spinlock_t eo_lock;
- __u64 eo_lastino;
- struct lustre_handle eo_nl_lock;
- cfs_atomic_t eo_prep;
+ struct obd_device_target eo_obt; /* NOTE(review): placed first like filter_obd's fo_obt ("MUST be first") -- confirm it is required here */
+ struct obdo eo_oa;
+ cfs_spinlock_t eo_lock;
+ __u64 eo_lastino;
+ struct lustre_handle eo_nl_lock;
+ cfs_atomic_t eo_prep;
};
struct ost_obd {
unsigned long lqr_dirty:1; /* recalc round-robin list */
};
+/* allow statfs data caching for 1 second */
+#define OBD_STATFS_CACHE_SECONDS 1
+
struct lov_statfs_data {
struct obd_info lsd_oi;
struct obd_statfs lsd_statfs;
#define LUSTRE_CMM_NAME "cmm"
#define LUSTRE_MDD_NAME "mdd"
-#define LUSTRE_OSD_NAME "osd"
+#define LUSTRE_OSD_NAME "osd-ldiskfs"
#define LUSTRE_VVP_NAME "vvp"
#define LUSTRE_LMV_NAME "lmv"
#define LUSTRE_CMM_MDC_NAME "cmm-mdc"
if (num_cookies == 1)
oti->oti_logcookies = &oti->oti_onecookie;
else
- OBD_ALLOC(oti->oti_logcookies,
- num_cookies * sizeof(oti->oti_onecookie));
+ OBD_ALLOC_LARGE(oti->oti_logcookies,
+ num_cookies * sizeof(oti->oti_onecookie));
oti->oti_numcookies = num_cookies;
}
if (oti->oti_logcookies == &oti->oti_onecookie)
LASSERT(oti->oti_numcookies == 1);
else
- OBD_FREE(oti->oti_logcookies,
- oti->oti_numcookies * sizeof(oti->oti_onecookie));
+ OBD_FREE_LARGE(oti->oti_logcookies,
+ oti->oti_numcookies*sizeof(oti->oti_onecookie));
oti->oti_logcookies = NULL;
oti->oti_numcookies = 0;
}
* Events signalled through obd_notify() upcall-chain.
*/
enum obd_notify_event {
+ /* target added */
+ OBD_NOTIFY_CREATE,
/* Device connect start */
OBD_NOTIFY_CONNECT,
/* Device activated */
/* Configuration event */
OBD_NOTIFY_CONFIG,
/* Trigger quota recovery */
- OBD_NOTIFY_QUOTA
+ OBD_NOTIFY_QUOTA,
+ /* Administratively deactivate/activate event */
+ OBD_NOTIFY_DEACTIVATE,
+ OBD_NOTIFY_ACTIVATE
};
/* bit-mask flags for config events */
cfs_completion_t trd_finishing;
};
-enum filter_groups {
- FILTER_GROUP_MDS0 = 0,
- FILTER_GROUP_LLOG = 1,
- FILTER_GROUP_ECHO = 2 ,
- FILTER_GROUP_MDS1_N_BASE = 3
-};
-
/**
* In HEAD for CMD, the object is created in group number which is 3>=
* or indexing starts from 3. To test this assertions are added to disallow
* 2. The group number indexing starts from 0 instead of 3
*/
-static inline int filter_group_is_mds(obd_gr group)
-{
- return (group == FILTER_GROUP_MDS0 ||
- group >= FILTER_GROUP_MDS1_N_BASE);
-}
+#define LASSERT_SEQ_IS_MDT(seq) LASSERT(fid_seq_is_mdt(seq))
-#define LASSERT_MDS_GROUP(group) LASSERT(filter_group_is_mds(group))
-
-static inline __u64 objgrp_to_mdsno(obd_gr group)
+static inline __u64 objseq_to_mdsno(obd_seq seq) /* map an MDT object sequence to an MDS number */
{
- LASSERT(filter_group_is_mds(group));
- if (group == FILTER_GROUP_MDS0)
+ LASSERT_SEQ_IS_MDT(seq); /* asserts fid_seq_is_mdt(seq) */
+ if (seq == FID_SEQ_OST_MDT0) /* MDT0's sequence maps to MDS number 0 */
return 0;
- return group - FILTER_GROUP_MDS1_N_BASE + 1;
+ return seq - FID_SEQ_OST_MDT1 + 1; /* FID_SEQ_OST_MDT1 -> 1, the next sequence -> 2, ... */
}
-static inline int mdt_to_obd_objgrp(int mdtid)
+static inline int mdt_to_obd_objseq(int mdtid) /* map an MDT index to its object sequence */
{
/**
- * MDS0 uses group 0 always, other MDSes will use groups from
- * FILTER_GROUP_MDS1_N_BASE
+ * MDS0 uses seq 0 pre FID-on-OST, other MDSes will use seq from
+ * FID_SEQ_OST_MDT1
*/
if (mdtid) /* any non-zero MDT index */
- return FILTER_GROUP_MDS1_N_BASE + mdtid - 1;
+ return FID_SEQ_OST_MDT1 + mdtid - 1; /* mdtid 1 -> FID_SEQ_OST_MDT1, 2 -> the next, ... */
return 0; /* MDT0: literal 0, not FID_SEQ_OST_MDT0 -- NOTE(review): confirm they are equal */
}
-static inline __u64 obdo_mdsno(struct obdo *oa)
-{
- LASSERT((oa->o_valid & OBD_MD_FLGROUP));
- return objgrp_to_mdsno(oa->o_gr);
-}
-
-static inline int obdo_is_mds(struct obdo *oa)
-{
- LASSERT(oa->o_valid & OBD_MD_FLGROUP);
- return filter_group_is_mds(oa->o_gr);
-}
-
struct obd_llog_group {
cfs_list_t olg_list;
- int olg_group;
+ int olg_seq;
struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS];
cfs_waitq_t olg_waitq;
cfs_spinlock_t olg_lock;
#define MAX_OBD_NAME 128
#define OBD_DEVICE_MAGIC 0XAB5CD6EF
#define OBD_DEV_BY_DEVNAME 0xffffd0de
+
struct obd_device {
struct obd_type *obd_type;
__u32 obd_magic;
struct lu_device *obd_lu_dev;
int obd_minor;
+ /* bitfield modification is protected by obd_dev_lock */
unsigned long obd_attached:1, /* finished attach */
obd_set_up:1, /* finished setup */
obd_recovering:1, /* there are recoverable clients */
obd_abort_recovery:1,/* recovery expired */
obd_version_recov:1, /* obd uses version checking */
- obd_recovery_expired:1,
obd_replayable:1, /* recovery is enabled; inform clients */
obd_no_transno:1, /* no committed-transno notification */
obd_no_recov:1, /* fail instead of retry messages */
- obd_req_replaying:1, /* replaying requests */
obd_stopping:1, /* started cleanup */
obd_starting:1, /* started setup */
obd_force:1, /* cleanup with > 0 obd refcount */
obd_no_conn:1, /* deny new connections */
obd_inactive:1, /* device active/inactive
* (for /proc/status only!!) */
+ obd_no_ir:1, /* no imperative recovery. */
obd_process_conf:1; /* device is processing mgs config */
+ /* use a separate field, as it is set in interrupt context, so that it
+ * does not interfere with the protection of the other bits by the _bh lock */
+ unsigned long obd_recovery_expired:1;
/* uuid-export hash body */
cfs_hash_t *obd_uuid_hash;
/* nid-export hash body */
struct ldlm_namespace *obd_namespace;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
/* a spinlock is OK for what we do now, may need a semaphore later */
- cfs_spinlock_t obd_dev_lock;
+ cfs_spinlock_t obd_dev_lock; /* protects obd bitfield above */
cfs_semaphore_t obd_dev_sem;
__u64 obd_last_committed;
struct fsfilt_operations *obd_fsops;
time_t obd_eviction_timer; /* for ping evictor */
int obd_max_recoverable_clients;
- int obd_connected_clients;
+ cfs_atomic_t obd_connected_clients;
int obd_stale_clients;
int obd_delayed_clients;
- cfs_spinlock_t obd_processing_task_lock; /* BH lock (timer) */
+ /* this lock protects all recovery list_heads, timer and
+ * obd_next_recovery_transno value */
+ cfs_spinlock_t obd_recovery_task_lock;
__u64 obd_next_recovery_transno;
int obd_replayed_requests;
int obd_requests_queued_for_recovery;
cfs_waitq_t obd_next_transno_waitq;
+ /* protected by obd_recovery_task_lock */
cfs_timer_t obd_recovery_timer;
time_t obd_recovery_start; /* seconds */
time_t obd_recovery_end; /* seconds, for lprocfs_status */
- time_t obd_recovery_max_time; /* seconds, bz13079 */
+ int obd_recovery_time_hard;
int obd_recovery_timeout;
+ int obd_recovery_ir_factor;
/* new recovery stuff from CMD2 */
struct target_recovery_data obd_recovery_data;
int obd_replayed_locks;
cfs_atomic_t obd_req_replay_clients;
cfs_atomic_t obd_lock_replay_clients;
+ /* all lists are protected by obd_recovery_task_lock */
cfs_list_t obd_req_replay_queue;
cfs_list_t obd_lock_replay_queue;
cfs_list_t obd_final_req_queue;
#define KEY_REGISTER_TARGET "register_target"
#define KEY_REVIMP_UPD "revimp_update"
#define KEY_SET_FS "set_fs"
+#define KEY_TGT_COUNT "tgt_count"
/* KEY_SET_INFO in lustre_idl.h */
#define KEY_SPTLRPC_CONF "sptlrpc_conf"
-
+#define KEY_CONNECT_FLAG "connect_flags"
+#define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel"
struct lu_context;
+/* /!\ must be coherent with include/linux/namei.h on patched kernel */
+#define IT_OPEN (1 << 0)
+#define IT_CREAT (1 << 1)
+#define IT_READDIR (1 << 2)
+#define IT_GETATTR (1 << 3)
+#define IT_LOOKUP (1 << 4)
+#define IT_UNLINK (1 << 5)
+#define IT_TRUNC (1 << 6)
+#define IT_GETXATTR (1 << 7)
+#define IT_EXEC (1 << 8)
+#define IT_PIN (1 << 9)
+
static inline int it_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
struct lu_fid op_fid4; /* to the operation locks. */
mdsno_t op_mds; /* what mds server open will go to */
struct lustre_handle op_handle;
- __u64 op_mod_time;
+ obd_time op_mod_time;
const char *op_name;
int op_namelen;
__u32 op_mode;
struct md_op_data mi_data;
struct lookup_intent mi_it;
struct lustre_handle mi_lockh;
- struct dentry *mi_dentry;
struct inode *mi_dir;
md_enqueue_cb_t mi_cb;
+ __u64 mi_cbdata;
unsigned int mi_generation;
- void *mi_cbdata;
};
struct obd_ops {
int (*o_punch)(struct obd_export *exp, struct obd_info *oinfo,
struct obd_trans_info *oti,
struct ptlrpc_request_set *rqset);
- int (*o_sync)(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *ea, obd_size start, obd_size end,
- void *capa);
+ int (*o_sync)(struct obd_export *exp, struct obd_info *oinfo,
+ obd_size start, obd_size end,
+ struct ptlrpc_request_set *set);
int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
struct lov_stripe_md *src, obd_size start,
obd_size end, struct obd_trans_info *oti);
struct lustre_handle *srconn, struct lov_stripe_md *src,
obd_size start, obd_size end, struct obd_trans_info *);
int (*o_iterate)(struct lustre_handle *conn,
- int (*)(obd_id, obd_gr, void *),
- obd_id *startid, obd_gr group, void *data);
+ int (*)(obd_id, obd_seq, void *),
+ obd_id *startid, obd_seq seq, void *data);
int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *remote, int *nr_pages,
struct ptlrpc_request_set *rqset);
int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
ldlm_iterator_t it, void *data);
+ int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *,
+ ldlm_iterator_t it, void *data);
int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
__u32 mode, struct lustre_handle *);
int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
- int flags, void *opaque);
+ ldlm_cancel_flags_t flags, void *opaque);
int (*o_init_export)(struct obd_export *exp);
int (*o_destroy_export)(struct obd_export *exp);
int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
struct obd_quotactl *);
int (*o_quota_adjust_qunit)(struct obd_export *exp,
struct quota_adjust_qunit *oqaq,
- struct lustre_quota_ctxt *qctxt);
+ struct lustre_quota_ctxt *qctxt,
+ struct ptlrpc_request_set *rqset);
int (*o_ping)(struct obd_export *exp);
struct obd_capa **);
int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
ldlm_iterator_t, void *);
+ int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
+ ldlm_iterator_t, void *);
int (*m_close)(struct obd_export *, struct md_op_data *,
struct md_open_data *, struct ptlrpc_request **);
int (*m_create)(struct obd_export *, struct md_op_data *,
int (*m_sync)(struct obd_export *, const struct lu_fid *,
struct obd_capa *, struct ptlrpc_request **);
int (*m_readpage)(struct obd_export *, const struct lu_fid *,
- struct obd_capa *, __u64, struct page *,
- struct ptlrpc_request **);
+ struct obd_capa *, __u64, struct page **,
+ unsigned, struct ptlrpc_request **);
int (*m_unlink)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
struct ptlrpc_request *);
int (*m_clear_open_replay_data)(struct obd_export *,
struct obd_client_handle *);
- int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u32 *);
+ int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
ldlm_mode_t (*m_lock_match)(struct obd_export *, int,
const struct lu_fid *, ldlm_type_t,
struct lustre_handle *);
int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
- ldlm_policy_data_t *, ldlm_mode_t, int flags,
- void *opaque);
+ ldlm_policy_data_t *, ldlm_mode_t,
+ ldlm_cancel_flags_t flags, void *opaque);
int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
renew_capa_cb_t cb);
int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *,
struct md_enqueue_info *,
struct ldlm_enqueue_info *);
- int (*m_revalidate_lock)(struct obd_export *,
- struct lookup_intent *,
- struct lu_fid *);
+ int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
+ struct lu_fid *, __u64 *bits);
/*
* NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
} \
})
+extern void obdo_from_inode(struct obdo *dst, struct inode *src,
+ struct lu_fid *parent, obd_flag valid);
+
+/**
+ * Decide whether the client should resend a request.
+ *
+ * \param resend value compared against the limit (presumably the number of
+ *               resends already attempted -- confirm against callers)
+ * \param cli    client obd whose cl_resends value is consulted
+ *
+ * \retval 1 if cl_resends reads as zero, or is greater than \a resend
+ * \retval 0 otherwise
+ */
+static inline int client_should_resend(int resend, struct client_obd *cli)
+{
+        /* Read cl_resends exactly once: it is an atomic that may be updated
+         * concurrently, and the zero test and the comparison must both see
+         * the same value. */
+        int max_resends = cfs_atomic_read(&cli->cl_resends);
+
+        return max_resends ? max_resends > resend : 1;
+}
+
#endif /* __OBD_H */