*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#ifndef __OBD_H
#define __OBD_H
+#include <linux/kobject.h>
#include <linux/spinlock.h>
+#include <linux/sysfs.h>
-#include <lustre/lustre_idl.h>
+#include <uapi/linux/lustre/lustre_idl.h>
#include <lustre_lib.h>
#include <libcfs/bitmap.h>
#ifdef HAVE_SERVER_SUPPORT
#include <lustre_fld.h>
#include <lustre_handles.h>
#include <lustre_intent.h>
-#include <lustre_capa.h>
#include <lvfs.h>
+#include <lustre_quota.h>
#define MAX_OBD_DEVICES 8192
oinfo->loi_kms_valid = 1;
}
-static inline void loi_init(struct lov_oinfo *loi)
-{
-}
-
struct lov_stripe_md;
struct obd_info;
struct md_ops *typ_md_ops;
struct proc_dir_entry *typ_procroot;
struct proc_dir_entry *typ_procsym;
- __u32 typ_sym_filter;
+ struct dentry *typ_debugfs_entry;
+#ifdef HAVE_SERVER_SUPPORT
+ bool typ_sym_filter;
+#endif
char *typ_name;
int typ_refcnt;
struct lu_device_type *typ_lu;
spinlock_t obd_type_lock;
+ struct kobject *typ_kobj;
};
struct brw_page {
struct timeout_item {
enum timeout_event ti_event;
- cfs_time_t ti_timeout;
+ time64_t ti_timeout;
timeout_cb_t ti_cb;
void *ti_cb_data;
struct list_head ti_obd_list;
#define OBD_MAX_RIF_DEFAULT 8
#define OBD_MAX_RIF_MAX 512
#define OSC_MAX_RIF_MAX 256
-#define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_DEFAULT 2000 /* Arbitrary large value */
#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
#define OSC_DEFAULT_RESENDS 10
*/
#define OBD_MAX_DEFAULT_EA_SIZE 4096
+/* NOTE(review): presumably lock-ordering (lockdep) subclasses for the
+ * client obd setup semaphore, distinguishing MGC and MDC/OSC nesting —
+ * confirm against the down_write() call sites that pass these. */
+enum obd_cl_sem_lock_class {
+	OBD_CLI_SEM_NORMAL,
+	OBD_CLI_SEM_MGC,
+	OBD_CLI_SEM_MDCOSC,
+};
+
struct mdc_rpc_lock;
struct obd_import;
struct client_obd {
* run-time if a larger observed size is advertised by the MDT. */
__u32 cl_max_mds_easize;
+ /* Data-on-MDT specific value to set larger reply buffer for possible
+ * data read along with open/stat requests. By default it tries to use
+ * unused space in reply buffer.
+ * This value is used to ensure that reply buffer has at least as
+ * much free space as value indicates. That free space is gained from
+ * LOV EA buffer which is small for DoM files and on big systems can
+ * provide up to 32KB of extra space in reply buffer.
+ * Default value is 8K now.
+ */
+ __u32 cl_dom_min_inline_repsize;
+
enum lustre_sec_part cl_sp_me;
enum lustre_sec_part cl_sp_to;
struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */
unsigned long cl_dirty_transit; /* dirty synchronous */
unsigned long cl_avail_grant; /* bytes of credit for ost */
unsigned long cl_lost_grant; /* lost credits (trunc) */
+ /* grant consumed for dirty pages */
+ unsigned long cl_dirty_grant;
/* since we allocate grant by blocks, we don't know how many grant will
* be used to add a page into cache. As a solution, we reserve maximum
* See osc_{reserve|unreserve}_grant for details. */
long cl_reserved_grant;
struct list_head cl_cache_waiters; /* waiting for cache/grant */
- cfs_time_t cl_next_shrink_grant; /* jiffies */
- struct list_head cl_grant_shrink_list; /* Timeout event list */
- int cl_grant_shrink_interval; /* seconds */
+ time64_t cl_next_shrink_grant; /* seconds */
+ struct list_head cl_grant_chain;
+ time64_t cl_grant_shrink_interval; /* seconds */
/* A chunk is an optimal size used by osc_extent to determine
- * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) */
+ * the extent size. A chunk is max(PAGE_SIZE, OST block size) */
int cl_chunkbits;
- unsigned int cl_extent_tax; /* extent overhead, by bytes */
+ /* extent insertion metadata overhead to be accounted in grant,
+ * in bytes */
+ unsigned int cl_grant_extent_tax;
+ /* maximum extent size, in number of pages */
+ unsigned int cl_max_extent_pages;
/* keep track of objects that have lois that contain pages which
* have been queued for async brw. this lock also protects the
/* just a sum of the loi/lop pending numbers to be exported by /proc */
atomic_t cl_pending_w_pages;
atomic_t cl_pending_r_pages;
- __u32 cl_max_pages_per_rpc;
- __u32 cl_max_rpcs_in_flight;
+ u32 cl_max_pages_per_rpc;
+ u32 cl_max_rpcs_in_flight;
+ u32 cl_max_short_io_bytes;
struct obd_histogram cl_read_rpc_hist;
struct obd_histogram cl_write_rpc_hist;
struct obd_histogram cl_read_page_hist;
struct obd_histogram cl_read_offset_hist;
struct obd_histogram cl_write_offset_hist;
- /* lru for osc caching pages */
- struct cl_client_cache *cl_cache;
- struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */
- atomic_long_t *cl_lru_left;
- atomic_long_t cl_lru_busy;
- atomic_long_t cl_lru_in_list;
- atomic_long_t cl_unstable_count;
- struct list_head cl_lru_list; /* lru page list */
- spinlock_t cl_lru_list_lock; /* page list protector */
- atomic_t cl_lru_shrinkers;
+ /** LRU for osc caching pages */
+ struct cl_client_cache *cl_cache;
+ /** member of cl_cache->ccc_lru */
+ struct list_head cl_lru_osc;
+ /** # of available LRU slots left in the per-OSC cache.
+ * Available LRU slots are shared by all OSCs of the same file system,
+ * therefore this is a pointer to cl_client_cache::ccc_lru_left. */
+ atomic_long_t *cl_lru_left;
+ /** # of busy LRU pages. A page is considered busy if it's in writeback
+ * queue, or in transfer. Busy pages can't be discarded so they are not
+ * in LRU cache. */
+ atomic_long_t cl_lru_busy;
+ /** # of LRU pages in the cache for this client_obd */
+ atomic_long_t cl_lru_in_list;
+ /** # of threads are shrinking LRU cache. To avoid contention, it's not
+ * allowed to have multiple threads shrinking LRU cache. */
+ atomic_t cl_lru_shrinkers;
+ /** The time when this LRU cache was last used. */
+ time64_t cl_lru_last_used;
+ /** stats: how many reclaims have happened for this client_obd.
+ * reclaim and shrink - shrink is async, voluntarily rebalancing;
+ * reclaim is sync, initiated by IO thread when the LRU slots are
+ * in shortage. */
+ __u64 cl_lru_reclaim;
+ /** List of LRU pages for this client_obd */
+ struct list_head cl_lru_list;
+ /** Lock for LRU page list */
+ spinlock_t cl_lru_list_lock;
+ /** # of unstable pages in this client_obd.
+ * An unstable page is a page state that WRITE RPC has finished but
+ * the transaction has NOT yet committed. */
+ atomic_long_t cl_unstable_count;
+ /** Link to osc_shrinker_list */
+ struct list_head cl_shrink_list;
/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
atomic_t cl_destroy_in_flight;
wait_queue_head_t cl_destroy_waitq;
- struct mdc_rpc_lock *cl_rpc_lock;
- struct mdc_rpc_lock *cl_close_lock;
-
/* modify rpcs in flight
* currently used for metadata only */
spinlock_t cl_mod_rpcs_lock;
__u16 cl_max_mod_rpcs_in_flight;
+ __u16 cl_mod_rpcs_in_flight;
+ __u16 cl_close_rpcs_in_flight;
+ wait_queue_head_t cl_mod_rpcs_waitq;
+ unsigned long *cl_mod_tag_bitmap;
+ struct obd_histogram cl_mod_rpcs_hist;
/* mgc datastruct */
struct mutex cl_mgc_mutex;
struct local_oid_storage *cl_mgc_los;
struct dt_object *cl_mgc_configs_dir;
- atomic_t cl_mgc_refcount;
struct obd_export *cl_mgc_mgsexp;
+ atomic_t cl_mgc_refcount;
+ /* in-flight control list and total RPCs counter */
+ struct list_head cl_flight_waiters;
+ __u32 cl_rpcs_in_flight;
/* checksumming for data sent over the network */
- unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */
+ unsigned int cl_checksum:1, /* 0 = disabled, 1 = enabled */
+ cl_checksum_dump:1; /* same */
/* supported checksum types that are worked out at connect time */
__u32 cl_supp_cksum_types;
/* checksum algorithm to be used */
- cksum_type_t cl_cksum_type;
+ enum cksum_types cl_cksum_type;
/* also protected by the poorly named _loi_list_lock lock above */
struct osc_async_rc cl_ar;
/* sequence manager */
struct lu_client_seq *cl_seq;
+ struct rw_semaphore cl_seq_rwsem;
- atomic_t cl_resends; /* resend count */
+ atomic_t cl_resends; /* resend count */
/* ptlrpc work for writeback in ptlrpcd context */
void *cl_writeback_work;
void *cl_lru_work;
/* hash tables for osc_quota_info */
- struct cfs_hash *cl_quota_hash[MAXQUOTAS];
+ struct cfs_hash *cl_quota_hash[LL_MAXQUOTAS];
+ /* Links to the global list of registered changelog devices */
+ struct list_head cl_chg_dev_linkage;
};
#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
ltd_reap:1; /* should this target be deleted */
};
+/* Data-on-MDT: pairs an MDC obd device with its target index; entries of
+ * the lov_obd::lov_mdc_tgts MDC array. */
+struct lov_md_tgt_desc {
+	struct obd_device *lmtd_mdc;	/* MDC obd device for this target */
+	__u32 lmtd_index;		/* index of this target in the array */
+};
+
struct lov_obd {
struct lov_desc desc;
struct lov_tgt_desc **lov_tgts; /* sparse array */
struct cl_client_cache *lov_cache;
struct rw_semaphore lov_notify_lock;
+ /* Data-on-MDT: MDC array */
+ struct lov_md_tgt_desc *lov_mdc_tgts;
+
+ struct kobject *lov_tgts_kobj;
};
struct lmv_tgt_desc {
struct obd_uuid ltd_uuid;
+ struct obd_device *ltd_obd;
struct obd_export *ltd_exp;
__u32 ltd_idx;
struct mutex ltd_fid_mutex;
unsigned long ltd_active:1; /* target up for requests */
};
-enum placement_policy {
- PLACEMENT_CHAR_POLICY = 0,
- PLACEMENT_NID_POLICY = 1,
- PLACEMENT_INVAL_POLICY = 2,
- PLACEMENT_MAX_POLICY
-};
-
-typedef enum placement_policy placement_policy_t;
-
struct lmv_obd {
- int refcount;
struct lu_client_fld lmv_fld;
spinlock_t lmv_lock;
- placement_policy_t lmv_placement;
struct lmv_desc desc;
- struct obd_uuid cluuid;
- struct obd_export *exp;
- struct proc_dir_entry *targets_proc_entry;
struct mutex lmv_init_mutex;
int connected;
int max_easize;
int max_def_easize;
+ u32 lmv_statfs_start;
- __u32 tgts_size; /* size of tgts array */
+ u32 tgts_size; /* size of tgts array */
struct lmv_tgt_desc **tgts;
struct obd_connect_data conn_data;
+ struct kobject *lmv_tgts_kobj;
};
+/* Minimum sector size is 512 */
+#define MAX_GUARD_NUMBER (PAGE_SIZE / 512)
+
struct niobuf_local {
__u64 lnb_file_offset;
__u32 lnb_page_offset;
__u32 lnb_len;
__u32 lnb_flags;
+ int lnb_rc;
struct page *lnb_page;
void *lnb_data;
- int lnb_rc;
+ __u16 lnb_guards[MAX_GUARD_NUMBER];
+ __u16 lnb_guard_rpc:1;
+ __u16 lnb_guard_disk:1;
+};
+
+/* Per-thread scratch area: enough niobuf_local slots to describe a
+ * maximally-sized bulk RPC (PTLRPC_MAX_BRW_PAGES pages). Kept off the
+ * stack because of its size. */
+struct tgt_thread_big_cache {
+	struct niobuf_local local[PTLRPC_MAX_BRW_PAGES];
+};
#define LUSTRE_FLD_NAME "fld"
#define LUSTRE_MGS_OBDNAME "MGS"
#define LUSTRE_MGC_OBDNAME "MGC"
+/* Return 1 if @name is the obd name of an LWP device running on an MDT
+ * (form: fsname-MDTxxxx-lwp-MDTxxxx), 0 otherwise. Also returns 0 (and
+ * logs an error) when @name contains no '-' and so is not an obd name. */
+static inline int is_lwp_on_mdt(char *name)
+{
+	char *ptr;
+
+	ptr = strrchr(name, '-');
+	if (ptr == NULL) {
+		CERROR("%s is not a obdname\n", name);
+		return 0;
+	}
+
+	/* LWP name on MDT is fsname-MDTxxxx-lwp-MDTxxxx */
+
+	/* last component must be an MDT target name */
+	if (strncmp(ptr + 1, "MDT", 3) != 0)
+		return 0;
+
+	/* walk back to the previous '-' (or stop at the start of @name) */
+	while (*(--ptr) != '-' && ptr != name);
+
+	/* no earlier '-': name too short to contain a "-lwp-" component */
+	if (ptr == name)
+		return 0;
+
+	/* second-to-last component must be the LWP device name ("lwp") */
+	if (strncmp(ptr + 1, LUSTRE_LWP_NAME, strlen(LUSTRE_LWP_NAME)) != 0)
+		return 0;
+
+	return 1;
+}
+
+/* Return 1 if @name is the obd name of an LWP device running on an OST
+ * (form: fsname-MDTxxxx-lwp-OSTxxxx), 0 otherwise. Also returns 0 (and
+ * logs an error) when @name contains no '-' and so is not an obd name.
+ * Mirrors is_lwp_on_mdt() but matches an OST as the final component. */
+static inline int is_lwp_on_ost(char *name)
+{
+	char *ptr;
+
+	ptr = strrchr(name, '-');
+	if (ptr == NULL) {
+		CERROR("%s is not a obdname\n", name);
+		return 0;
+	}
+
+	/* LWP name on OST is fsname-MDTxxxx-lwp-OSTxxxx */
+
+	/* last component must be an OST target name */
+	if (strncmp(ptr + 1, "OST", 3) != 0)
+		return 0;
+
+	/* walk back to the previous '-' (or stop at the start of @name) */
+	while (*(--ptr) != '-' && ptr != name);
+
+	/* no earlier '-': name too short to contain a "-lwp-" component */
+	if (ptr == name)
+		return 0;
+
+	/* second-to-last component must be the LWP device name ("lwp") */
+	if (strncmp(ptr + 1, LUSTRE_LWP_NAME, strlen(LUSTRE_LWP_NAME)) != 0)
+		return 0;
+
+	return 1;
+}
+
/*
* Events signalled through obd_notify() upcall-chain.
*/
enum obd_notify_event {
- /* target added */
- OBD_NOTIFY_CREATE,
/* Device connect start */
OBD_NOTIFY_CONNECT,
/* Device activated */
OBD_NOTIFY_ACTIVE,
/* Device deactivated */
OBD_NOTIFY_INACTIVE,
- /* Device disconnected */
- OBD_NOTIFY_DISCON,
/* Connect data for import were changed */
OBD_NOTIFY_OCD,
- /* Sync request */
- OBD_NOTIFY_SYNC_NONBLOCK,
- OBD_NOTIFY_SYNC,
- /* Configuration event */
- OBD_NOTIFY_CONFIG,
/* Administratively deactivate/activate event */
OBD_NOTIFY_DEACTIVATE,
OBD_NOTIFY_ACTIVATE
};
-/* bit-mask flags for config events */
-enum config_flags {
- CONFIG_LOG = 0x1, /* finished processing config log */
- CONFIG_SYNC = 0x2, /* mdt synced 1 ost */
- CONFIG_TARGET = 0x4 /* one target is added */
-};
-
/*
* Data structure used to pass obd_notify()-event to non-obd listeners (llite
- * and liblustre being main examples).
+ * being main example).
*/
struct obd_notify_upcall {
- int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
+ int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
+ enum obd_notify_event ev, void *owner);
/* Opaque datum supplied by upper layer listener */
void *onu_owner;
};
#define OBD_DEVICE_MAGIC 0XAB5CD6EF
struct obd_device {
- struct obd_type *obd_type;
- __u32 obd_magic;
+ struct obd_type *obd_type;
+ __u32 obd_magic; /* OBD_DEVICE_MAGIC */
+ int obd_minor; /* device number: lctl dl */
+ struct lu_device *obd_lu_dev;
- /* common and UUID name of this device */
- char obd_name[MAX_OBD_NAME];
- struct obd_uuid obd_uuid;
- int obd_minor;
- struct lu_device *obd_lu_dev;
+ /* common and UUID name of this device */
+ struct obd_uuid obd_uuid;
+ char obd_name[MAX_OBD_NAME];
/* bitfield modification is protected by obd_dev_lock */
unsigned long
obd_recovering:1, /* there are recoverable clients */
obd_abort_recovery:1, /* recovery expired */
obd_version_recov:1, /* obd uses version checking */
- obd_replayable:1, /* recovery is enabled;
- * inform clients */
+ obd_replayable:1, /* recovery enabled; inform clients */
obd_no_transno:1, /* no committed-transno notification */
obd_no_recov:1, /* fail instead of retry messages */
obd_stopping:1, /* started cleanup */
* (for /proc/status only!!) */
obd_no_ir:1, /* no imperative recovery. */
obd_process_conf:1, /* device is processing mgs config */
- obd_uses_nid_stats:1, /* maintain per-client OBD stats */
- obd_force_abort_recovery:1; /* abort recovery forcely */
+ obd_checksum_dump:1; /* dump pages upon cksum error */
/* use separate field as it is set in interrupt to don't mess with
* protection of other bits using _bh lock */
struct cfs_hash *obd_nid_hash;
/* nid stats body */
struct cfs_hash *obd_nid_stats_hash;
+ /* client_generation-export hash body */
+ struct cfs_hash *obd_gen_hash;
struct list_head obd_nid_stats;
- atomic_t obd_refcount;
struct list_head obd_exports;
struct list_head obd_unlinked_exports;
struct list_head obd_delayed_exports;
struct list_head obd_lwp_list;
+ atomic_t obd_refcount;
int obd_num_exports;
spinlock_t obd_nid_lock;
struct ldlm_namespace *obd_namespace;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
/* a spinlock is OK for what we do now, may need a semaphore later */
spinlock_t obd_dev_lock; /* protect OBD bitfield above */
- struct mutex obd_dev_mutex;
- __u64 obd_last_committed;
spinlock_t obd_osfs_lock;
struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */
- __u64 obd_osfs_age;
+ time64_t obd_osfs_age;
+ __u64 obd_last_committed;
+ struct mutex obd_dev_mutex;
struct lvfs_run_ctxt obd_lvfs_ctxt;
struct obd_llog_group obd_olg; /* default llog group */
struct obd_device *obd_observer;
struct obd_export *obd_lwp_export;
/* list of exports in LRU order, for ping evictor, with obd_dev_lock */
struct list_head obd_exports_timed;
- time_t obd_eviction_timer; /* for ping evictor */
+ time64_t obd_eviction_timer; /* for ping evictor */
int obd_max_recoverable_clients;
atomic_t obd_connected_clients;
/* protected by obd_recovery_task_lock */
struct timer_list obd_recovery_timer;
/* seconds */
- time_t obd_recovery_start;
+ time64_t obd_recovery_start;
/* seconds, for lprocfs_status */
- time_t obd_recovery_end;
- int obd_recovery_time_hard;
- int obd_recovery_timeout;
+ time64_t obd_recovery_end;
+ time64_t obd_recovery_time_hard;
+ time64_t obd_recovery_timeout;
int obd_recovery_ir_factor;
/* new recovery stuff from CMD2 */
- struct target_recovery_data obd_recovery_data;
int obd_replayed_locks;
atomic_t obd_req_replay_clients;
atomic_t obd_lock_replay_clients;
+ struct target_recovery_data obd_recovery_data;
+
/* all lists are protected by obd_recovery_task_lock */
struct list_head obd_req_replay_queue;
struct list_head obd_lock_replay_queue;
struct lov_obd lov;
struct lmv_obd lmv;
} u;
+
/* Fields used by LProcFS */
- unsigned int obd_cntr_base;
- struct lprocfs_stats *obd_stats;
+ struct lprocfs_stats *obd_stats;
- unsigned int obd_md_cntr_base;
- struct lprocfs_stats *obd_md_stats;
+ struct lprocfs_stats *obd_md_stats;
+ struct dentry *obd_debugfs_entry;
struct proc_dir_entry *obd_proc_entry;
struct proc_dir_entry *obd_proc_exports_entry;
- struct proc_dir_entry *obd_svc_procroot;
+ struct dentry *obd_svc_debugfs_entry;
struct lprocfs_stats *obd_svc_stats;
+ const struct attribute **obd_attrs;
struct lprocfs_vars *obd_vars;
atomic_t obd_evict_inprogress;
wait_queue_head_t obd_evict_inprogress_waitq;
struct list_head obd_evict_list; /* protected with pet_lock */
- /**
- * Ldlm pool part. Save last calculated SLV and Limit.
- */
- rwlock_t obd_pool_lock;
- int obd_pool_limit;
- __u64 obd_pool_slv;
+ /**
+ * LDLM pool part. Save last calculated SLV and Limit.
+ */
+ rwlock_t obd_pool_lock;
+ __u64 obd_pool_slv;
+ int obd_pool_limit;
- /**
- * A list of outstanding class_incref()'s against this obd. For
- * debugging.
- */
- struct lu_ref obd_reference;
+ int obd_conn_inprogress;
- int obd_conn_inprogress;
-};
+ /**
+ * List of outstanding class_incref()'s fo this OBD. For debugging. */
+ struct lu_ref obd_reference;
-enum obd_cleanup_stage {
-/* Special case hack for MDS LOVs */
- OBD_CLEANUP_EARLY,
-/* can be directly mapped to .ldto_device_fini() */
- OBD_CLEANUP_EXPORTS,
+ struct kset obd_kset; /* sysfs object collection */
+ struct kobj_type obd_ktype;
+ struct completion obd_kobj_unregister;
};
/* get/set_info keys */
#define KEY_ASYNC "async"
-#define KEY_CAPA_KEY "capa_key"
#define KEY_CHANGELOG_CLEAR "changelog_clear"
#define KEY_FID2PATH "fid2path"
#define KEY_CHECKSUM "checksum"
#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink"
#define KEY_OSP_CONNECTED "osp_connected"
-struct lu_context;
+/* Flags for md_op_data::op_xvalid — "eXtra validity" bits that extend
+ * op_valid (OBD_MD_*) with attribute qualifiers for setattr-style ops. */
+enum op_xvalid {
+	OP_XVALID_CTIME_SET = BIT(0), /* 0x0001 */
+	OP_XVALID_BLOCKS = BIT(1), /* 0x0002 */
+	OP_XVALID_OWNEROVERRIDE = BIT(2), /* 0x0004 */
+	OP_XVALID_FLAGS = BIT(3), /* 0x0008 */
+	OP_XVALID_PROJID = BIT(4), /* 0x0010 */
+	OP_XVALID_LAZYSIZE = BIT(5), /* 0x0020 */
+	OP_XVALID_LAZYBLOCKS = BIT(6), /* 0x0040 */
+};
-/* /!\ must be coherent with include/linux/namei.h on patched kernel */
-#define IT_OPEN (1 << 0)
-#define IT_CREAT (1 << 1)
-#define IT_READDIR (1 << 2)
-#define IT_GETATTR (1 << 3)
-#define IT_LOOKUP (1 << 4)
-#define IT_UNLINK (1 << 5)
-#define IT_TRUNC (1 << 6)
-#define IT_GETXATTR (1 << 7)
-#define IT_EXEC (1 << 8)
-#define IT_PIN (1 << 9)
-#define IT_LAYOUT (1 << 10)
-#define IT_QUOTA_DQACQ (1 << 11)
-#define IT_QUOTA_CONN (1 << 12)
-#define IT_SETXATTR (1 << 13)
+struct lu_context;
static inline int it_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
if (it->it_op & IT_CREAT)
return LCK_CW;
- else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP |
- IT_LAYOUT))
+ else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP))
return LCK_CR;
+ else if (it->it_op & IT_LAYOUT)
+ return (it->it_flags & FMODE_WRITE) ? LCK_EX : LCK_CR;
else if (it->it_op & IT_READDIR)
return LCK_PR;
else if (it->it_op & IT_GETXATTR)
return LCK_PR;
- else if (it->it_op & IT_SETXATTR)
- return LCK_PW;
LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
return -EINVAL;
CLI_MIGRATE = 1 << 4,
};
+/**
+ * Return true if intent @it is one whose reply carries a full mdt_body,
+ * as required for common intent handling in mdc_finish_intent_lock().
+ * GETXATTR is not included as only a couple of fields in the reply body
+ * are filled, but not the FID which is needed for common intent handling
+ * in mdc_finish_intent_lock()
+ */
+static inline bool it_has_reply_body(const struct lookup_intent *it)
+{
+	return it->it_op & (IT_OPEN | IT_LOOKUP | IT_GETATTR);
+}
+
struct md_op_data {
- struct lu_fid op_fid1; /* operation fid1 (usualy parent) */
- struct lu_fid op_fid2; /* operation fid2 (usualy child) */
- struct lu_fid op_fid3; /* 2 extra fids to find conflicting */
- struct lu_fid op_fid4; /* to the operation locks. */
+ struct lu_fid op_fid1; /* operation fid1 (usualy parent) */
+ struct lu_fid op_fid2; /* operation fid2 (usualy child) */
+ struct lu_fid op_fid3; /* 2 extra fids to find conflicting */
+ struct lu_fid op_fid4; /* to the operation locks. */
u32 op_mds; /* what mds server open will go to */
- struct lustre_handle op_handle;
+ __u32 op_mode;
+ struct lustre_handle op_open_handle;
s64 op_mod_time;
- const char *op_name;
+ const char *op_name;
size_t op_namelen;
- __u32 op_mode;
- struct lmv_stripe_md *op_mea1;
- struct lmv_stripe_md *op_mea2;
- __u32 op_suppgids[2];
- __u32 op_fsuid;
- __u32 op_fsgid;
- cfs_cap_t op_cap;
- void *op_data;
+ struct rw_semaphore *op_mea1_sem;
+ struct rw_semaphore *op_mea2_sem;
+ struct lmv_stripe_md *op_mea1;
+ struct lmv_stripe_md *op_mea2;
+ __u32 op_suppgids[2];
+ __u32 op_fsuid;
+ __u32 op_fsgid;
+ cfs_cap_t op_cap;
+ void *op_data;
size_t op_data_size;
- /* iattr fields and blocks. */
+ /* iattr fields and blocks. */
struct iattr op_attr;
+ enum op_xvalid op_xvalid; /* eXtra validity flags */
loff_t op_attr_blocks;
- unsigned int op_attr_flags; /* LUSTRE_{SYNC,..}_FL */
- __u64 op_valid; /* OBD_MD_* */
+ u64 op_valid; /* OBD_MD_* */
+ unsigned int op_attr_flags; /* LUSTRE_{SYNC,..}_FL */
enum md_op_flags op_flags;
- /* Capa fields */
- struct obd_capa *op_capa1;
- struct obd_capa *op_capa2;
-
/* Various operation flags. */
enum mds_op_bias op_bias;
- /* Used by readdir */
- unsigned int op_max_pages;
-
/* used to transfer info between the stacks of MD client
* see enum op_cli_flags */
enum md_cli_flags op_cli_flags;
__u64 op_data_version;
struct lustre_handle op_lease_handle;
+ /* File security context, for creates. */
+ const char *op_file_secctx_name;
+ void *op_file_secctx;
+ __u32 op_file_secctx_size;
+
/* default stripe offset */
__u32 op_default_stripe_offset;
+
+ __u32 op_projid;
+
+ /* Used by readdir */
+ unsigned int op_max_pages;
+
+ __u16 op_mirror_id;
+
+ /*
+ * used to access migrating dir: if it's set, assume migration is
+ * finished, use the new layout to access dir, otherwise use old layout.
+ * By default it's not set, because new files are created under new
+ * layout, if we can't find file with name under both old and new
+ * layout, we are sure file with name doesn't exist, but in reverse
+ * order there may be a race with creation by others.
+ */
+ bool op_post_migrate;
+ /* used to access dir with bash hash */
+ __u32 op_stripe_index;
};
struct md_callback {
int rc);
struct md_enqueue_info {
- struct md_op_data mi_data;
- struct lookup_intent mi_it;
- struct lustre_handle mi_lockh;
- struct inode *mi_dir;
- md_enqueue_cb_t mi_cb;
- void *mi_cbdata;
+ struct md_op_data mi_data;
+ struct lookup_intent mi_it;
+ struct lustre_handle mi_lockh;
+ struct inode *mi_dir;
+ struct ldlm_enqueue_info mi_einfo;
+ md_enqueue_cb_t mi_cb;
+ void *mi_cbdata;
};
struct obd_ops {
__u32 keylen, void *key,
__u32 vallen, void *val,
struct ptlrpc_request_set *set);
- int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
- int (*o_precleanup)(struct obd_device *dev,
- enum obd_cleanup_stage cleanup_stage);
- int (*o_cleanup)(struct obd_device *dev);
+ int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
+ int (*o_precleanup)(struct obd_device *dev);
+ int (*o_cleanup)(struct obd_device *dev);
int (*o_process_config)(struct obd_device *dev, size_t len, void *data);
- int (*o_postrecov)(struct obd_device *dev);
- int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
- int priority);
- int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
- /* connect to the target device with given connection
- * data. @ocd->ocd_connect_flags is modified to reflect flags actually
- * granted by the target, which are guaranteed to be a subset of flags
- * asked for. If @ocd == NULL, use default parameters. */
- int (*o_connect)(const struct lu_env *env,
- struct obd_export **exp, struct obd_device *src,
- struct obd_uuid *cluuid, struct obd_connect_data *ocd,
- void *localdata);
- int (*o_reconnect)(const struct lu_env *env,
- struct obd_export *exp, struct obd_device *src,
- struct obd_uuid *cluuid,
- struct obd_connect_data *ocd,
- void *localdata);
- int (*o_disconnect)(struct obd_export *exp);
+ int (*o_postrecov)(struct obd_device *dev);
+ int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
+ int priority);
+ int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
+ /* connect to the target device with given connection
+ * data. @ocd->ocd_connect_flags is modified to reflect flags actually
+ * granted by the target, which are guaranteed to be a subset of flags
+ * asked for. If @ocd == NULL, use default parameters. */
+ int (*o_connect)(const struct lu_env *env,
+ struct obd_export **exp, struct obd_device *src,
+ struct obd_uuid *cluuid, struct obd_connect_data *ocd,
+ void *localdata);
+ int (*o_reconnect)(const struct lu_env *env,
+ struct obd_export *exp, struct obd_device *src,
+ struct obd_uuid *cluuid,
+ struct obd_connect_data *ocd,
+ void *localdata);
+ int (*o_disconnect)(struct obd_export *exp);
/* Initialize/finalize fids infrastructure. */
int (*o_fid_init)(struct obd_device *obd,
int (*o_fid_alloc)(const struct lu_env *env, struct obd_export *exp,
struct lu_fid *fid, struct md_op_data *op_data);
- /*
- * Object with @fid is getting deleted, we may want to do something
- * about this.
- */
- int (*o_statfs)(const struct lu_env *, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age, __u32 flags);
- int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
- __u64 max_age, struct ptlrpc_request_set *set);
- int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt,
- struct lov_mds_md *disk_src, int disk_len);
+ /*
+ * Object with @fid is getting deleted, we may want to do something
+ * about this.
+ */
+ int (*o_statfs)(const struct lu_env *, struct obd_export *exp,
+ struct obd_statfs *osfs, time64_t max_age, __u32 flags);
+ int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
+ time64_t max_age, struct ptlrpc_request_set *set);
int (*o_create)(const struct lu_env *env, struct obd_export *exp,
struct obdo *oa);
int (*o_destroy)(const struct lu_env *env, struct obd_export *exp,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *remote, int pages,
struct niobuf_local *local, int rc);
- int (*o_init_export)(struct obd_export *exp);
- int (*o_destroy_export)(struct obd_export *exp);
+ int (*o_init_export)(struct obd_export *exp);
+ int (*o_destroy_export)(struct obd_export *exp);
- int (*o_import_event)(struct obd_device *, struct obd_import *,
- enum obd_import_event);
+ int (*o_import_event)(struct obd_device *, struct obd_import *,
+ enum obd_import_event);
- int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev, void *data);
+ int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
+ enum obd_notify_event ev);
- int (*o_health_check)(const struct lu_env *env, struct obd_device *);
- struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
+ int (*o_health_check)(const struct lu_env *env, struct obd_device *);
+ struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
- /* quota methods */
- int (*o_quotactl)(struct obd_device *, struct obd_export *,
- struct obd_quotactl *);
+ /* quota methods */
+ int (*o_quotactl)(struct obd_device *, struct obd_export *,
+ struct obd_quotactl *);
- int (*o_ping)(const struct lu_env *, struct obd_export *exp);
+ int (*o_ping)(const struct lu_env *, struct obd_export *exp);
- /* pools methods */
- int (*o_pool_new)(struct obd_device *obd, char *poolname);
- int (*o_pool_del)(struct obd_device *obd, char *poolname);
- int (*o_pool_add)(struct obd_device *obd, char *poolname,
- char *ostname);
- int (*o_pool_rem)(struct obd_device *obd, char *poolname,
- char *ostname);
- void (*o_getref)(struct obd_device *obd);
- void (*o_putref)(struct obd_device *obd);
- /*
- * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
- * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
- * Also, add a wrapper function in include/linux/obd_class.h. */
+ /* pools methods */
+ int (*o_pool_new)(struct obd_device *obd, char *poolname);
+ int (*o_pool_del)(struct obd_device *obd, char *poolname);
+ int (*o_pool_add)(struct obd_device *obd, char *poolname,
+ char *ostname);
+ int (*o_pool_rem)(struct obd_device *obd, char *poolname,
+ char *ostname);
};
/* lmv structures */
struct lustre_md {
struct mdt_body *body;
- struct lov_stripe_md *lsm;
+ struct lu_buf layout;
struct lmv_stripe_md *lmv;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *posix_acl;
#endif
- struct mdt_remote_perm *remote_perm;
- struct obd_capa *mds_capa;
- struct obd_capa *oss_capa;
};
struct md_open_data {
};
struct obd_client_handle {
- struct lustre_handle och_fh;
+ struct lustre_handle och_open_handle;
struct lu_fid och_fid;
struct md_open_data *och_mod;
struct lustre_handle och_lease_handle; /* open lock for lease */
struct cl_attr;
struct md_ops {
- /* Every operation from MD_STATS_FIRST_OP up to and including
- * MD_STATS_LAST_OP will be counted by EXP_MD_OP_INCREMENT()
- * and will appear in /proc/fs/lustre/{lmv,mdc}/.../md_stats.
- * Operations after MD_STATS_LAST_OP are excluded from stats.
- * There are a few reasons for doing this: we prune the 17
- * counters which will be of minimal use in understanding
- * metadata utilization, we save memory by allocating 15
- * instead of 32 counters, we save cycles by not counting.
- *
- * MD_STATS_FIRST_OP must be the first member of md_ops.
- */
-#define MD_STATS_FIRST_OP m_close
int (*m_close)(struct obd_export *, struct md_op_data *,
struct md_open_data *, struct ptlrpc_request **);
cfs_cap_t, __u64, struct ptlrpc_request **);
int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
- const union ldlm_policy_data *,
- struct lookup_intent *, struct md_op_data *,
+ const union ldlm_policy_data *, struct md_op_data *,
struct lustre_handle *, __u64);
int (*m_getattr)(struct obd_export *, struct md_op_data *,
size_t , struct ptlrpc_request **);
int (*m_fsync)(struct obd_export *, const struct lu_fid *,
- struct obd_capa *, struct ptlrpc_request **);
+ struct ptlrpc_request **);
int (*m_read_page)(struct obd_export *, struct md_op_data *,
struct md_callback *cb_op, __u64 hash_offset,
struct ptlrpc_request **);
int (*m_setxattr)(struct obd_export *, const struct lu_fid *,
- struct obd_capa *, u64, const char *,
- const char *, int, int, int, __u32,
- struct ptlrpc_request **);
+ u64, const char *, const void *, size_t, unsigned int,
+ u32, struct ptlrpc_request **);
int (*m_getxattr)(struct obd_export *, const struct lu_fid *,
- struct obd_capa *, u64, const char *,
- const char *, int, int, int,
- struct ptlrpc_request **);
+ u64, const char *, size_t, struct ptlrpc_request **);
- int (*m_intent_getattr_async)(struct obd_export *,
- struct md_enqueue_info *,
- struct ldlm_enqueue_info *);
+ int (*m_intent_getattr_async)(struct obd_export *,
+ struct md_enqueue_info *);
int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
struct lu_fid *, __u64 *bits);
-#define MD_STATS_LAST_OP m_revalidate_lock
-
- int (*m_getstatus)(struct obd_export *, struct lu_fid *,
- struct obd_capa **);
+ int (*m_file_resync)(struct obd_export *, struct md_op_data *);
+ int (*m_get_root)(struct obd_export *, const char *, struct lu_fid *);
int (*m_null_inode)(struct obd_export *, const struct lu_fid *);
- int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
- ldlm_iterator_t, void *);
-
int (*m_getattr_name)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
int (*m_clear_open_replay_data)(struct obd_export *,
struct obd_client_handle *);
- int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
+ int (*m_set_lock_data)(struct obd_export *,
+ const struct lustre_handle *, void *, __u64 *);
- ldlm_mode_t (*m_lock_match)(struct obd_export *, __u64,
- const struct lu_fid *, ldlm_type_t,
- ldlm_policy_data_t *, ldlm_mode_t,
- struct lustre_handle *);
+ enum ldlm_mode (*m_lock_match)(struct obd_export *, __u64,
+ const struct lu_fid *, enum ldlm_type,
+ union ldlm_policy_data *, enum ldlm_mode,
+ struct lustre_handle *);
int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
- ldlm_policy_data_t *, ldlm_mode_t,
- ldlm_cancel_flags_t flags, void *opaque);
-
- int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
- renew_capa_cb_t cb);
-
- int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *,
- const struct req_msg_field *, struct obd_capa **);
-
- int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
- struct obd_capa *, __u32,
- struct ptlrpc_request **);
+ union ldlm_policy_data *, enum ldlm_mode,
+ enum ldlm_cancel_flags flags, void *opaque);
int (*m_get_fid_from_lsm)(struct obd_export *,
const struct lmv_stripe_md *,
const char *name, int namelen,
struct lu_fid *fid);
+ int (*m_unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
+ const union lmv_mds_md *lmv, size_t lmv_size);
};
static inline struct md_open_data *obd_mod_alloc(void)
void obdo_from_inode(struct obdo *dst, struct inode *src, u64 valid);
void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent);
+void obdo_set_o_projid(struct obdo *dst, u32 projid);
/* return 1 if client should be resend request */
static inline int client_should_resend(int resend, struct client_obd *cli)
+/* Bulk I/O ("brw") RPC size for this client, in bytes:
+ * cl_max_pages_per_rpc converted from pages via the page-shift. */
static inline int cli_brw_size(struct obd_device *obd)
{
	LASSERT(obd != NULL);
-	return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+	return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT;
}
-/* when RPC size or the max RPCs in flight is increased, the max dirty pages
+/*
+ * When RPC size or the max RPCs in flight is increased, the max dirty pages
* of the client should be increased accordingly to avoid sending fragmented
* RPCs over the network when the client runs out of the maximum dirty space
* when so many RPCs are being generated.
static inline void client_adjust_max_dirty(struct client_obd *cli)
{
/* initializing */
- if (cli->cl_dirty_max_pages <= 0)
- cli->cl_dirty_max_pages = (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024)
- >> PAGE_CACHE_SHIFT;
- else {
+ if (cli->cl_dirty_max_pages <= 0) {
+ cli->cl_dirty_max_pages =
+ (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT;
+ } else {
unsigned long dirty_max = cli->cl_max_rpcs_in_flight *
cli->cl_max_pages_per_rpc;
if (cli->cl_dirty_max_pages > totalram_pages / 8)
cli->cl_dirty_max_pages = totalram_pages / 8;
+
+ /* This value is exported to userspace through the max_dirty_mb
+ * parameter. So we round up the number of pages to make it a round
+ * number of MBs. */
+ cli->cl_dirty_max_pages = round_up(cli->cl_dirty_max_pages,
+ 1 << (20 - PAGE_SHIFT));
}
#endif /* __OBD_H */