/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __OBD_H
#include <lustre/lustre_idl.h>
#include <lu_object.h>
+#include <lu_ref.h>
#include <lustre_lib.h>
#include <lustre_export.h>
#include <lustre_quota.h>
struct osc_async_rc loi_ar;
};
+/**
+ * Record \a kms in \a oinfo and flag the stored value as valid.
+ *
+ * \param oinfo  object info to update; dereferenced unconditionally, so it
+ *               must not be NULL
+ * \param kms    value to store in oinfo->loi_kms
+ */
+static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms)
+{
+ oinfo->loi_kms = kms;
+ /* the value is written first, then marked valid */
+ oinfo->loi_kms_valid = 1;
+}
+
static inline void loi_init(struct lov_oinfo *loi)
{
CFS_INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending);
__u32 lw_stripe_size; /* size of the stripe */
__u32 lw_pattern; /* striping pattern (RAID0, RAID1) */
unsigned lw_stripe_count; /* number of objects being striped over */
+ char lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
} lsm_wire;
struct lov_array_info *lsm_array; /*Only for joined file array info*/
#define lsm_stripe_size lsm_wire.lw_stripe_size
#define lsm_pattern lsm_wire.lw_pattern
#define lsm_stripe_count lsm_wire.lw_stripe_count
+#define lsm_pool_name lsm_wire.lw_pool_name
struct obd_info;
obd_size fo_tot_pending;
obd_size fo_readcache_max_filesize;
+ int fo_read_cache;
+ int fo_writethrough_cache;
struct obd_import *fo_mdc_imp;
struct obd_uuid fo_mdc_uuid;
unsigned int fo_fl_oss_capa;
struct list_head fo_capa_keys;
struct hlist_head *fo_capa_hash;
-
- void *fo_lcm;
+ struct llog_commit_master *fo_lcm;
};
#define OSC_MAX_RIF_DEFAULT 8
__u32 cl_supp_cksum_types;
/* checksum algorithm to be used */
cksum_type_t cl_cksum_type;
-
+
/* also protected by the poorly named _loi_list_lock lock above */
struct osc_async_rc cl_ar;
unsigned int ltq_usable:1; /* usable for striping */
};
+/* Generic subset of OSTs */
+struct ost_pool {
+ __u32 *op_array; /* array of indices into
+ lov_obd->lov_tgts */
+ unsigned int op_count; /* number of OSTs in the array */
+ unsigned int op_size; /* allocated size of op_array */
+ rwlock_t op_rwlock; /* to protect ost_pool use */
+};
+
+/*
+ * Round-robin allocator data.
+ * Embedded in struct lov_qos (lq_rr) and in struct pool_desc (pool_rr).
+ */
+struct lov_qos_rr {
+ __u32 lqr_start_idx; /* start index of new inode */
+ __u32 lqr_offset_idx; /* aliasing for lqr_start_idx */
+ int lqr_start_count; /* reseed counter */
+ struct ost_pool lqr_pool; /* round-robin optimized list */
+ unsigned long lqr_dirty:1; /* recalc round-robin list (lqr_pool) */
+};
+
+/* Stripe placement optimization */
struct lov_qos {
struct list_head lq_oss_list; /* list of OSSs that targets use */
struct rw_semaphore lq_rw_sem;
__u32 lq_active_oss_count;
- __u32 *lq_rr_array; /* round-robin optimized list */
- unsigned int lq_rr_size; /* rr array size */
unsigned int lq_prio_free; /* priority for free space */
+ struct lov_qos_rr lq_rr; /* round robin qos data */
unsigned long lq_dirty:1, /* recalc qos data */
- lq_dirty_rr:1, /* recalc round-robin list */
lq_same_space:1,/* the ost's all have approx.
the same space avail */
lq_reset:1; /* zero current penalties */
ltd_reap:1; /* should this target be deleted */
};
+/*
+ * Pool metadata accessors.
+ *
+ * _p is a pointer to struct pool_desc.  Every macro expands to an lvalue, so
+ * the result can be read, assigned to, or have its address taken.
+ *
+ * The argument is parenthesized in each expansion so that callers may pass
+ * any pointer-valued expression (e.g. &desc or base + i), not only a plain
+ * identifier.  pool_tgt() evaluates _p twice: do not pass an expression with
+ * side effects.
+ */
+#define pool_tgt_size(_p) (_p)->pool_obds.op_size
+#define pool_tgt_count(_p) (_p)->pool_obds.op_count
+#define pool_tgt_array(_p) (_p)->pool_obds.op_array
+#define pool_tgt_rwlock(_p) (_p)->pool_obds.op_rwlock
+/* entry of the owning lov's lov_tgts[] selected by the pool's _i-th index */
+#define pool_tgt(_p, _i) \
+ (_p)->pool_lov->lov_tgts[(_p)->pool_obds.op_array[_i]]
+
+/*
+ * Descriptor of one OST pool.
+ *
+ * NOTE(review): pool_name is sized LOV_MAXPOOLNAME + 1 whereas
+ * lsm_wire.lw_pool_name is sized LOV_MAXPOOLNAME; presumably the extra byte
+ * holds a terminating NUL -- confirm, and take care when copying between
+ * the two.
+ */
+struct pool_desc {
+ char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
+ struct ost_pool pool_obds; /* pool members */
+ struct lov_qos_rr pool_rr; /* round robin qos */
+ struct hlist_node pool_hash; /* access by poolname */
+ struct list_head pool_list; /* serial access */
+ cfs_proc_dir_entry_t *pool_proc_entry; /* file in /proc */
+ struct lov_obd *pool_lov; /* lov obd to which this
+ pool belongs */
+};
+
struct lov_obd {
struct lov_desc desc;
- struct lov_tgt_desc **lov_tgts;
+ struct lov_tgt_desc **lov_tgts; /* sparse array; ost_pool::op_array holds indices into it */
+ struct ost_pool lov_packed; /* all OSTs in a packed
+ array */
struct semaphore lov_lock;
struct obd_connect_data lov_ocd;
struct lov_qos lov_qos; /* qos info per lov */
__u32 lov_active_tgt_count; /* how many active */
__u32 lov_death_row;/* tgts scheduled to be deleted */
__u32 lov_tgt_size; /* size of tgts array */
- __u32 lov_start_idx; /* start index of new inode */
- __u32 lov_offset_idx; /* aliasing for start_idx */
- int lov_start_count;/* reseed counter */
int lov_connects;
obd_page_removal_cb_t lov_page_removal_cb;
obd_pin_extent_cb lov_page_pin_cb;
obd_lock_cancel_cb lov_lock_cancel_cb;
+ int lov_pool_count; /* presumably the number of pools on lov_pool_list -- confirm */
+ lustre_hash_t *lov_pools_hash_body; /* used for key access */
+ struct list_head lov_pool_list; /* used for sequential access */
+ cfs_proc_dir_entry_t *lov_pool_proc_entry; /* NOTE(review): looks like the /proc entry for this lov's pools -- confirm */
};
struct lmv_tgt_desc {
struct semaphore ltd_fid_sem;
};
+/*
+ * Placement policy of an LMV device; the selected value is kept in
+ * lmv_obd::lmv_placement.
+ */
+enum placement_policy {
+ PLACEMENT_CHAR_POLICY = 0,
+ PLACEMENT_NID_POLICY = 1,
+ PLACEMENT_INVAL_POLICY = 2,
+ PLACEMENT_MAX_POLICY /* implicitly 3: one past the last numbered value */
+};
+
+typedef enum placement_policy placement_policy_t;
+
struct lmv_obd {
int refcount;
struct lu_client_fld lmv_fld;
spinlock_t lmv_lock;
+ placement_policy_t lmv_placement;
struct lmv_desc desc;
struct obd_uuid cluuid;
struct obd_export *exp;
/* initial thread handling transaction */
int oti_thread_id;
__u32 oti_conn_cnt;
+
+ struct obd_uuid *oti_ost_uuid;
};
static inline void oti_init(struct obd_trans_info *oti,
struct completion trd_finishing;
};
-#define OBD_LLOG_GROUP 0
-
enum filter_groups {
FILTER_GROUP_LLOG = 1,
FILTER_GROUP_ECHO,
spinlock_t olg_lock;
struct obd_export *olg_exp;
int olg_initializing;
+ struct semaphore olg_cat_processing;
};
/* corresponds to one of the obd's */
obd_inactive:1; /* device active/inactive
* (for /proc/status only!!) */
/* uuid-export hash body */
- struct lustre_class_hash_body *obd_uuid_hash_body;
+ struct lustre_hash *obd_uuid_hash;
/* nid-export hash body */
- struct lustre_class_hash_body *obd_nid_hash_body;
+ struct lustre_hash *obd_nid_hash;
/* nid stats body */
- struct lustre_class_hash_body *obd_nid_stats_hash_body;
+ struct lustre_hash *obd_nid_stats_hash;
struct list_head obd_nid_stats;
atomic_t obd_refcount;
cfs_waitq_t obd_refcount_waitq;
/* XXX encapsulate all this recovery data into one struct */
svc_handler_t obd_recovery_handler;
pid_t obd_processing_task;
-
+
int obd_max_recoverable_clients;
int obd_connected_clients;
int obd_recoverable_clients;
time_t obd_recovery_end; /* seconds, for lprocfs_status */
time_t obd_recovery_max_time; /* seconds, bz13079 */
int obd_recovery_timeout;
-
+
/* new recovery stuff from CMD2 */
struct target_recovery_data obd_recovery_data;
int obd_replayed_locks;
atomic_t obd_evict_inprogress;
cfs_waitq_t obd_evict_inprogress_waitq;
- /**
- * Ldlm pool part. Save last calculated SLV and Limit.
+ /**
+ * Ldlm pool part. Save last calculated SLV and Limit.
*/
rwlock_t obd_pool_lock;
int obd_pool_limit;
__u64 obd_pool_slv;
+
+ /**
+ * A list of outstanding class_incref()'s against this obd. For
+ * debugging.
+ */
+ struct lu_ref obd_reference;
};
#define OBD_OPT_FORCE 0x0001
#define KEY_CLEAR_FS "clear_fs"
#define KEY_BLOCKSIZE "blocksize"
#define KEY_BLOCKSIZE_BITS "blocksize_bits"
+#define KEY_FIEMAP "FIEMAP"
/* XXX unused ?*/
#define KEY_INTERMDS "inter_mds"
#define KEY_ASYNC "async"
return LCK_CW;
else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
return LCK_CR;
-
+
LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
return -EINVAL;
}
__u32 op_suppgids[2];
__u32 op_fsuid;
__u32 op_fsgid;
- __u32 op_cap;
+ cfs_cap_t op_cap;
void *op_data;
/* iattr fields and blocks. */
int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void *uarg);
int (*o_get_info)(struct obd_export *, __u32 keylen, void *key,
- __u32 *vallen, void *val);
+ __u32 *vallen, void *val, struct lov_stripe_md *lsm);
int (*o_set_info_async)(struct obd_export *, __u32 keylen, void *key,
__u32 vallen, void *val,
struct ptlrpc_request_set *set);
int (*o_reconnect)(const struct lu_env *env,
struct obd_export *exp, struct obd_device *src,
struct obd_uuid *cluuid,
- struct obd_connect_data *ocd);
+ struct obd_connect_data *ocd,
+ void *localdata);
int (*o_disconnect)(struct obd_export *exp);
/* Initialize/finalize fids infrastructure. */
int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
struct md_op_data *op_data);
- /*
+ /*
* Object with @fid is getting deleted, we may want to do something
* about this.
*/
obd_id *startid, obd_gr group, void *data);
int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_remote *remote,
- struct niobuf_local *local, struct obd_trans_info *oti,
+ struct niobuf_remote *remote, int *nr_pages,
+ struct niobuf_local *local,
+ struct obd_trans_info *oti,
struct lustre_capa *capa);
int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_local *local,
+ struct niobuf_remote *remote, int pages,
+ struct niobuf_local *local,
struct obd_trans_info *oti, int rc);
int (*o_enqueue)(struct obd_export *, struct obd_info *oinfo,
struct ldlm_enqueue_info *einfo,
__u32 mode, struct lustre_handle *);
int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
int flags, void *opaque);
- int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *,
- int join);
int (*o_init_export)(struct obd_export *exp);
int (*o_destroy_export)(struct obd_export *exp);
int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
enum obd_notify_event ev, void *data);
int (*o_health_check)(struct obd_device *);
+ struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
/* quota methods */
int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
obd_lock_cancel_cb cb);
int (*o_unregister_lock_cancel_cb)(struct obd_export *exp,
obd_lock_cancel_cb cb);
-
+ /* pools methods */
+ int (*o_pool_new)(struct obd_device *obd, char *poolname);
+ int (*o_pool_del)(struct obd_device *obd, char *poolname);
+ int (*o_pool_add)(struct obd_device *obd, char *poolname,
+ char *ostname);
+ int (*o_pool_rem)(struct obd_device *obd, char *poolname,
+ char *ostname);
/*
* NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
* to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
#define MAX_HASH_SIZE_32 0x7fffffffUL
#define MAX_HASH_SIZE 0x7fffffffffffffffULL
-#define MAX_HASH_HIGHEST_BIT 0x1000000000000000
+#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
struct lustre_md {
struct mdt_body *body;
int (*m_close)(struct obd_export *, struct md_op_data *,
struct md_open_data *, struct ptlrpc_request **);
int (*m_create)(struct obd_export *, struct md_op_data *,
- const void *, int, int, __u32, __u32, __u32,
+ const void *, int, int, __u32, __u32, cfs_cap_t,
__u64, struct ptlrpc_request **);
int (*m_done_writing)(struct obd_export *, struct md_op_data *,
struct md_open_data *);
int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
struct lookup_intent *, struct md_op_data *,
- struct lustre_handle *, void *, int, int);
+ struct lustre_handle *, void *, int,
+ struct ptlrpc_request **, int);
int (*m_getattr)(struct obd_export *, const struct lu_fid *,
struct obd_capa *, obd_valid, int,
struct ptlrpc_request **);
struct lov_mds_md *lmm);
};
-extern struct lsm_operations lsm_plain_ops;
-extern struct lsm_operations lsm_join_ops;
-static inline struct lsm_operations *lsm_op_find(int magic)
+extern const struct lsm_operations lsm_v1_ops;
+extern const struct lsm_operations lsm_join_ops;
+extern const struct lsm_operations lsm_v3_ops;
+static inline const struct lsm_operations *lsm_op_find(int magic)
{
switch(magic) {
- case LOV_MAGIC:
- return &lsm_plain_ops;
+ case LOV_MAGIC_V1:
+ return &lsm_v1_ops;
case LOV_MAGIC_JOIN:
return &lsm_join_ops;
+ case LOV_MAGIC_V3:
+ return &lsm_v3_ops;
default:
CERROR("Cannot recognize lsm_magic %d\n", magic);
return NULL;
obd->obd_name, transno, error);
return;
}
- CDEBUG(D_HA, "%s: transno "LPU64" committed\n",
- obd->obd_name, transno);
if (transno > obd->obd_last_committed) {
+ CDEBUG(D_HA, "%s: transno "LPD64" committed\n",
+ obd->obd_name, transno);
obd->obd_last_committed = transno;
ptlrpc_commit_replies (obd);
+ } else {
+ CDEBUG(D_INFO, "%s: transno "LPD64" committed\n",
+ obd->obd_name, transno);
}
}