again in unused hash.
Severity : enhancement
+Bugzilla : 15899
+Description: File striping can now be set to use an arbitrary pool of OSTs.
+
+Severity : enhancement
Bugzilla : 16573
Description: Export bytes_read/bytes_write count on OSC/OST.
\fB[[!] --uid|-u N] [[!] --user|-U <name>]
\fB<dirname|filename>\fR
.br
+.B lfs osts
+.br
.B lfs getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v]
- \fB[--recursive|-r] <dirname|filename>\fR
+ \fB[--recursive|-r] <dirname|filename>\fR
.br
.B lfs setstripe [--size|-s stripe-size] [--count|-c stripe-cnt]
- \fB[--index|-i start-ost] <filename|dirname>\fR
+ \fB[--offset|-o start-ost] [--pool|-p pool-name]
+ \fB<dir|filename>\fR
.br
-.B lfs setstripe -d <dirname>
+.B lfs setstripe -d <dir>
.br
-.B lfs quotachown [-i] <filesystem>
+.B lfs poollist <filesystem>[.<pool>] | <pathname>
+.br
+.B lfs quota [-v] [-o obd_uuid] [-u|-g] <username|groupname> <filesystem>
+.br
+.B lfs quota <filesystem>
+.br
+.B lfs quota -t [-u|-g] <filesystem>
.br
.B lfs quotacheck [-ug] <filesystem>
.br
+.B lfs quotachown [-i] <filesystem>
+.br
.B lfs quotaon [-ugf] <filesystem>
.br
.B lfs quotaoff [-ug] <filesystem>
.B find
To search the directory tree rooted at the given dir/file name for the files that match the given parameters: \fB--atime\fR (file was last accessed N*24 hours ago), \fB--ctime\fR (file's status was last changed N*24 hours ago), \fB--mtime\fR (file's data was last modified N*24 hours ago), \fB--obd\fR (file has an object on a specific OST or OSTs), \fB--size\fR (file has size in bytes, or \fBk\fRilo-, \fBM\fRega-, \fBG\fRiga-, \fBT\fRera-, \fBP\fReta-, or \fBE\fRxabytes if a suffix is given), \fB--type\fR (file has the type: \fBb\fRlock, \fBc\fRharacter, \fBd\fRirectory, \fBp\fRipe, \fBf\fRile, sym\fBl\fRink, \fBs\fRocket, or \fBD\fRoor (Solaris)), \fB--uid\fR (file has specific numeric user ID), \fB--user\fR (file owned by specific user, numeric user ID allowed), \fB--gid\fR (file has specific group ID), \fB--group\fR (file belongs to specific group, numeric group ID allowed). The option \fB--maxdepth\fR allows find to decend at most N levels of directory tree. The options \fB--print\fR and \fB--print0\fR print full file name, followed by a newline or NUL character correspondingly. Using \fB!\fR before an option negates its meaning (\fIfiles NOT matching the parameter\fR). Using \fB+\fR before a numeric value means \fIfiles with the parameter OR MORE\fR, while \fB-\fR before a numeric value means \fIfiles with the parameter OR LESS\fR.
.TP
-.B getstripe
-To list the striping info for a given filename or files in a directory, optionally recursively, for all files in a directory tree: \fB--quiet\fR (don't print object IDs), \fB--verbose\fR (print striping parameters), \fB--recursive\fR (recurse into subdirectories).
-.TP
.B osts
List all the OSTs for the filesystem
.TP
-.B setstripe [--size stripe-size] [--count stripe-cnt] [--index start-ost]
+.B getstripe
+To list the striping info for a given filename or files in a directory, optionally recursively, for all files in a directory tree: \fB--quiet\fR (don't print object IDs), \fB--verbose\fR (print striping parameters), \fB--recursive\fR (recurse into subdirectories).
+.TP
+.B setstripe [--size stripe-size] [--count stripe-cnt]
+ \fB[--offset start-ost] [--pool pool-name]\fR
+.br
To create a new file, or set the directory default, with the specified striping parameters. The
.I stripe-count
is the number of OSTs to stripe a file over. A
.I stripe-size
of 0 means to use the filesystem-wide default stripe size (default 1MB). The
.I start-ost
-is the OST index (starting at 0) on which to start striping for this file. A
+is the OST index (base 10, starting at 0) on which to start striping for this file. A
.I start-ost
-of -1 allows the MDS to specify the starting index and it is strongly
-recommended that the starting OST not be given, as this allows space and
-load balancing to be done by the MDS as needed.
+of -1 allows the MDS to choose the starting index; this setting is strongly recommended, as it allows space and load balancing to be done by the MDS as needed. The
+.I pool-name
+is the name of a predefined pool of OSTs (see
+.I lctl
+) that will be used for striping. The
+.I stripe-count, stripe-size, start-ost
+will be used as well; the
+.I start-ost
+must be part of the pool or an error will be returned.
.TP
-.B lfs setstripe -d
+.B setstripe -d
Delete the default striping on the specified directory.
.TP
+.B poollist <filesystem>[.<pool>] | <pathname>
+List the pools in \fBfilesystem\fR or \fBpathname\fR, or the OSTs in \fBfilesystem.pool\fR
+.TP
.B quotachown
To change files' owner and group on OSTs of the specified filesystem
.TP
extern int lprocfs_exp_setup(struct obd_export *exp,
lnet_nid_t *peer_nid, int *newnid);
extern int lprocfs_exp_cleanup(struct obd_export *exp);
-extern int lprocfs_add_simple(struct proc_dir_entry *root,
- char *name, cfs_read_proc_t *read_proc,
- cfs_write_proc_t *write_proc, void *data);
+extern cfs_proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+ char *name,
+ cfs_read_proc_t *read_proc,
+ cfs_write_proc_t *write_proc,
+ void *data,
+ struct file_operations *fops);
extern struct proc_dir_entry *lprocfs_add_symlink(const char *name,
struct proc_dir_entry *parent, const char *dest);
extern void lprocfs_free_per_client_stats(struct obd_device *obd);
extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
extern int lprocfs_obd_cleanup(struct obd_device *obd);
-extern int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
- cfs_read_proc_t *read_proc,
- cfs_write_proc_t *write_proc,
- void *data);
extern void lprocfs_free_per_client_stats(struct obd_device *obd);
extern struct file_operations lprocfs_evict_client_fops;
{ return 0; }
static inline int lprocfs_exp_cleanup(struct obd_export *exp)
{ return 0; }
-static inline int lprocfs_add_simple(struct proc_dir_entry *root,
+static inline cfs_proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
char *name,
cfs_read_proc_t *read_proc,
cfs_write_proc_t *write_proc,
- void *data)
+ void *data,
+ struct file_operations *fops)
{return 0; }
static inline struct proc_dir_entry *lprocfs_add_symlink(const char *name,
struct proc_dir_entry *parent, const char *dest)
extern int llapi_file_open(const char *name, int flags, int mode,
unsigned long stripe_size, int stripe_offset,
int stripe_count, int stripe_pattern);
+extern int llapi_file_create_pool(const char *name, unsigned long stripe_size,
+ int stripe_offset, int stripe_count,
+ int stripe_pattern, char *pool_name);
+extern int llapi_file_open_pool(const char *name, int flags, int mode,
+ unsigned long stripe_size, int stripe_offset,
+ int stripe_count, int stripe_pattern,
+ char *pool_name);
+extern int llapi_poollist(char *name);
extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
#define HAVE_LLAPI_FILE_LOOKUP
extern int llapi_file_lookup(int dirfd, const char *name);
exclude_gid:1,
exclude_uid:1,
check_gid:1,
- check_uid:1;
+ check_uid:1,
+ check_pool:1,
+ exclude_pool:1;
int verbose;
int quiet;
/* In-precess parameters. */
unsigned int depth;
dev_t st_dev;
+
+ char poolname[MAXPOOLNAME+1];
};
extern int llapi_getstripe(char *path, struct find_param *param);
extern int llapi_target_check(int num_types, char **obd_types, char *dir);
extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid);
-extern int llapi_file_get_lov_fuuid(int fd, struct obd_uuid *lov_uuid);
+extern int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_uuid);
extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
extern int llapi_is_lustre_mnttype(const char *type);
extern int parse_size(char *optarg, unsigned long long *size,
*b=10600 */
#define OBD_CONNECT_CKSUM 0x20000000ULL /* support several cksum algos */
#define OBD_CONNECT_FID 0x40000000ULL /* FID is supported by server */
+#define OBD_CONNECT_LOV_V3 0x100000000ULL /* client supports lov v3 ea */
/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
* and lustre/utils/wirecheck.c */
OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \
OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \
OBD_CONNECT_FID | \
- LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_AT)
+ LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_AT | \
+ OBD_CONNECT_LOV_V3)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
#define LOV_MAGIC_V1 0x0BD10BD0
#define LOV_MAGIC LOV_MAGIC_V1
#define LOV_MAGIC_JOIN 0x0BD20BD0
+#define LOV_MAGIC_V3 0x0BD30BD0
#define LOV_PATTERN_RAID0 0x001 /* stripes are used round-robin */
#define LOV_PATTERN_RAID1 0x002 /* stripes are mirrors of each other */
#define LOV_OBJECT_GROUP_DEFAULT ~0ULL
#define LOV_OBJECT_GROUP_CLEAR 0ULL
+#define MAXPOOLNAME 16
+#define POOLNAMEF "%.16s"
+
#define lov_ost_data lov_ost_data_v1
struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/
__u64 l_object_id; /* OST object ID */
struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
};
-extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm);
+/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
#define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default"
#define XATTR_NAME_LOV "trusted.lov"
+struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */
+ __u32 lmm_magic; /* magic number = LOV_MAGIC_V3 */
+ __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+ __u64 lmm_object_id; /* LOV object ID */
+ __u64 lmm_object_gr; /* LOV object group */
+ __u32 lmm_stripe_size; /* size of stripe in bytes */
+ __u32 lmm_stripe_count; /* num stripes in use for this object */
+ char lmm_pool_name[MAXPOOLNAME]; /* must be 32bit aligned */
+ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+};
+
+
#define OBD_MD_FLID (0x00000001ULL) /* object ID */
#define OBD_MD_FLATIME (0x00000002ULL) /* access time */
#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */
extern void lustre_swab_ost_last_id(obd_id *id);
extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
-extern void lustre_swab_lov_user_md(struct lov_user_md *lum);
-extern void lustre_swab_lov_user_md_objects(struct lov_user_md *lum);
+extern void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count);
extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj);
/* llog_swab.c */
#define LOV_USER_MAGIC_V1 0x0BD10BD0
#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
-
#define LOV_USER_MAGIC_JOIN 0x0BD20BD0
+#define LOV_USER_MAGIC_V3 0x0BD30BD0
#define LOV_PATTERN_RAID0 0x001
#define LOV_PATTERN_RAID1 0x002
#define LOV_PATTERN_FIRST 0x100
+#define MAXPOOLNAME 16
+
#define lov_user_ost_data lov_user_ost_data_v1
struct lov_user_ost_data_v1 { /* per-stripe data structure */
__u64 l_object_id; /* OST object ID */
struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
} __attribute__((packed));
+struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
+ __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */
+ __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+ __u64 lmm_object_id; /* LOV object ID */
+ __u64 lmm_object_gr; /* LOV object group */
+ __u32 lmm_stripe_size; /* size of stripe in bytes */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ __u16 lmm_stripe_offset; /* starting stripe offset in lmm_objects */
+ char lmm_pool_name[MAXPOOLNAME]; /* pool name */
+ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed));
+
/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
* use this. It is unsafe to #define those values in this header as it
* is possible the application has already #included <sys/stat.h>. */
#define lov_user_mds_data lov_user_mds_data_v1
struct lov_user_mds_data_v1 {
lstat_t lmd_st; /* MDS stat struct */
- struct lov_user_md_v1 lmd_lmm; /* LOV EA user data */
+ struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
+} __attribute__((packed));
+
+struct lov_user_mds_data_v3 {
+ lstat_t lmd_st; /* MDS stat struct */
+ struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */
} __attribute__((packed));
#endif
LCFG_ADD_MDC = 0x00cf014,
LCFG_DEL_MDC = 0x00cf015,
LCFG_SPTLRPC_CONF = 0x00ce016,
+ LCFG_POOL_NEW = 0x00ce020,
+ LCFG_POOL_ADD = 0x00ce021,
+ LCFG_POOL_REM = 0x00ce022,
+ LCFG_POOL_DEL = 0x00ce023,
};
struct lustre_cfg_bufs {
OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount,
bufs->lcfg_buflen));
if (!lcfg)
- RETURN(lcfg);
+ RETURN(ERR_PTR(-ENOMEM));
lcfg->lcfg_version = LUSTRE_CFG_VERSION;
lcfg->lcfg_command = cmd;
#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE)
#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE)
__u32 lw_stripe_size; /* size of the stripe */
__u32 lw_pattern; /* striping pattern (RAID0, RAID1) */
unsigned lw_stripe_count; /* number of objects being striped over */
+ char lw_pool_name[MAXPOOLNAME]; /* pool name */
} lsm_wire;
struct lov_array_info *lsm_array; /*Only for joined file array info*/
#define lsm_stripe_size lsm_wire.lw_stripe_size
#define lsm_pattern lsm_wire.lw_pattern
#define lsm_stripe_count lsm_wire.lw_stripe_count
+#define lsm_pool_name lsm_wire.lw_pool_name
struct obd_info;
unsigned int ltq_usable:1; /* usable for striping */
};
+/* Generic subset of OSTs, kept as indices into lov_obd->lov_tgts */
+struct ost_pool {
+ __u32 *op_array; /* array of indices into
+ lov_obd->lov_tgts */
+ unsigned int op_count; /* number of OSTs in the array */
+ unsigned int op_size; /* allocated size of op_array */
+ rwlock_t op_rwlock; /* to protect lov_pool use */
+};
+
+/* Round-robin allocator data */
+struct lov_qos_rr {
+ __u32 lqr_start_idx; /* start index of new inode */
+ __u32 lqr_offset_idx; /* aliasing for start_idx */
+ int lqr_start_count; /* reseed counter */
+ struct ost_pool lqr_pool; /* round-robin optimized list */
+ unsigned long lqr_dirty:1; /* recalc round-robin list */
+};
+
+/* Stripe placement optimization */
struct lov_qos {
struct list_head lq_oss_list; /* list of OSSs that targets use */
struct rw_semaphore lq_rw_sem;
__u32 lq_active_oss_count;
- __u32 *lq_rr_array; /* round-robin optimized list */
- unsigned int lq_rr_size; /* rr array size */
unsigned int lq_prio_free; /* priority for free space */
+ struct lov_qos_rr lq_rr; /* round robin qos data */
unsigned long lq_dirty:1, /* recalc qos data */
- lq_dirty_rr:1, /* recalc round-robin list */
lq_same_space:1,/* the ost's all have approx.
the same space avail */
lq_reset:1; /* zero current penalties */
ltd_reap:1; /* should this target be deleted */
};
+/* Pool metadata accessors; _p is a pointer to struct pool_desc. Arguments and expansions are parenthesized so any pointer expression is safe. */
+#define pool_tgt_size(_p) ((_p)->pool_obds.op_size)
+#define pool_tgt_count(_p) ((_p)->pool_obds.op_count)
+#define pool_tgt_array(_p) ((_p)->pool_obds.op_array)
+#define pool_tgt_rwlock(_p) ((_p)->pool_obds.op_rwlock)
+#define pool_tgt(_p, _i) ((_p)->pool_lov->lov_tgts[(_p)->pool_obds.op_array[_i]])
+
+struct pool_desc {
+ char pool_name[MAXPOOLNAME + 1]; /* name of pool */
+ struct ost_pool pool_obds; /* pool members */
+ struct lov_qos_rr pool_rr; /* round robin qos */
+ struct hlist_node pool_hash; /* access by poolname */
+ struct list_head pool_list; /* serial access */
+ cfs_proc_dir_entry_t *pool_proc_entry; /* file in /proc */
+ struct lov_obd *pool_lov; /* lov obd to which this
+ pool belongs */
+};
+
struct lov_obd {
struct lov_desc desc;
- struct lov_tgt_desc **lov_tgts;
+ struct lov_tgt_desc **lov_tgts; /* sparse array */
+ struct ost_pool lov_packed; /* all OSTs in a packed
+ array */
struct semaphore lov_lock;
struct obd_connect_data lov_ocd;
struct lov_qos lov_qos; /* qos info per lov */
__u32 lov_active_tgt_count; /* how many active */
__u32 lov_death_row;/* tgts scheduled to be deleted */
__u32 lov_tgt_size; /* size of tgts array */
- __u32 lov_start_idx; /* start index of new inode */
- __u32 lov_offset_idx; /* aliasing for start_idx */
- int lov_start_count;/* reseed counter */
int lov_connects;
obd_page_removal_cb_t lov_page_removal_cb;
obd_pin_extent_cb lov_page_pin_cb;
obd_lock_cancel_cb lov_lock_cancel_cb;
+ int lov_pool_count;
+ lustre_hash_t *lov_pools_hash_body; /* used for key access */
+ struct list_head lov_pool_list; /* used for sequential access */
+ cfs_proc_dir_entry_t *lov_pool_proc_entry;
};
struct lmv_tgt_desc {
obd_lock_cancel_cb cb);
int (*o_unregister_lock_cancel_cb)(struct obd_export *exp,
obd_lock_cancel_cb cb);
-
+ /* pools methods */
+ int (*o_pool_new)(struct obd_device *obd, char *poolname);
+ int (*o_pool_del)(struct obd_device *obd, char *poolname);
+ int (*o_pool_add)(struct obd_device *obd, char *poolname,
+ char *ostname);
+ int (*o_pool_rem)(struct obd_device *obd, char *poolname,
+ char *ostname);
/*
* NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
* to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
struct lov_mds_md *lmm);
};
-extern struct lsm_operations lsm_plain_ops;
+extern struct lsm_operations lsm_v1_ops;
extern struct lsm_operations lsm_join_ops;
+extern struct lsm_operations lsm_v3_ops;
static inline struct lsm_operations *lsm_op_find(int magic)
{
switch(magic) {
- case LOV_MAGIC:
- return &lsm_plain_ops;
+ case LOV_MAGIC_V1:
+ return &lsm_v1_ops;
case LOV_MAGIC_JOIN:
return &lsm_join_ops;
+ case LOV_MAGIC_V3:
+ return &lsm_v3_ops;
default:
CERROR("Cannot recognize lsm_magic %d\n", magic);
return NULL;
RETURN(rc);
}
+static inline int obd_pool_new(struct obd_device *obd, char *poolname)
+{
+ int rc;
+ ENTRY;
+/* NOTE(review): -EOPNOTSUPP is presumably what OBD_CHECK_DT_OP returns when obd has no pool_new method -- confirm */
+ OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_new);
+/* call obd's pool_new method (presumably o_pool_new, reached through OBP) with poolname and return its result */
+ rc = OBP(obd, pool_new)(obd, poolname);
+ RETURN(rc);
+}
+
+static inline int obd_pool_del(struct obd_device *obd, char *poolname)
+{
+ int rc;
+ ENTRY;
+/* NOTE(review): -EOPNOTSUPP is presumably what OBD_CHECK_DT_OP returns when obd has no pool_del method -- confirm */
+ OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_del);
+/* call obd's pool_del method (presumably o_pool_del, reached through OBP) with poolname and return its result */
+ rc = OBP(obd, pool_del)(obd, poolname);
+ RETURN(rc);
+}
+
+static inline int obd_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+ int rc;
+ ENTRY;
+/* NOTE(review): -EOPNOTSUPP is presumably what OBD_CHECK_DT_OP returns when obd has no pool_add method -- confirm */
+ OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_add);
+/* call obd's pool_add method (presumably o_pool_add, reached through OBP) with poolname and ostname and return its result */
+ rc = OBP(obd, pool_add)(obd, poolname, ostname);
+ RETURN(rc);
+}
+
+static inline int obd_pool_rem(struct obd_device *obd, char *poolname, char *ostname)
+{
+ int rc;
+ ENTRY;
+/* NOTE(review): -EOPNOTSUPP is presumably what OBD_CHECK_DT_OP returns when obd has no pool_rem method -- confirm */
+ OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_rem);
+/* call obd's pool_rem method (presumably o_pool_rem, reached through OBP) with poolname and ostname and return its result */
+ rc = OBP(obd, pool_rem)(obd, poolname, ostname);
+ RETURN(rc);
+}
+
static inline int obd_init_export(struct obd_export *exp)
{
int rc = 0;
return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo*);
}
-#define lov_mds_md_size(stripes) lov_mds_md_v1_size(stripes)
-static inline int lov_mds_md_v1_size(int stripes)
+static inline int lov_mds_md_size(int stripes, int lmm_magic)
{
- return sizeof(struct lov_mds_md_v1) +
- stripes * sizeof(struct lov_ost_data_v1);
+ if (lmm_magic == LOV_MAGIC_V3)
+ return sizeof(struct lov_mds_md_v3) +
+ stripes * sizeof(struct lov_ost_data_v1);
+ else
+ return sizeof(struct lov_mds_md_v1) +
+ stripes * sizeof(struct lov_ost_data_v1);
}
+
#define IOC_LOV_TYPE 'g'
#define IOC_LOV_MIN_NR 50
#define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long)
cli->cl_import = imp;
/* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
- cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+ cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
if (rc)
return(-EFAULT);
- if (lum.lmm_magic != LOV_USER_MAGIC)
+ switch (lum.lmm_magic) {
+ case LOV_USER_MAGIC_V1: {
+ if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
+ lustre_swab_lov_user_md_v1(&lum);
+ break;
+ }
+ case LOV_USER_MAGIC_V3: {
+ if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)&lum);
+ break;
+ }
+ default: {
+ CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
+ " %#08x != %#08x nor %#08x\n",
+ lum.lmm_magic, LOV_USER_MAGIC_V1,
+ LOV_USER_MAGIC_V3);
RETURN(-EINVAL);
-
- if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
- lustre_swab_lov_user_md(&lum);
+ }
+ }
/* swabbing is done in lov_setstripe() on server side */
rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
static int
llu_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
{
- struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
+ /* even if the default lov magic is LOV_MAGIC_V1 we use LOV_MAGIC_V3
+ * to be sure the buffers are large enough */
+ struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
__u32 valsize = sizeof(struct lov_desc);
int rc, easize, def_easize, cookiesize;
struct lov_desc desc;
struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
struct obd_device *mgc = lsi->lsi_mgc;
char *fsname = NULL, *param = NULL;
+ int lum_size;
/*
* This is coming from userspace, so should be in
* local endian. But the MDS would like it in little
* endian, so we swab it before we send it.
*/
- if (lump->lmm_magic != LOV_USER_MAGIC)
+ switch (lump->lmm_magic) {
+ case LOV_USER_MAGIC_V1: {
+ if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
+ lustre_swab_lov_user_md_v1(lump);
+ lum_size = sizeof(struct lov_user_md_v1);
+ break;
+ }
+ case LOV_USER_MAGIC_V3: {
+ if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lump);
+ lum_size = sizeof(struct lov_user_md_v3);
+ break;
+ }
+ default: {
+ CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
+ " %#08x != %#08x nor %#08x\n",
+ lump->lmm_magic, LOV_USER_MAGIC_V1,
+ LOV_USER_MAGIC_V3);
RETURN(-EINVAL);
-
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
- lustre_swab_lov_user_md(lump);
+ }
+ }
op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
LUSTRE_OPC_ANY, NULL);
RETURN(PTR_ERR(op_data));
/* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, op_data, lump, sizeof(*lump),
+ rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
NULL, 0, &req, NULL);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
CERROR("mdc_setattr fails: rc = %d\n", rc);
}
+ /* In the following we use the fact that LOV_USER_MAGIC_V1 and
+ LOV_USER_MAGIC_V3 have the same initial fields so we do not
+ need to make the distinction between the 2 versions */
if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
OBD_ALLOC(param, MGS_PARAM_MAXLEN);
* little endian. We convert it to host endian before
* passing it to userspace.
*/
- if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
- lustre_swab_lov_user_md((struct lov_user_md *)lmm);
+ /* We don't swab objects for directories */
+ switch (le32_to_cpu(lmm->lmm_magic)) {
+ case LOV_MAGIC_V1:
+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+ break;
+ case LOV_MAGIC_V3:
+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+ break;
+ default:
+ CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
+ rc = -EPROTO;
}
out:
*lmmp = lmm;
return rc;
}
case LL_IOC_LOV_SETSTRIPE: {
- struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
+ struct lov_user_md_v3 lumv3;
+ struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+ struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
+ struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
+
int rc = 0;
int set_default = 0;
- LASSERT(sizeof(lum) == sizeof(*lump));
- LASSERT(sizeof(lum.lmm_objects[0]) ==
- sizeof(lump->lmm_objects[0]));
- rc = copy_from_user(&lum, lump, sizeof(lum));
+ LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
+ LASSERT(sizeof(lumv3.lmm_objects[0]) ==
+ sizeof(lumv3p->lmm_objects[0]));
+ /* first try with v1 which is smaller than v3 */
+ rc = copy_from_user(lumv1, lumv1p, sizeof(*lumv1));
if (rc)
RETURN(-EFAULT);
+ if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
+ rc = copy_from_user(&lumv3, lumv3p, sizeof(lumv3));
+ if (rc)
+ RETURN(-EFAULT);
+ }
+
if (inode->i_sb->s_root == file->f_dentry)
set_default = 1;
- rc = ll_dir_setstripe(inode, &lum, set_default);
+ /* in v1 and v3 cases lumv1 points to data */
+ rc = ll_dir_setstripe(inode, lumv1, set_default);
RETURN(rc);
}
if (rc)
GOTO(free_lmm, rc = -EFAULT);
+ switch (lmm->lmm_magic) {
+ case LOV_USER_MAGIC_V1:
+ if (LOV_USER_MAGIC_V1 == cpu_to_le32(LOV_USER_MAGIC_V1))
+ break;
+ /* swab objects first so that stripes num will be sane */
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v1 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+ break;
+ case LOV_USER_MAGIC_V3:
+ if (LOV_USER_MAGIC_V3 == cpu_to_le32(LOV_USER_MAGIC_V3))
+ break;
+ /* swab objects first so that stripes num will be sane */
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+ break;
+ default:
+ GOTO(free_lmm, rc = -EINVAL);
+ }
+
rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
if (rc < 0)
GOTO(free_lmm, rc = -ENOMEM);
lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
LASSERT(lmm != NULL);
+ if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
+ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
+ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
+ GOTO(out, rc = -EPROTO);
+ }
+
/*
* This is coming from the MDS, so is probably in
* little endian. We convert it to host endian before
* passing it to userspace.
*/
- if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
- lustre_swab_lov_user_md((struct lov_user_md *)lmm);
- lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
- } else if (lmm->lmm_magic == __swab32(LOV_MAGIC_JOIN)) {
- lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
+ /* if the function is called for a directory, we should
+ * avoid swabbing non-existent lsm objects */
+ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+ if (S_ISREG(body->mode))
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v1 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
+ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+ if (S_ISREG(body->mode))
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
+ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
+ lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
+ }
}
if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
static int ll_lov_setstripe(struct inode *inode, struct file *file,
unsigned long arg)
{
- struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
+ struct lov_user_md_v3 lumv3;
+ struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+ struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
+ struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
+ int lum_size;
int rc;
int flags = FMODE_WRITE;
ENTRY;
- /* Bug 1152: copy properly when this is no longer true */
- LASSERT(sizeof(lum) == sizeof(*lump));
- LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
- rc = copy_from_user(&lum, lump, sizeof(lum));
+ /* first try with v1 which is smaller than v3 */
+ lum_size = sizeof(struct lov_user_md_v1);
+ rc = copy_from_user(lumv1, lumv1p, lum_size);
if (rc)
RETURN(-EFAULT);
- rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum));
+ if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
+ lum_size = sizeof(struct lov_user_md_v3);
+ rc = copy_from_user(&lumv3, lumv3p, lum_size);
+ if (rc)
+ RETURN(-EFAULT);
+ }
+
+ rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
if (rc == 0) {
- put_user(0, &lump->lmm_stripe_count);
+ put_user(0, &lumv1p->lmm_stripe_count);
rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
- 0, ll_i2info(inode)->lli_smd, lump);
+ 0, ll_i2info(inode)->lli_smd,
+ (void *)arg);
}
RETURN(rc);
}
* calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
{
- struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
+ struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
__u32 valsize = sizeof(struct lov_desc);
int rc, easize, def_easize, cookiesize;
struct lov_desc desc;
OBD_CONNECT_JOIN | OBD_CONNECT_ATTRFID |
OBD_CONNECT_VERSION | OBD_CONNECT_MDS_CAPA |
OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET|
- OBD_CONNECT_FID | OBD_CONNECT_AT;
+ OBD_CONNECT_FID | OBD_CONNECT_AT |
+ OBD_CONNECT_LOV_V3;
#ifdef HAVE_LRU_RESIZE_SUPPORT
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
if (lsm != NULL) {
if (lli->lli_smd == NULL) {
- if (lsm->lsm_magic != LOV_MAGIC &&
+ if (lsm->lsm_magic != LOV_MAGIC_V1 &&
+ lsm->lsm_magic != LOV_MAGIC_V3 &&
lsm->lsm_magic != LOV_MAGIC_JOIN) {
dump_lsm(D_ERROR, lsm);
LBUG();
MODULES := lov
-lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o lov_ea.o
+lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o lov_ea.o lov_pool.o
@INCLUDE_RULES@
if LIBLUSTRE
noinst_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_log.c lov_obd.c lov_pack.c lov_request.c lov_offset.c lov_qos.c lov_merge.c lov_ea.c lov_internal.h
+liblov_a_SOURCES = lov_log.c lov_pool.c lov_obd.c lov_pack.c lov_request.c lov_offset.c lov_qos.c lov_merge.c lov_ea.c lov_internal.h
liblov_a_CPPFLAGS = $(LLCPPFLAGS)
liblov_a_CFLAGS = $(LLCFLAGS)
endif
lov_SOURCES := \
lov_log.c \
+ lov_pool.c \
lov_obd.c \
lov_pack.c \
lov_request.c \
if (stripe_count == 0 || stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
CERROR("bad stripe count %d\n", stripe_count);
- lov_dump_lmm_v1(D_WARNING, lmm);
+ lov_dump_lmm(D_WARNING, lmm);
return -EINVAL;
}
-
+
if (lmm->lmm_object_id == 0) {
CERROR("zero object id\n");
- lov_dump_lmm_v1(D_WARNING, lmm);
+ lov_dump_lmm(D_WARNING, lmm);
return -EINVAL;
}
-
+
if (lmm->lmm_pattern != cpu_to_le32(LOV_PATTERN_RAID0)) {
CERROR("bad striping pattern\n");
- lov_dump_lmm_v1(D_WARNING, lmm);
+ lov_dump_lmm(D_WARNING, lmm);
return -EINVAL;
}
0xffffffff)) {
CERROR("bad stripe size %u\n",
le32_to_cpu(lmm->lmm_stripe_size));
- lov_dump_lmm_v1(D_WARNING, lmm);
+ lov_dump_lmm(D_WARNING, lmm);
return -EINVAL;
}
return 0;
lsm->lsm_oinfo[i] = loi;
}
lsm->lsm_stripe_count = stripe_count;
+ lsm->lsm_pool_name[0] = '\0';
return lsm;
err:
static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
struct lov_mds_md *lmm)
{
+ /*
+ * This assumes that the first fields of lov_mds_md_v1/v3
+ * are the same
+ */
lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
lsm->lsm_object_gr = le64_to_cpu(lmm->lmm_object_gr);
lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
+ lsm->lsm_pool_name[0] = '\0';
}
static void
return 0;
}
-static int lsm_lmm_verify_plain(struct lov_mds_md *lmm, int lmm_bytes,
+static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
int *stripe_count)
{
if (lmm_bytes < sizeof(*lmm)) {
- CERROR("lov_mds_md too small: %d, need at least %d\n",
+ CERROR("lov_mds_md_v1 too small: %d, need at least %d\n",
lmm_bytes, (int)sizeof(*lmm));
return -EINVAL;
}
*stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
- if (lmm_bytes < lov_mds_md_v1_size(*stripe_count)) {
- CERROR("LOV EA too small: %d, need %d\n",
- lmm_bytes, lov_mds_md_v1_size(*stripe_count));
+ if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)) {
+ CERROR("LOV EA V1 too small: %d, need %d\n",
+ lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V1));
lov_dump_lmm_v1(D_WARNING, lmm);
return -EINVAL;
}
return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count);
}
-int lsm_unpackmd_plain(struct lov_obd *lov, struct lov_stripe_md *lsm,
+int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
struct lov_mds_md_v1 *lmm)
{
struct lov_oinfo *loi;
return 0;
}
-struct lsm_operations lsm_plain_ops = {
+struct lsm_operations lsm_v1_ops = {
.lsm_free = lsm_free_plain,
.lsm_destroy = lsm_destroy_plain,
.lsm_stripe_by_index = lsm_stripe_by_index_plain,
.lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
.lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
.lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
- .lsm_lmm_verify = lsm_lmm_verify_plain,
- .lsm_unpackmd = lsm_unpackmd_plain,
+ .lsm_lmm_verify = lsm_lmm_verify_v1,
+ .lsm_unpackmd = lsm_unpackmd_v1,
};
struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, obd_off lov_off)
.lsm_lmm_verify = lsm_lmm_verify_join,
.lsm_unpackmd = lsm_unpackmd_join,
};
+
+
+/*
+ * Sanity-check an on-disk V3 LOV EA of lmm_bytes bytes.
+ *
+ * The buffer is handed in typed as a V1 EA and reinterpreted as V3.  On
+ * success *stripe_count holds the decoded stripe count and the result of
+ * lsm_lmm_verify_common() is returned; -EINVAL is returned when the buffer
+ * is smaller than the V3 header or than a V3 EA with that many stripes.
+ */
+static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes,
+                             int *stripe_count)
+{
+        struct lov_mds_md_v3 *lmm_v3 = (struct lov_mds_md_v3 *)lmmv1;
+
+        /* the fixed-size header must be complete before any field is read */
+        if (lmm_bytes < sizeof(*lmm_v3)) {
+                CERROR("lov_mds_md_v3 too small: %d, need at least %d\n",
+                       lmm_bytes, (int)sizeof(*lmm_v3));
+                return -EINVAL;
+        }
+
+        *stripe_count = le32_to_cpu(lmm_v3->lmm_stripe_count);
+
+        /* the per-stripe object array must fit in the buffer as well */
+        if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)) {
+                CERROR("LOV EA V3 too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V3));
+                lov_dump_lmm_v3(D_WARNING, lmm_v3);
+                return -EINVAL;
+        }
+
+        return lsm_lmm_verify_common((struct lov_mds_md_v1 *)lmm_v3,
+                                     lmm_bytes, *stripe_count);
+}
+
+/*
+ * Unpack an on-disk V3 LOV EA into the in-memory stripe descriptor lsm.
+ *
+ * Copies the common header fields and the pool name, then converts every
+ * per-stripe object entry to CPU byte order.  Returns 0 on success, or
+ * -EINVAL when a stripe refers to an OST index that is not below
+ * lov->desc.ld_tgt_count or whose lov->lov_tgts[] slot is empty.
+ */
+int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
+                    struct lov_mds_md *lmmv1)
+{
+        struct lov_mds_md_v3 *lmm_v3 = (struct lov_mds_md_v3 *)lmmv1;
+        int stripe;
+
+        lsm_unpackmd_common(lsm, (struct lov_mds_md_v1 *)lmm_v3);
+        strncpy(lsm->lsm_pool_name, lmm_v3->lmm_pool_name, MAXPOOLNAME);
+
+        for (stripe = 0; stripe < lsm->lsm_stripe_count; stripe++) {
+                struct lov_ost_data_v1 *src = &lmm_v3->lmm_objects[stripe];
+                struct lov_oinfo *dst = lsm->lsm_oinfo[stripe];
+
+                /* XXX LOV STACKING call down to osc_unpackmd() */
+                dst->loi_id = le64_to_cpu(src->l_object_id);
+                dst->loi_gr = le64_to_cpu(src->l_object_gr);
+                dst->loi_ost_idx = le32_to_cpu(src->l_ost_idx);
+                dst->loi_ost_gen = le32_to_cpu(src->l_ost_gen);
+
+                /* the stripe must point at a configured target */
+                if (dst->loi_ost_idx >= lov->desc.ld_tgt_count) {
+                        CERROR("OST index %d more than OST count %d\n",
+                               dst->loi_ost_idx, lov->desc.ld_tgt_count);
+                        lov_dump_lmm_v3(D_WARNING, lmm_v3);
+                        return -EINVAL;
+                }
+                if (lov->lov_tgts[dst->loi_ost_idx] == NULL) {
+                        CERROR("OST index %d missing\n", dst->loi_ost_idx);
+                        lov_dump_lmm_v3(D_WARNING, lmm_v3);
+                        return -EINVAL;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Method table for V3 striping metadata.  Only the verify and unpack
+ * hooks are V3-specific (lsm_lmm_verify_v3 / lsm_unpackmd_v3); every
+ * other slot reuses the "plain" implementation.
+ */
+struct lsm_operations lsm_v3_ops = {
+ .lsm_free = lsm_free_plain,
+ .lsm_destroy = lsm_destroy_plain,
+ .lsm_stripe_by_index = lsm_stripe_by_index_plain,
+ .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
+ .lsm_revalidate = lsm_revalidate_plain,
+ .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
+ .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
+ .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
+ .lsm_lmm_verify = lsm_lmm_verify_v3,
+ .lsm_unpackmd = lsm_unpackmd_v3,
+};
+
void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm);
void lov_dump_lmm_join(int level, struct lov_mds_md_join *lmmj);
+void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm);
+void lov_dump_lmm(int level, void *lmm);
+
/* lov_ea.c */
int lov_unpackmd_join(struct lov_obd *lov, struct lov_stripe_md *lsm,
struct lov_mds_md *lmm);
}
#endif
+/* pools */
+extern lustre_hash_ops_t pool_hash_operations;
+/* ost_pool methods */
+int lov_ost_pool_init(struct ost_pool *op, unsigned int count);
+int lov_ost_pool_extend(struct ost_pool *op, unsigned int max_count);
+int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int max_count);
+int lov_ost_pool_remove(struct ost_pool *op, __u32 idx);
+int lov_ost_pool_free(struct ost_pool *op);
+
+/* high level pool methods */
+int lov_pool_new(struct obd_device *obd, char *poolname);
+int lov_pool_del(struct obd_device *obd, char *poolname);
+int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
+int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
+void lov_dump_pool(int level, struct pool_desc *pool);
+struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
+int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
+
+
#endif
lov->lov_tgts, lov->lov_tgt_size);
}
-
OBD_ALLOC_PTR(tgt);
if (!tgt) {
mutex_up(&lov->lov_lock);
lov->lov_tgts[index] = tgt;
if (index >= lov->desc.ld_tgt_count)
lov->desc.ld_tgt_count = index + 1;
+
+ rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size);
+ if (rc)
+ RETURN(rc);
+
mutex_up(&lov->lov_lock);
CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n",
* maximum tgt index for computing the mds_max_easize. So we can't
* shrink it. */
+ lov_ost_pool_remove(&lov->lov_packed, index);
lov->lov_tgts[index] = NULL;
OBD_FREE_PTR(tgt);
struct lov_desc *desc;
struct lov_obd *lov = &obd->u.lov;
int count;
+ int rc;
ENTRY;
if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
desc->ld_active_tgt_count = 0;
lov->desc = *desc;
lov->lov_tgt_size = 0;
+ rc = lov_ost_pool_init(&lov->lov_packed, 0);
+ if (rc)
+ RETURN(rc);
+
sema_init(&lov->lov_lock, 1);
atomic_set(&lov->lov_refcount, 0);
CFS_INIT_LIST_HEAD(&lov->lov_qos.lq_oss_list);
init_rwsem(&lov->lov_qos.lq_rw_sem);
lov->lov_qos.lq_dirty = 1;
- lov->lov_qos.lq_dirty_rr = 1;
+ lov->lov_qos.lq_rr.lqr_dirty = 1;
lov->lov_qos.lq_reset = 1;
/* Default priority is toward free space balance */
lov->lov_qos.lq_prio_free = 232;
+ lov->lov_pools_hash_body = lustre_hash_init("POOLS", 128, 128,
+ &pool_hash_operations,
+ 0);
+
+ CFS_INIT_LIST_HEAD(&lov->lov_pool_list);
+ lov->lov_pool_count = 0;
+
lprocfs_lov_init_vars(&lvars);
lprocfs_obd_setup(obd, lvars.obd_vars);
#ifdef LPROCFS
CWARN("Error adding the target_obd file\n");
}
#endif
+ lov->lov_pool_proc_entry = lprocfs_register("pools",
+ obd->obd_proc_entry,
+ NULL, NULL);
RETURN(0);
}
static int lov_cleanup(struct obd_device *obd)
{
struct lov_obd *lov = &obd->u.lov;
+ struct list_head *pos, *tmp;
+ struct pool_desc *pool;
+
+ list_for_each_safe(pos, tmp, &lov->lov_pool_list) {
+ pool = list_entry(pos, struct pool_desc, pool_list);
+ list_del(&pool->pool_list);
+ lustre_hash_del_key(lov->lov_pools_hash_body, pool->pool_name);
+ lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
+ lov_ost_pool_free(&(pool->pool_obds));
+ OBD_FREE(pool, sizeof(*pool));
+ }
+ lustre_hash_exit(lov->lov_pools_hash_body);
lprocfs_obd_cleanup(obd);
+
+ lov_ost_pool_free(&lov->lov_packed);
+
if (lov->lov_tgts) {
int i;
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
lov->lov_tgt_size = 0;
}
- if (lov->lov_qos.lq_rr_size)
- OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size);
+ lov_ost_pool_free(&(lov->lov_qos.lq_rr.lqr_pool));
RETURN(0);
}
lcfg, obd);
GOTO(out, rc);
}
+ case LCFG_POOL_NEW:
+ case LCFG_POOL_ADD:
+ case LCFG_POOL_DEL:
+ case LCFG_POOL_REM:
+ GOTO(out, rc);
+
default: {
CERROR("Unknown command: %d\n", lcfg->lcfg_command);
GOTO(out, rc = -EINVAL);
#define ASSERT_LSM_MAGIC(lsmp) \
do { \
LASSERT((lsmp) != NULL); \
- LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC || \
+ LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 || \
+ (lsmp)->lsm_magic == LOV_MAGIC_V3 || \
(lsmp)->lsm_magic == LOV_MAGIC_JOIN), "%p->lsm_magic=%x\n", \
(lsmp), (lsmp)->lsm_magic); \
} while (0)
.o_unregister_page_removal_cb = lov_unregister_page_removal_cb,
.o_register_lock_cancel_cb = lov_register_lock_cancel_cb,
.o_unregister_lock_cancel_cb = lov_unregister_lock_cancel_cb,
+ .o_pool_new = lov_pool_new,
+ .o_pool_rem = lov_pool_remove,
+ .o_pool_add = lov_pool_add,
+ .o_pool_del = lov_pool_del,
};
static quota_interface_t *quota_interface;
le32_to_cpu(lmmj->lmmj_extent_count));
}
+/*
+ * Log the contents of an on-disk V3 LOV EA at debug level "level":
+ * header fields, pool name, then one line per stripe.  The per-stripe
+ * lines are skipped (and a complaint is logged instead) when the stripe
+ * count exceeds LOV_V1_INSANE_STRIPE_COUNT.
+ */
+void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
+{
+        __u32 nr_stripes = le32_to_cpu(lmm->lmm_stripe_count);
+        int i;
+
+        CDEBUG(level, "objid "LPX64", magic 0x%08x, pattern %#x\n",
+               le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic),
+               le32_to_cpu(lmm->lmm_pattern));
+        CDEBUG(level,"stripe_size %u, stripe_count %u\n",
+               le32_to_cpu(lmm->lmm_stripe_size), nr_stripes);
+        CDEBUG(level,"pool_name "POOLNAMEF"\n", lmm->lmm_pool_name);
+
+        /* do not walk the object array when the count looks corrupted */
+        if (nr_stripes > LOV_V1_INSANE_STRIPE_COUNT) {
+                CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n",
+                       nr_stripes, LOV_V1_INSANE_STRIPE_COUNT);
+                return;
+        }
+
+        for (i = 0; i < (int)nr_stripes; i++) {
+                struct lov_ost_data_v1 *obj = &lmm->lmm_objects[i];
+
+                CDEBUG(level,
+                       "stripe %u idx %u subobj "LPX64"/"LPX64"\n",
+                       i, le32_to_cpu(obj->l_ost_idx),
+                       le64_to_cpu(obj->l_object_gr),
+                       le64_to_cpu(obj->l_object_id));
+        }
+}
+
+/*
+ * Dump an on-disk LOV EA of any known layout at debug level "level".
+ *
+ * The layout is selected from the leading lmm_magic field and the buffer
+ * is handed to the matching per-format dump helper.  An unrecognised
+ * magic is reported with CERROR and nothing else is printed.
+ */
+void lov_dump_lmm(int level, void *lmm)
+{
+        int magic;
+
+        /* lmm_magic is kept little-endian in the EA (the per-format dump
+         * helpers decode it with le32_to_cpu), so convert it before
+         * comparing against the CPU-order LOV_MAGIC_* constants */
+        magic = le32_to_cpu(((struct lov_mds_md_v1 *)(lmm))->lmm_magic);
+        switch (magic) {
+        case LOV_MAGIC_V1:
+                lov_dump_lmm_v1(level, (struct lov_mds_md_v1 *)(lmm));
+                break;
+        case LOV_MAGIC_JOIN:
+                lov_dump_lmm_join(level, (struct lov_mds_md_join *)(lmm));
+                break;
+        case LOV_MAGIC_V3:
+                lov_dump_lmm_v3(level, (struct lov_mds_md_v3 *)(lmm));
+                break;
+        default:
+                CERROR("Cannot recognize lmm_magic %x\n", magic);
+                break;
+        }
+}
+
#define LMM_ASSERT(test) \
do { \
if (!(test)) lov_dump_lmm(D_ERROR, lmm); \
{
struct obd_device *obd = class_exp2obd(exp);
struct lov_obd *lov = &obd->u.lov;
- struct lov_mds_md *lmm;
+ struct lov_mds_md_v1 *lmmv1;
+ struct lov_mds_md_v3 *lmmv3;
int stripe_count = lov->desc.ld_tgt_count;
- int lmm_size;
+ struct lov_ost_data_v1 *lmm_objects;
+ int lmm_size, lmm_magic;
int i;
ENTRY;
if (lsm) {
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X\n",
- lsm->lsm_magic, LOV_MAGIC);
- RETURN(-EINVAL);
- }
+ lmm_magic = lsm->lsm_magic;
+
/* If we are just sizing the EA, limit the stripe count
* to the actual number of OSTs in this filesystem. */
if (!lmmp) {
- stripe_count = lov_get_stripecnt(lov, lsm->lsm_stripe_count);
+ stripe_count = lov_get_stripecnt(lov,
+ lsm->lsm_stripe_count);
lsm->lsm_stripe_count = stripe_count;
} else {
stripe_count = lsm->lsm_stripe_count;
}
+ } else if (lmmp && *lmmp) {
+ lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
+ } else {
+ /* lsm == NULL and lmmp == NULL */
+ lmm_magic = LOV_MAGIC;
+ }
+
+ if ((lmm_magic != LOV_MAGIC_V1) &&
+ (lmm_magic != LOV_MAGIC_V3)) {
+ CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
+ lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
+ RETURN(-EINVAL);
+
}
/* XXX LOV STACKING call into osc for sizes */
- lmm_size = lov_mds_md_size(stripe_count);
+ lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
if (!lmmp)
RETURN(lmm_size);
if (*lmmp && !lsm) {
stripe_count = le32_to_cpu((*lmmp)->lmm_stripe_count);
- OBD_FREE(*lmmp, lov_mds_md_size(stripe_count));
+ lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
+ OBD_FREE(*lmmp, lmm_size);
*lmmp = NULL;
RETURN(0);
}
RETURN(-ENOMEM);
}
- lmm = *lmmp;
- lmm->lmm_magic = cpu_to_le32(LOV_MAGIC); /* only write new format */
+ CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
+ lmm_magic, lmm_size);
+
+ lmmv1 = *lmmp;
+ lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
+ if (lmm_magic == LOV_MAGIC_V3)
+ lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
+ else
+ lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);
if (!lsm)
RETURN(lmm_size);
- lmm->lmm_object_id = cpu_to_le64(lsm->lsm_object_id);
- lmm->lmm_object_gr = cpu_to_le64(lsm->lsm_object_gr);
- lmm->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
- lmm->lmm_stripe_count = cpu_to_le32(stripe_count);
- lmm->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
+ /* lmmv1 and lmmv3 point to the same struct and have the
+ * same first fields
+ */
+ lmmv1->lmm_object_id = cpu_to_le64(lsm->lsm_object_id);
+ lmmv1->lmm_object_gr = cpu_to_le64(lsm->lsm_object_gr);
+ lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
+ lmmv1->lmm_stripe_count = cpu_to_le32(stripe_count);
+ lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
+ if (lsm->lsm_magic == LOV_MAGIC_V3) {
+ strncpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name, MAXPOOLNAME);
+ lmm_objects = lmmv3->lmm_objects;
+ } else {
+ lmm_objects = lmmv1->lmm_objects;
+ }
for (i = 0; i < stripe_count; i++) {
struct lov_oinfo *loi = lsm->lsm_oinfo[i];
/* XXX LOV STACKING call down to osc_packmd() to do packing */
LASSERTF(loi->loi_id, "lmm_oid "LPU64" stripe %u/%u idx %u\n",
- lmm->lmm_object_id, i, stripe_count, loi->loi_ost_idx);
- lmm->lmm_objects[i].l_object_id = cpu_to_le64(loi->loi_id);
- lmm->lmm_objects[i].l_object_gr = cpu_to_le64(loi->loi_gr);
- lmm->lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
- lmm->lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
+ lmmv1->lmm_object_id, i, stripe_count, loi->loi_ost_idx);
+ lmm_objects[i].l_object_id = cpu_to_le64(loi->loi_id);
+ lmm_objects[i].l_object_gr = cpu_to_le64(loi->loi_gr);
+ lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
+ lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
}
RETURN(lmm_size);
int rc;
if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
- CERROR("bad disk LOV MAGIC: 0x%08X; dumping V1 LMM:\n",
- le32_to_cpu(*(__u32 *)lmm));
- lov_dump_lmm_v1(D_WARNING, lmm);
+                char *buffer;
+                int sz;
+
+                CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
+                       le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
+                /* two hex digits per byte plus the terminating NUL */
+                sz = lmm_bytes * 2 + 1;
+                OBD_ALLOC(buffer, sz);
+                if (buffer != NULL) {
+                        int i;
+
+                        /* read the bytes as unsigned: a negative char would
+                         * be sign-extended and printed as 8 digits, running
+                         * past the 2 characters reserved for it */
+                        for (i = 0; i < lmm_bytes; i++)
+                                sprintf(buffer + 2 * i, "%.2X",
+                                        ((unsigned char *)lmm)[i]);
+                        /* last valid index is sz - 1, not sz */
+                        buffer[sz - 1] = '\0';
+                        CERROR("%s\n", buffer);
+                        OBD_FREE(buffer, sz);
+                }
return -EINVAL;
}
rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
(*lsmp)->lsm_stripe_count = stripe_count;
(*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
(*lsmp)->lsm_pattern = pattern;
+ (*lsmp)->lsm_pool_name[0] = '\0';
(*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
for (i = 0; i < stripe_count; i++)
{
struct obd_device *obd = class_exp2obd(exp);
struct lov_obd *lov = &obd->u.lov;
- struct lov_user_md lum;
+ struct lov_user_md_v3 lumv3;
+ struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+ int lmm_magic;
int stripe_count;
int rc;
ENTRY;
- rc = copy_from_user(&lum, lump, sizeof(lum));
+ rc = copy_from_user(&lumv3, lump, sizeof(struct lov_user_md_v1));
if (rc)
RETURN(-EFAULT);
- if (lum.lmm_magic != LOV_USER_MAGIC) {
- if (lum.lmm_magic == __swab32(LOV_USER_MAGIC)) {
- lustre_swab_lov_user_md(&lum);
- } else {
- CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
- " %#08x != %#08x\n",
- lum.lmm_magic, LOV_USER_MAGIC);
- RETURN(-EINVAL);
- }
+ lmm_magic = lumv1->lmm_magic;
+
+ if (lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
+ lustre_swab_lov_user_md_v1(lumv1);
+ lmm_magic = LOV_USER_MAGIC_V1;
+ } else if (lmm_magic == LOV_USER_MAGIC_V3) {
+ rc = copy_from_user(&lumv3, lump, sizeof(lumv3));
+ if (rc)
+ RETURN(-EFAULT);
+ } else if (lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
+ rc = copy_from_user(&lumv3, lump, sizeof(lumv3));
+ if (rc)
+ RETURN(-EFAULT);
+ lustre_swab_lov_user_md_v3(&lumv3);
+ lmm_magic = LOV_USER_MAGIC_V3;
+ } else if (lmm_magic != LOV_USER_MAGIC_V1) {
+ CDEBUG(D_IOCTL,
+ "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
+ lmm_magic, LOV_USER_MAGIC_V1, LOV_USER_MAGIC_V3);
+ RETURN(-EINVAL);
}
- if (lum.lmm_pattern == 0) {
- lum.lmm_pattern = lov->desc.ld_pattern ?
+ /* in the rest of the tests, as *lumv1 and lumv3 have the same
+ * fields, we use lumv1 to avoid code duplication */
+
+ if (lumv1->lmm_pattern == 0) {
+ lumv1->lmm_pattern = lov->desc.ld_pattern ?
lov->desc.ld_pattern : LOV_PATTERN_RAID0;
}
- if (lum.lmm_pattern != LOV_PATTERN_RAID0) {
+ if (lumv1->lmm_pattern != LOV_PATTERN_RAID0) {
CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
- lum.lmm_pattern);
+ lumv1->lmm_pattern);
RETURN(-EINVAL);
}
/* 64kB is the largest common page size we see (ia64), and matches the
* check in lfs */
- if (lum.lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
+ if (lumv1->lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
CDEBUG(D_IOCTL, "stripe size %u not multiple of %u, fixing\n",
- lum.lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
- lum.lmm_stripe_size = LOV_MIN_STRIPE_SIZE;
+ lumv1->lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
+ lumv1->lmm_stripe_size = LOV_MIN_STRIPE_SIZE;
}
- if ((lum.lmm_stripe_offset >= lov->desc.ld_tgt_count) &&
- (lum.lmm_stripe_offset != (typeof(lum.lmm_stripe_offset))(-1))) {
+ if ((lumv1->lmm_stripe_offset >= lov->desc.ld_tgt_count) &&
+ (lumv1->lmm_stripe_offset !=
+ (typeof(lumv1->lmm_stripe_offset))(-1))) {
CDEBUG(D_IOCTL, "stripe offset %u > number of OSTs %u\n",
- lum.lmm_stripe_offset, lov->desc.ld_tgt_count);
+ lumv1->lmm_stripe_offset, lov->desc.ld_tgt_count);
RETURN(-EINVAL);
}
- stripe_count = lov_get_stripecnt(lov, lum.lmm_stripe_count);
+ stripe_count = lov_get_stripecnt(lov, lumv1->lmm_stripe_count);
+
+ if (lmm_magic == LOV_USER_MAGIC_V3) {
+ struct pool_desc *pool;
+
+ pool = lov_find_pool(lov, lumv3.lmm_pool_name);
+ if (pool == NULL)
+ RETURN(-EINVAL);
+
+ if (lumv3.lmm_stripe_offset !=
+ (typeof(lumv3.lmm_stripe_offset))(-1)) {
+ rc = lov_check_index_in_pool(lumv3.lmm_stripe_offset,
+ pool);
+ if (rc < 0)
+ RETURN(-EINVAL);
+ }
+
+ if (stripe_count > pool_tgt_count(pool))
+ stripe_count = pool_tgt_count(pool);
+ }
- if ((__u64)lum.lmm_stripe_size * stripe_count > ~0UL) {
+ if ((__u64)lumv1->lmm_stripe_size * stripe_count > ~0UL) {
CDEBUG(D_IOCTL, "stripe width %ux%i exeeds %lu bytes\n",
- lum.lmm_stripe_size, (int)lum.lmm_stripe_count, ~0UL);
+ lumv1->lmm_stripe_size, (int)lumv1->lmm_stripe_count,
+ ~0UL);
RETURN(-EINVAL);
}
- rc = lov_alloc_memmd(lsmp, stripe_count, lum.lmm_pattern, LOV_MAGIC);
+ rc = lov_alloc_memmd(lsmp, stripe_count, lumv1->lmm_pattern, lmm_magic);
if (rc >= 0) {
- (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lum.lmm_stripe_offset;
- (*lsmp)->lsm_stripe_size = lum.lmm_stripe_size;
+ (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lumv1->lmm_stripe_offset;
+ (*lsmp)->lsm_stripe_size = lumv1->lmm_stripe_size;
+ if (lmm_magic == LOV_USER_MAGIC_V3)
+ strncpy((*lsmp)->lsm_pool_name, lumv3.lmm_pool_name,
+ MAXPOOLNAME);
rc = 0;
}
- RETURN(0);
+ RETURN(rc);
}
/* Configure object striping information on a new file.
struct obd_export *oexp;
struct lov_obd *lov = &exp->exp_obd->u.lov;
obd_id last_id = 0;
+ struct lov_user_ost_data_v1 *lmm_objects;
ENTRY;
+
+ if (lump->lmm_magic == LOV_USER_MAGIC_V3)
+ lmm_objects = ((struct lov_user_md_v3 *)lump)->lmm_objects;
+ else
+ lmm_objects = lump->lmm_objects;
+
for (i = 0; i < lump->lmm_stripe_count; i++) {
__u32 len = sizeof(last_id);
- oexp = lov->lov_tgts[lump->lmm_objects[i].l_ost_idx]->ltd_exp;
+ oexp = lov->lov_tgts[lmm_objects[i].l_ost_idx]->ltd_exp;
rc = obd_get_info(oexp, sizeof(KEY_LAST_ID), KEY_LAST_ID,
&len, &last_id, NULL);
if (rc)
RETURN(rc);
- if (lump->lmm_objects[i].l_object_id > last_id) {
+ if (lmm_objects[i].l_object_id > last_id) {
CERROR("Setting EA for object > than last id on "
"ost idx %d "LPD64" > "LPD64" \n",
- lump->lmm_objects[i].l_ost_idx,
- lump->lmm_objects[i].l_object_id, last_id);
+ lmm_objects[i].l_ost_idx,
+ lmm_objects[i].l_object_id, last_id);
RETURN(-EINVAL);
}
}
for (i = 0; i < lump->lmm_stripe_count; i++) {
(*lsmp)->lsm_oinfo[i]->loi_ost_idx =
- lump->lmm_objects[i].l_ost_idx;
- (*lsmp)->lsm_oinfo[i]->loi_id = lump->lmm_objects[i].l_object_id;
- (*lsmp)->lsm_oinfo[i]->loi_gr = lump->lmm_objects[i].l_object_gr;
+ lmm_objects[i].l_ost_idx;
+ (*lsmp)->lsm_oinfo[i]->loi_id = lmm_objects[i].l_object_id;
+ (*lsmp)->lsm_oinfo[i]->loi_gr = lmm_objects[i].l_object_gr;
}
RETURN(0);
}
/*
* XXX huge struct allocated on stack.
*/
- struct lov_user_md lum;
+ /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+ struct lov_user_md_v3 lum;
struct lov_mds_md *lmmk = NULL;
int rc, lmm_size;
+ int lum_size;
mm_segment_t seg;
ENTRY;
*/
seg = get_fs();
set_fs(KERNEL_DS);
- rc = copy_from_user(&lum, lump, sizeof(lum));
+
+ /* we only need the header part from user space to get lmm_magic and
+ * lmm_stripe_count, (the header part is common to v1 and v3) */
+ lum_size = sizeof(struct lov_user_md_v1);
+ rc = copy_from_user(&lum, lump, lum_size);
+
if (rc)
rc = -EFAULT;
- else if (lum.lmm_magic != LOV_USER_MAGIC)
+ else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
+ (lum.lmm_magic != LOV_USER_MAGIC_V3))
rc = -EINVAL;
else {
+ /* if v3 we just have to update the lum_size */
+ if (lum.lmm_magic == LOV_USER_MAGIC_V3)
+ lum_size = sizeof(struct lov_user_md_v3);
+
rc = lov_packmd(exp, &lmmk, lsm);
if (rc < 0)
RETURN(rc);
rc = 0;
/* FIXME: Bug 1185 - copy fields properly when structs change */
- CLASSERT(sizeof lum == sizeof *lmmk);
+ /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
+ CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3));
CLASSERT(sizeof lum.lmm_objects[0] ==
sizeof lmmk->lmm_objects[0]);
/* User wasn't expecting this many OST entries */
if (lum.lmm_stripe_count == 0) {
- if (copy_to_user(lump, lmmk, sizeof lum))
+ if (copy_to_user(lump, lmmk, lum_size))
rc = -EFAULT;
} else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
rc = -EOVERFLOW;
- } else if (copy_to_user(lump, lmmk, sizeof lum))
+ } else if (copy_to_user(lump, lmmk, lmm_size))
rc = -EFAULT;
obd_free_diskmd(exp, &lmmk);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_pool.c
+ *
+ * OST pool methods
+ *
+ * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#ifdef __KERNEL__
+#include <libcfs/libcfs.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <obd.h>
+#include "lov_internal.h"
+
+/*
+ * hash function using a Rotating Hash algorithm
+ * Knuth, D. The Art of Computer Programming,
+ * Volume 3: Sorting and Searching,
+ * Chapter 6.4.
+ * Addison Wesley, 1973
+ */
+/*
+ * Rotating hash of a pool name: at most MAXPOOLNAME characters are
+ * folded in, stopping early at a NUL byte.  The result is reduced
+ * modulo "mask"; hash_body is not used.
+ */
+static __u32 pool_hashfn(lustre_hash_t *hash_body, void *key, unsigned mask)
+{
+        const char *name = key;
+        __u32 hash = 0;
+        int pos;
+
+        for (pos = 0; pos < MAXPOOLNAME && name[pos] != '\0'; pos++)
+                hash = (hash << 4) ^ (hash >> 28) ^ name[pos];
+
+        return hash % mask;
+}
+
+/* Hash callback: the key of a pool descriptor is its name. */
+static void *pool_key(struct hlist_node *hnode)
+{
+        return hlist_entry(hnode, struct pool_desc, pool_hash)->pool_name;
+}
+
+/*
+ * Hash callback: returns non-zero when "key" (a pool name) equals the
+ * name of the pool owning compared_hnode, comparing at most MAXPOOLNAME
+ * characters.
+ */
+static int pool_hashkey_compare(void *key, struct hlist_node *compared_hnode)
+{
+        struct pool_desc *candidate;
+
+        candidate = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
+        return strncmp((char *)key, candidate->pool_name, MAXPOOLNAME) == 0;
+}
+
+/* Hash "get" callback: maps the node to its pool; no counter is touched. */
+static void *pool_hashrefcount_get(struct hlist_node *hnode)
+{
+        return hlist_entry(hnode, struct pool_desc, pool_hash);
+}
+
+/* Hash "put" callback: maps the node to its pool; no counter is touched. */
+static void *pool_hashrefcount_put(struct hlist_node *hnode)
+{
+        return hlist_entry(hnode, struct pool_desc, pool_hash);
+}
+
+/*
+ * Callback table for the hash of pools keyed by pool name; it is passed
+ * to lustre_hash_init() when the LOV device is set up.
+ */
+lustre_hash_ops_t pool_hash_operations = {
+ .lh_hash = pool_hashfn,
+ .lh_key = pool_key,
+ .lh_compare = pool_hashkey_compare,
+ .lh_get = pool_hashrefcount_get,
+ .lh_put = pool_hashrefcount_put,
+};
+
+#ifdef LPROCFS
+/* ifdef needed for liblustre support */
+/*
+ * pool /proc seq_file methods
+ */
+/*
+ * iterator is used to go through the target pool entries
+ * index is the current entry index in the lp_array[] array
+ * index >= pos returned to the seq_file interface
+ * pos is from 0 to (pool->pool_obds.op_count - 1)
+ */
+/* marker stored in pool_iterator.magic; stop() uses it to tell an
+ * iterator apart from the pool_desc that s->private otherwise holds */
+#define POOL_IT_MAGIC 0xB001CEA0
+/* cursor allocated by start() and freed by stop() for one /proc read */
+struct pool_iterator {
+ int magic; /* always POOL_IT_MAGIC for a live iterator */
+ struct pool_desc *pool; /* pool being listed */
+ int idx; /* from 0 to pool_tgt_size - 1 */
+};
+
+/*
+ * seq_file next() method: advance the iterator stored in s->private by
+ * one pool entry.
+ *
+ * Returns NULL when *pos is already at or past the number of targets, or
+ * when the advance would step onto the entry count (the index is then
+ * restored).  Otherwise *pos is incremented and the iterator is returned.
+ * The "v" argument is not used.
+ */
+static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct pool_iterator *iter = (struct pool_iterator *)s->private;
+ int prev_idx;
+
+ LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
+
+ /* test if end of file */
+ if (*pos >= pool_tgt_count(iter->pool))
+ return NULL;
+
+ /* iterate to find a non empty entry */
+ prev_idx = iter->idx;
+ read_lock(&pool_tgt_rwlock(iter->pool));
+ iter->idx++;
+ if (iter->idx == pool_tgt_count(iter->pool)) {
+ iter->idx = prev_idx; /* we stay on the last entry */
+ read_unlock(&pool_tgt_rwlock(iter->pool));
+ return NULL;
+ }
+ read_unlock(&pool_tgt_rwlock(iter->pool));
+ (*pos)++;
+ /* return != NULL to continue */
+ return iter;
+}
+
+/*
+ * seq_file start() method: s->private holds the pool_desc on entry.
+ *
+ * Returns NULL for an empty pool or a position past the last target, and
+ * ERR_PTR(-ENOMEM) when the iterator cannot be allocated.  Otherwise a
+ * fresh iterator is allocated, parked in s->private, and positioned on
+ * entry *pos by replaying pool_proc_next().
+ */
+static void *pool_proc_start(struct seq_file *s, loff_t *pos)
+{
+ struct pool_desc *pool = (struct pool_desc *)s->private;
+ struct pool_iterator *iter;
+
+ /* nothing to list: empty pool or position beyond the last entry */
+ if ((pool_tgt_count(pool) == 0) ||
+ (*pos >= pool_tgt_count(pool)))
+ return NULL;
+
+ OBD_ALLOC(iter, sizeof(struct pool_iterator));
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+ iter->magic = POOL_IT_MAGIC;
+ iter->pool = pool;
+ iter->idx = 0;
+
+ /* we use the seq_file private field to remember the iterator so
+ * we can free it at stop() */
+ /* /!\ do not forget to restore it to pool before freeing it */
+ s->private = iter;
+ if (*pos > 0) {
+ loff_t i;
+ void *ptr;
+
+ /* walk forward from entry 0 until position *pos is reached or
+ * next() reports the end; next() ignores its second argument */
+ i = 0;
+ do {
+ ptr = pool_proc_next(s, &iter, &i);
+ } while ((i < *pos) && (ptr != NULL));
+ return ptr;
+ }
+ return iter;
+}
+
+/*
+ * seq_file stop() method: if s->private currently holds an iterator
+ * (recognised by its magic), put the pool_desc back into s->private and
+ * free the iterator.  The "v" argument is not used.
+ */
+static void pool_proc_stop(struct seq_file *s, void *v)
+{
+ struct pool_iterator *iter = (struct pool_iterator *)s->private;
+
+ /* in some cases stop() method is called 2 times, without
+ * calling start() method (see seq_read() from fs/seq_file.c)
+ * we have to free only if s->private is an iterator */
+ /* NOTE(review): when s->private is the pool_desc, the magic test
+ * reads the first int of that struct - confirm it can never
+ * equal POOL_IT_MAGIC */
+ if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
+ /* we restore s->private so next call to pool_proc_start()
+ * will work */
+ s->private = iter->pool;
+ OBD_FREE(iter, sizeof(struct pool_iterator));
+ }
+ return;
+}
+
+/*
+ * seq_file show() method: print the UUID of the target the iterator "v"
+ * currently points at, one per line.  Nothing is printed when the slot
+ * yields no target.  Always returns 0.
+ */
+static int pool_proc_show(struct seq_file *s, void *v)
+{
+ struct pool_iterator *iter = (struct pool_iterator *)v;
+ struct lov_tgt_desc *tgt;
+
+ LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
+ LASSERT(iter->pool != NULL);
+ /* NOTE(review): this admits idx == count, one past the last
+ * entry - confirm whether "<" was intended */
+ LASSERT(iter->idx <= pool_tgt_count(iter->pool));
+
+ /* only the lookup is done under the pool lock; tgt is used after
+ * the lock has been dropped */
+ read_lock(&pool_tgt_rwlock(iter->pool));
+ tgt = pool_tgt(iter->pool, iter->idx);
+ read_unlock(&pool_tgt_rwlock(iter->pool));
+ if (tgt)
+ seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
+
+ return 0;
+}
+
+/* seq_file iteration methods used to list the members of one pool */
+static struct seq_operations pool_proc_ops = {
+ .start = pool_proc_start,
+ .next = pool_proc_next,
+ .stop = pool_proc_stop,
+ .show = pool_proc_show,
+};
+
+/*
+ * open() method of a pool /proc file: set up seq_file iteration and hand
+ * the data pointer of the proc entry to it through seq_file->private.
+ * Returns the seq_open() result.
+ */
+static int pool_proc_open(struct inode *inode, struct file *file)
+{
+        struct seq_file *seq;
+        int rc;
+
+        rc = seq_open(file, &pool_proc_ops);
+        if (rc)
+                return rc;
+
+        seq = file->private_data;
+        seq->private = PROC_I(inode)->pde->data;
+        return 0;
+}
+
+/* file methods of a pool /proc entry: custom open, generic seq_file
+ * read/llseek/release */
+static struct file_operations pool_proc_operations = {
+ .open = pool_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+#endif /* LPROCFS */
+
+/*
+ * Log the name and member count of a pool at debug level "level", then
+ * one line per member that resolves to a target with an export.  The
+ * member walk is done under the pool read lock.
+ */
+void lov_dump_pool(int level, struct pool_desc *pool)
+{
+        struct lov_tgt_desc *tgt;
+        int slot;
+
+        CDEBUG(level, "pool "POOLNAMEF" has %d members\n",
+               pool->pool_name, pool->pool_obds.op_count);
+
+        read_lock(&pool_tgt_rwlock(pool));
+        for (slot = 0; slot < pool_tgt_count(pool); slot++) {
+                tgt = pool_tgt(pool, slot);
+                /* only targets that are present and connected are listed */
+                if (tgt != NULL && tgt->ltd_exp != NULL)
+                        CDEBUG(level, "pool "POOLNAMEF"[%d] = %s\n",
+                               pool->pool_name, slot,
+                               obd_uuid2str(&tgt->ltd_uuid));
+        }
+        read_unlock(&pool_tgt_rwlock(pool));
+}
+
+/* number of slots given to a pool when the caller asks for 0 */
+#define LOV_POOL_INIT_COUNT 2
+/*
+ * Initialise an empty OST index pool with room for "count" entries
+ * (LOV_POOL_INIT_COUNT when count is 0).  Returns 0, or -ENOMEM with
+ * op_size reset to 0 when the array cannot be allocated.
+ */
+int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
+{
+        if (count == 0)
+                count = LOV_POOL_INIT_COUNT;
+
+        op->op_rwlock = RW_LOCK_UNLOCKED;
+        op->op_count = 0;
+        op->op_size = count;
+        op->op_array = NULL;
+
+        OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
+        if (op->op_array != NULL)
+                return 0;
+
+        op->op_size = 0;
+        return -ENOMEM;
+}
+
+/*
+ * Make sure the pool has at least one free slot, growing the array when
+ * it is full.
+ *
+ * The array is doubled but never grown beyond max_count entries.
+ * Returns 0 when a free slot is available, -ENOSPC when the pool is full
+ * and max_count does not allow it to grow, -ENOMEM when the larger array
+ * cannot be allocated.
+ */
+int lov_ost_pool_extend(struct ost_pool *op, unsigned int max_count)
+{
+        __u32 *new;
+        int new_size;
+
+        LASSERT(max_count != 0);
+
+        if (op->op_count < op->op_size)
+                return 0;
+
+        new_size = min(max_count, 2 * op->op_size);
+        /* a cap at or below the current size leaves nothing to gain; a
+         * smaller array would also be overrun by the copy below */
+        if (new_size <= op->op_size)
+                return -ENOSPC;
+
+        OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
+        if (new == NULL)
+                return -ENOMEM;
+
+        /* copy old array to new one */
+        memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
+        /* swap the arrays under the write lock so readers never see a
+         * freed array or a size that does not match it */
+        write_lock(&op->op_rwlock);
+        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+        op->op_array = new;
+        op->op_size = new_size;
+        write_unlock(&op->op_rwlock);
+        return 0;
+}
+
+/*
+ * Append OST index idx to the pool, growing the array (up to max_count
+ * entries) when needed.
+ *
+ * Returns 0 on success, -EEXIST when idx is already a member, -ENOSPC
+ * when no slot is free, or the error from lov_ost_pool_extend().
+ */
+int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int max_count)
+{
+        int rc, i;
+
+        rc = lov_ost_pool_extend(op, max_count);
+        if (rc)
+                return rc;
+
+        /* the array and the count are modified below, so the lock must be
+         * taken for writing: a read lock does not exclude other holders */
+        write_lock(&op->op_rwlock);
+        /* search ost in pool array */
+        for (i = 0; i < op->op_count; i++) {
+                if (op->op_array[i] == idx) {
+                        rc = -EEXIST;
+                        goto out;
+                }
+        }
+        /* never store past the end of the array */
+        if (op->op_count >= op->op_size) {
+                rc = -ENOSPC;
+                goto out;
+        }
+        /* ost not found we add it */
+        op->op_array[op->op_count] = idx;
+        op->op_count++;
+out:
+        write_unlock(&op->op_rwlock);
+        return rc;
+}
+
+/*
+ * Remove OST index idx from the pool, closing the gap by shifting the
+ * following entries down by one.
+ *
+ * Returns 0 on success, -EINVAL when idx is not a member.
+ */
+int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
+{
+        int rc = -EINVAL;
+        int i;
+
+        /* entries are moved and the count changes, so the lock must be
+         * taken for writing: a read lock does not exclude other holders */
+        write_lock(&op->op_rwlock);
+        for (i = 0; i < op->op_count; i++) {
+                if (op->op_array[i] == idx) {
+                        memmove(&op->op_array[i], &op->op_array[i + 1],
+                                (op->op_count - i - 1) *
+                                sizeof(op->op_array[0]));
+                        op->op_count--;
+                        rc = 0;
+                        break;
+                }
+        }
+        write_unlock(&op->op_rwlock);
+        return rc;
+}
+
+/*
+ * Release the index array of a pool and reset it to the empty state.
+ * A pool whose op_size is already 0 is left untouched.  Always returns 0.
+ */
+int lov_ost_pool_free(struct ost_pool *op)
+{
+        if (op->op_size != 0) {
+                write_lock(&op->op_rwlock);
+                OBD_FREE(op->op_array,
+                         op->op_size * sizeof(op->op_array[0]));
+                op->op_array = NULL;
+                op->op_size = 0;
+                op->op_count = 0;
+                write_unlock(&op->op_rwlock);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Create an empty OST pool called poolname on the LOV device obd.
+ *
+ * The new pool is added to the pool hash and to lov_pool_list, and a
+ * /proc entry is registered for it when LPROCFS is enabled (a failure to
+ * do so is only warned about).
+ *
+ * Returns 0 on success, -ENAMETOOLONG when the name is longer than
+ * MAXPOOLNAME, -ENOMEM on allocation failure, -EEXIST when a pool of
+ * that name is already in the hash.  Nothing stays allocated on error.
+ */
+int lov_pool_new(struct obd_device *obd, char *poolname)
+{
+        struct lov_obd *lov;
+        struct pool_desc *new_pool;
+        int rc;
+
+        lov = &(obd->u.lov);
+
+        /* reject a bad name before anything has to be undone */
+        if (strlen(poolname) > MAXPOOLNAME)
+                return -ENAMETOOLONG;
+
+        OBD_ALLOC(new_pool, sizeof(*new_pool));
+
+        if (new_pool == NULL)
+                return -ENOMEM;
+
+        strncpy(new_pool->pool_name, poolname, MAXPOOLNAME);
+        new_pool->pool_name[MAXPOOLNAME] = '\0';
+        new_pool->pool_lov = lov;
+        rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
+        if (rc)
+                goto out_free_pool;
+
+        memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
+        rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+        if (rc)
+                goto out_free_obds;
+
+        spin_lock(&obd->obd_dev_lock);
+        /* check if pool already exists */
+        if (lustre_hash_lookup(lov->lov_pools_hash_body,
+                               poolname) != NULL) {
+                spin_unlock(&obd->obd_dev_lock);
+                rc = -EEXIST;
+                goto out_free_rr;
+        }
+
+        INIT_HLIST_NODE(&new_pool->pool_hash);
+        lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
+                               &new_pool->pool_hash);
+        list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
+        lov->lov_pool_count++;
+        spin_unlock(&obd->obd_dev_lock);
+
+        CDEBUG(D_CONFIG, POOLNAMEF" is pool #%d\n",
+               poolname, lov->lov_pool_count);
+
+#ifdef LPROCFS
+        /* ifdef needed for liblustre */
+        new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
+                                                       poolname,
+                                                       NULL, NULL,
+                                                       new_pool,
+                                                       &pool_proc_operations);
+#endif
+
+        /* a missing /proc entry is not fatal: the pool itself is usable */
+        if (IS_ERR(new_pool->pool_proc_entry)) {
+                CWARN("Cannot add proc pool entry "POOLNAMEF"\n", poolname);
+                new_pool->pool_proc_entry = NULL;
+        }
+
+        return 0;
+
+        /* unwind in reverse order of construction */
+out_free_rr:
+        lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
+out_free_obds:
+        lov_ost_pool_free(&new_pool->pool_obds);
+out_free_pool:
+        OBD_FREE(new_pool, sizeof(*new_pool));
+        return rc;
+}
+
+/*
+ * Unregister the pool @poolname from the LOV device @obd: drop its proc
+ * entry (LPROCFS only), remove it from the pool hash and decrement the
+ * pool count.
+ *
+ * The pool descriptor is intentionally neither unlinked from
+ * lov_pool_list nor freed here: according to the original author it may
+ * still be referenced by an open file in /proc and is released later by
+ * lov_cleanup().
+ *
+ * Returns 0 on success, -ENOENT if no such pool is hashed.
+ *
+ * NOTE(review): remove_proc_entry() is called with obd_dev_lock (a spin
+ * lock) held -- confirm that it cannot sleep in this context.
+ */
+int lov_pool_del(struct obd_device *obd, char *poolname)
+{
+        struct lov_obd *lov = &(obd->u.lov);
+        struct pool_desc *pool;
+        int rc = -ENOENT;
+
+        spin_lock(&obd->obd_dev_lock);
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body,
+                                  poolname);
+        if (pool != NULL) {
+#ifdef LPROCFS
+                if (pool->pool_proc_entry != NULL)
+                        remove_proc_entry(pool->pool_proc_entry->name,
+                                          pool->pool_proc_entry->parent);
+#endif
+                /* only the hash entry goes away; the list entry stays so
+                 * that the descriptor can be freed at cleanup time */
+                lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
+                lov->lov_pool_count--;
+                rc = 0;
+        }
+        spin_unlock(&obd->obd_dev_lock);
+
+        return rc;
+}
+
+
+/*
+ * Add the OST named @ostname to the pool @poolname of the LOV device @obd.
+ *
+ * The OST name is converted to a UUID and looked up in the LOV target
+ * table to obtain its index, which is then stored in the pool. The pool's
+ * round-robin state is marked dirty on success.
+ *
+ * Returns 0 on success, -ENOENT if the pool is unknown, -EINVAL if the OST
+ * is not a target of this LOV, or the error from lov_ost_pool_extend() /
+ * lov_ost_pool_add() (e.g. -EEXIST).
+ */
+int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+        struct lov_obd *lov = &(obd->u.lov);
+        struct obd_uuid ost_uuid;
+        struct pool_desc *pool;
+        unsigned int lov_idx;
+        int found;
+        int rc;
+
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL)
+                return -ENOENT;
+
+        /* allocate pool tgt array if needed */
+        mutex_down(&lov->lov_lock);
+        rc = lov_ost_pool_extend(&pool->pool_obds, lov->lov_tgt_size);
+        mutex_up(&lov->lov_lock);
+        if (rc)
+                return rc;
+
+        obd_str2uuid(&ost_uuid, ostname);
+
+        /* search ost in lov array */
+        spin_lock(&obd->obd_dev_lock);
+        for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+                if (lov->lov_tgts[lov_idx] &&
+                    obd_uuid_equals(&ost_uuid,
+                                    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+                        break;
+        }
+        /* running off the end of the table means the ost is not in lov */
+        found = (lov_idx != lov->desc.ld_tgt_count);
+        spin_unlock(&obd->obd_dev_lock);
+
+        if (!found)
+                return -EINVAL;
+
+        rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
+        if (rc)
+                return rc;
+
+        /* membership changed: the round-robin order must be rebuilt */
+        pool->pool_rr.lqr_dirty = 1;
+
+        CDEBUG(D_CONFIG, "Added %s to "POOLNAMEF" as member %d\n",
+               ostname, poolname,  pool_tgt_count(pool));
+        return 0;
+}
+
+/*
+ * Remove the OST named @ostname from the pool @poolname of the LOV device
+ * @obd and mark the pool's round-robin state dirty.
+ *
+ * Returns 0 on success, -ENOENT if the pool is unknown, -EINVAL if the OST
+ * is not a target of this LOV.
+ *
+ * NOTE(review): the result of lov_ost_pool_remove() is ignored, so 0 is
+ * also returned when the OST was not a member of the pool -- confirm that
+ * this is intended.
+ */
+int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
+{
+        struct lov_obd *lov = &(obd->u.lov);
+        struct obd_uuid ost_uuid;
+        struct pool_desc *pool;
+        unsigned int lov_idx;
+        int found;
+
+        spin_lock(&obd->obd_dev_lock);
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL) {
+                spin_unlock(&obd->obd_dev_lock);
+                return -ENOENT;
+        }
+
+        obd_str2uuid(&ost_uuid, ostname);
+
+        /* search ost in lov array, to get index */
+        for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+                if (lov->lov_tgts[lov_idx] &&
+                    obd_uuid_equals(&ost_uuid,
+                                    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+                        break;
+        }
+        /* running off the end of the table means the ost is not in lov */
+        found = (lov_idx != lov->desc.ld_tgt_count);
+        spin_unlock(&obd->obd_dev_lock);
+
+        if (!found)
+                return -EINVAL;
+
+        lov_ost_pool_remove(&pool->pool_obds, lov_idx);
+
+        /* membership changed: the round-robin order must be rebuilt */
+        pool->pool_rr.lqr_dirty = 1;
+
+        CDEBUG(D_CONFIG, "%s removed from "POOLNAMEF"\n", ostname, poolname);
+
+        return 0;
+}
+
+/*
+ * Test whether OST index @idx is a member of @pool.
+ *
+ * Returns 0 if the index is found in the pool's target array, -ENOENT
+ * otherwise. The pool's target lock is held for reading during the scan.
+ */
+int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
+{
+        int pos;
+        int rc = -ENOENT;
+
+        read_lock(&pool_tgt_rwlock(pool));
+        for (pos = 0; pos < pool_tgt_count(pool); pos++) {
+                if (pool_tgt_array(pool)[pos] == idx) {
+                        rc = 0;
+                        break;
+                }
+        }
+        read_unlock(&pool_tgt_rwlock(pool));
+
+        return rc;
+}
+
+/*
+ * Look up the pool @poolname in the pool hash of @lov.
+ *
+ * Returns the pool descriptor, or NULL when @poolname is the empty string,
+ * when no such pool is hashed, or when the pool has no targets. The last
+ * two cases are reported with a warning.
+ */
+struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
+{
+        struct pool_desc *pool;
+
+        /* an empty name means "no pool requested" */
+        if (poolname[0] == '\0')
+                return NULL;
+
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL) {
+                CWARN("Request for an unknown pool ("POOLNAMEF")\n",
+                      poolname);
+                return NULL;
+        }
+
+        if (pool_tgt_count(pool) == 0) {
+                CWARN("Request for an empty pool ("POOLNAMEF")\n",
+                      poolname);
+                return NULL;
+        }
+
+        return pool;
+}
+
list_add_tail(&oss->lqo_oss_list, &temposs->lqo_oss_list);
lov->lov_qos.lq_dirty = 1;
- lov->lov_qos.lq_dirty_rr = 1;
+ lov->lov_qos.lq_rr.lqr_dirty = 1;
CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n",
obd_uuid2str(&lov->lov_tgts[index]->ltd_uuid),
}
lov->lov_qos.lq_dirty = 1;
- lov->lov_qos.lq_dirty_rr = 1;
+ lov->lov_qos.lq_rr.lqr_dirty = 1;
out:
up_write(&lov->lov_qos.lq_rw_sem);
RETURN(rc);
}
/* We just used this index for a stripe; adjust everyone's weights */
-static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
+static int qos_used(struct lov_obd *lov, struct ost_pool *osts,
+ __u32 index, __u64 *total_wt)
{
struct lov_qos_oss *oss;
- int i;
+ int j;
ENTRY;
/* Don't allocate from this stripe anymore, until the next alloc_qos */
*total_wt = 0;
/* Decrease all OST penalties */
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ for (j = 0; j < osts->op_count; j++) {
+ int i;
+
+ i = osts->op_array[j];
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
continue;
if (lov->lov_tgts[i]->ltd_qos.ltq_penalty <
*total_wt += lov->lov_tgts[i]->ltd_qos.ltq_weight;
#ifdef QOS_DEBUG
- CDEBUG(D_QOS, "recalc tgt %d avail="LPU64
+ CDEBUG(D_QOS, "recalc tgt %d usable=%d avail="LPU64
" ostppo="LPU64" ostp="LPU64" ossppo="LPU64
" ossp="LPU64" wt="LPU64"\n",
- i, TGT_BAVAIL(i) >> 10,
+ i, lov->lov_tgts[i]->ltd_qos.ltq_usable,
+ TGT_BAVAIL(i) >> 10,
lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj >> 10,
lov->lov_tgts[i]->ltd_qos.ltq_penalty >> 10,
lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty_per_obj>>10,
#define LOV_QOS_EMPTY ((__u32)-1)
/* compute optimal round-robin order, based on OSTs per OSS */
-static int qos_calc_rr(struct lov_obd *lov)
+static int qos_calc_rr(struct lov_obd *lov, struct ost_pool *src_pool,
+ struct lov_qos_rr *lqr)
{
struct lov_qos_oss *oss;
- unsigned ost_count, placed, real_count;
- int i;
+ unsigned placed, real_count;
+ int i, rc;
ENTRY;
- if (!lov->lov_qos.lq_dirty_rr) {
- LASSERT(lov->lov_qos.lq_rr_size);
+ if (!lqr->lqr_dirty) {
+ LASSERT(lqr->lqr_pool.op_size);
RETURN(0);
}
* Check again. While we were sleeping on @lq_rw_sem something could
* change.
*/
- if (!lov->lov_qos.lq_dirty_rr) {
- LASSERT(lov->lov_qos.lq_rr_size);
+ if (!lqr->lqr_dirty) {
+ LASSERT(lqr->lqr_pool.op_size);
up_write(&lov->lov_qos.lq_rw_sem);
RETURN(0);
}
- ost_count = lov->desc.ld_tgt_count;
-
- if (lov->lov_qos.lq_rr_size)
- OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size);
- lov->lov_qos.lq_rr_size = ost_count *
- sizeof(lov->lov_qos.lq_rr_array[0]);
- OBD_ALLOC(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size);
- if (!lov->lov_qos.lq_rr_array) {
- lov->lov_qos.lq_rr_size = 0;
+ if (lqr->lqr_pool.op_size)
+ lov_ost_pool_free(&lqr->lqr_pool);
+ rc = lov_ost_pool_init(&lqr->lqr_pool, src_pool->op_count);
+ if (rc) {
up_write(&lov->lov_qos.lq_rw_sem);
- RETURN(-ENOMEM);
+ RETURN(rc);
}
- real_count = 0;
- for (i = 0; i < ost_count; i++) {
- lov->lov_qos.lq_rr_array[i] = LOV_QOS_EMPTY;
- if (lov->lov_tgts[i])
- real_count++;
- }
+ for (i = 0; i < src_pool->op_count; i++)
+ lqr->lqr_pool.op_array[i] = LOV_QOS_EMPTY;
+ lqr->lqr_pool.op_count = src_pool->op_count;
/* Place all the OSTs from 1 OSS at the same time. */
+ real_count = lqr->lqr_pool.op_count;
placed = 0;
list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) {
int j = 0;
- for (i = 0; i < ost_count; i++) {
- if (lov->lov_tgts[i] &&
- lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) {
+ for (i = 0; i < lqr->lqr_pool.op_count; i++) {
+ if (lov->lov_tgts[src_pool->op_array[i]] &&
+ (lov->lov_tgts[src_pool->op_array[i]]->ltd_qos.ltq_oss == oss)) {
/* Evenly space these OSTs across arrayspace */
- int next = j * ost_count / oss->lqo_ost_count;
- LASSERT(next < ost_count);
- while (lov->lov_qos.lq_rr_array[next] !=
+ int next = j * lqr->lqr_pool.op_count / oss->lqo_ost_count;
+ while (lqr->lqr_pool.op_array[next] !=
LOV_QOS_EMPTY)
- next = (next + 1) % ost_count;
- lov->lov_qos.lq_rr_array[next] = i;
+ next = (next + 1) % lqr->lqr_pool.op_count;
+ lqr->lqr_pool.op_array[next] = src_pool->op_array[i];
j++;
placed++;
}
}
- LASSERT(j == oss->lqo_ost_count);
}
- lov->lov_qos.lq_dirty_rr = 0;
+ lqr->lqr_dirty = 0;
up_write(&lov->lov_qos.lq_rw_sem);
if (placed != real_count) {
LCONSOLE_ERROR_MSG(0x14e, "Failed to place all OSTs in the "
"round-robin list (%d of %d).\n",
placed, real_count);
- for (i = 0; i < ost_count; i++) {
+ for (i = 0; i < lqr->lqr_pool.op_count; i++) {
LCONSOLE(D_WARNING, "rr #%d ost idx=%d\n", i,
- lov->lov_qos.lq_rr_array[i]);
+ lqr->lqr_pool.op_array[i]);
}
- lov->lov_qos.lq_dirty_rr = 1;
+ lqr->lqr_dirty = 1;
RETURN(-EAGAIN);
}
#ifdef QOS_DEBUG
- for (i = 0; i < ost_count; i++) {
+ for (i = 0; i < lqr->lqr_pool.op_count; i++) {
LCONSOLE(D_QOS, "rr #%d ost idx=%d\n", i,
- lov->lov_qos.lq_rr_array[i]);
+ lqr->lqr_pool.op_array[i]);
}
#endif
#define LOV_CREATE_RESEED_MIN 1000
/* Allocate objects on osts with round-robin algorithm */
static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt,
- int flags)
+ char *poolname, int flags)
{
- unsigned array_idx, ost_count = lov->desc.ld_tgt_count;
- unsigned ost_active_count = lov->desc.ld_active_tgt_count;
+ unsigned array_idx;
int i, *idx_pos;
__u32 ost_idx;
int ost_start_idx_temp;
int speed = 0;
int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
+ struct pool_desc *pool;
+ struct ost_pool *osts;
+ struct lov_qos_rr *lqr;
ENTRY;
- i = qos_calc_rr(lov);
- if (i)
+ pool = lov_find_pool(lov, poolname);
+ if (pool == NULL) {
+ osts = &(lov->lov_packed);
+ lqr = &(lov->lov_qos.lq_rr);
+ } else {
+ read_lock(&pool_tgt_rwlock(pool));
+ osts = &(pool->pool_obds);
+ lqr = &(pool->pool_rr);
+ }
+
+ i = qos_calc_rr(lov, osts, lqr);
+ if (i) {
+ if (pool != NULL)
+ read_unlock(&pool_tgt_rwlock(pool));
RETURN(i);
+ }
- if (--lov->lov_start_count <= 0) {
- lov->lov_start_idx = ll_rand() % ost_count;
- lov->lov_start_count =
- (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
- LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
- } else if (stripe_cnt_min >= ost_active_count ||
- lov->lov_start_idx > ost_count) {
+ if (--lqr->lqr_start_count <= 0) {
+ lqr->lqr_start_idx = ll_rand() % osts->op_count;
+ lqr->lqr_start_count =
+ (LOV_CREATE_RESEED_MIN / max(osts->op_count, 1U) +
+ LOV_CREATE_RESEED_MULT) * max(osts->op_count, 1U);
+ } else if (stripe_cnt_min >= osts->op_count ||
+ lqr->lqr_start_idx > osts->op_count) {
/* If we have allocated from all of the OSTs, slowly
* precess the next start if the OST/stripe count isn't
* already doing this for us. */
- lov->lov_start_idx %= ost_count;
- if (*stripe_cnt > 1 && (ost_active_count % (*stripe_cnt)) != 1)
- ++lov->lov_offset_idx;
+ lqr->lqr_start_idx %= osts->op_count;
+ if (*stripe_cnt > 1 && (osts->op_count % (*stripe_cnt)) != 1)
+ ++lqr->lqr_offset_idx;
}
down_read(&lov->lov_qos.lq_rw_sem);
- ost_start_idx_temp = lov->lov_start_idx;
+ ost_start_idx_temp = lqr->lqr_start_idx;
repeat_find:
- array_idx = (lov->lov_start_idx + lov->lov_offset_idx) % ost_count;
+ array_idx = (lqr->lqr_start_idx + lqr->lqr_offset_idx) % osts->op_count;
idx_pos = idx_arr;
#ifdef QOS_DEBUG
- CDEBUG(D_QOS, "want %d startidx %d startcnt %d offset %d active %d "
- "count %d arrayidx %d\n",
- stripe_cnt, lov->lov_start_idx, lov->lov_start_count,
- lov->lov_offset_idx, ost_active_count, ost_count, array_idx);
+ CDEBUG(D_QOS, "pool '%s' want %d startidx %d startcnt %d offset %d "
+ "active %d count %d arrayidx %d\n", poolname,
+ *stripe_cnt, lqr->lqr_start_idx, lqr->lqr_start_count,
+ lqr->lqr_offset_idx, osts->op_count, osts->op_count, array_idx);
#endif
- for (i = 0; i < ost_count; i++, array_idx=(array_idx + 1) % ost_count) {
- ++lov->lov_start_idx;
- ost_idx = lov->lov_qos.lq_rr_array[array_idx];
+ for (i = 0; i < osts->op_count;
+ i++, array_idx=(array_idx + 1) % osts->op_count) {
+ ++lqr->lqr_start_idx;
+ ost_idx = lqr->lqr_pool.op_array[array_idx];
#ifdef QOS_DEBUG
CDEBUG(D_QOS, "#%d strt %d act %d strp %d ary %d idx %d\n",
- i, lov->lov_start_idx,
+ i, lqr->lqr_start_idx,
((ost_idx != LOV_QOS_EMPTY) && lov->lov_tgts[ost_idx]) ?
lov->lov_tgts[ost_idx]->ltd_active : 0,
idx_pos - idx_arr, array_idx, ost_idx);
if ((speed < 2) && (idx_pos - idx_arr < stripe_cnt_min)) {
/* Try again, allowing slower OSCs */
speed++;
- lov->lov_start_idx = ost_start_idx_temp;
+ lqr->lqr_start_idx = ost_start_idx_temp;
goto repeat_find;
}
+ if (pool != NULL)
+ read_unlock(&pool_tgt_rwlock(pool));
+
up_read(&lov->lov_qos.lq_rw_sem);
*stripe_cnt = idx_pos - idx_arr;
static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm,
int *idx_arr)
{
- unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
+ unsigned ost_idx, array_idx, ost_count;
int i, *idx_pos;
int speed = 0;
+ struct pool_desc *pool = NULL;
+ struct ost_pool *osts;
ENTRY;
+ pool = lov_find_pool(lov, lsm->lsm_pool_name);
+ if (pool == NULL) {
+ osts = &(lov->lov_packed);
+ } else {
+ read_lock(&pool_tgt_rwlock(pool));
+ osts = &(pool->pool_obds);
+ }
+
+ ost_count = osts->op_count;
+
repeat_find:
- ost_idx = lsm->lsm_oinfo[0]->loi_ost_idx;
+ /* search loi_ost_idx in ost array */
+ array_idx = 0;
+ for (i = 0; i < ost_count; i++) {
+ if (osts->op_array[i] == lsm->lsm_oinfo[0]->loi_ost_idx) {
+ array_idx = i;
+ break;
+ }
+ }
+ if (i == ost_count) {
+ if (pool != NULL)
+ read_unlock(&pool_tgt_rwlock(pool));
+ CERROR("Start index %d not found in pool '%s'\n",
+ lsm->lsm_oinfo[0]->loi_ost_idx, lsm->lsm_pool_name);
+ RETURN(-EINVAL);
+ }
+
idx_pos = idx_arr;
- for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
+ for (i = 0; i < ost_count;
+ i++, array_idx = (array_idx + 1) % ost_count) {
+ ost_idx = osts->op_array[array_idx];
+
if (!lov->lov_tgts[ost_idx] ||
!lov->lov_tgts[ost_idx]->ltd_active) {
continue;
*idx_pos = ost_idx;
idx_pos++;
/* We have enough stripes */
- if (idx_pos - idx_arr == lsm->lsm_stripe_count)
+ if (idx_pos - idx_arr == lsm->lsm_stripe_count) {
+ if (pool != NULL)
+ read_unlock(&pool_tgt_rwlock(pool));
RETURN(0);
+ }
}
if (speed < 2) {
/* Try again, allowing slower OSCs */
CERROR("can't lstripe objid "LPX64": have %d want %u\n",
lsm->lsm_object_id, (int)(idx_pos - idx_arr),
lsm->lsm_stripe_count);
+
+ if (pool != NULL)
+ read_unlock(&pool_tgt_rwlock(pool));
+
RETURN(-EFBIG);
}
- network resources (shared OSS's)
*/
static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
- int flags)
+ char *poolname, int flags)
{
struct lov_obd *lov = &exp->exp_obd->u.lov;
static time_t last_warn = 0;
time_t now = cfs_time_current_sec();
__u64 total_bavail, total_weight = 0;
- __u32 ost_count;
int nfound, good_osts, i, warn = 0, rc = 0;
int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
+ struct pool_desc *pool;
+ struct ost_pool *osts;
+ struct lov_qos_rr *lqr;
ENTRY;
if (stripe_cnt_min < 1)
GOTO(out_nolock, rc = -EINVAL);
+ pool = lov_find_pool(lov, poolname);
+ if (pool == NULL) {
+ osts = &(lov->lov_packed);
+ lqr = &(lov->lov_qos.lq_rr);
+ } else {
+ read_lock(&pool_tgt_rwlock(pool));
+ osts = &(pool->pool_obds);
+ lqr = &(pool->pool_rr);
+ }
+
lov_getref(exp->exp_obd);
/* Detect -EAGAIN early, before expensive lock is taken. */
if (!lov->lov_qos.lq_dirty && lov->lov_qos.lq_same_space)
GOTO(out, rc = -EAGAIN);
- ost_count = lov->desc.ld_tgt_count;
-
if (lov->desc.ld_active_tgt_count < 2)
GOTO(out, rc = -EAGAIN);
if (cfs_time_sub(now, last_warn) > 60 * 30)
warn = 1;
/* Find all the OSTs that are valid stripe candidates */
- for (i = 0; i < ost_count; i++) {
+ for (i = 0; i < osts->op_count; i++) {
__u64 bavail;
- if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
+ if (!lov->lov_tgts[osts->op_array[i]] ||
+ !lov->lov_tgts[osts->op_array[i]]->ltd_active)
continue;
- bavail = TGT_BAVAIL(i);
+ bavail = TGT_BAVAIL(osts->op_array[i]);
if (!bavail) {
if (warn) {
CDEBUG(D_QOS, "no free space on %s\n",
- obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid));
+ obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid));
last_warn = now;
}
continue;
}
- if (!TGT_FFREE(i)) {
+ if (!TGT_FFREE(osts->op_array[i])) {
if (warn) {
CDEBUG(D_QOS, "no free inodes on %s\n",
- obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid));
+ obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid));
last_warn = now;
}
continue;
/* Fail Check before osc_precreate() is called
so we can only 'fail' single OSC. */
- if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && i == 0)
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && osts->op_array[i] == 0)
continue;
- if (obd_precreate(lov->lov_tgts[i]->ltd_exp) > 2)
+ if (obd_precreate(lov->lov_tgts[osts->op_array[i]]->ltd_exp) > 2)
continue;
- lov->lov_tgts[i]->ltd_qos.ltq_usable = 1;
- qos_calc_weight(lov, i);
+ lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_usable = 1;
+ qos_calc_weight(lov, osts->op_array[i]);
total_bavail += bavail;
- total_weight += lov->lov_tgts[i]->ltd_qos.ltq_weight;
+ total_weight += lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_weight;
good_osts++;
}
+#ifdef QOS_DEBUG
+ CDEBUG(D_QOS, "found %d good osts\n", good_osts);
+#endif
+
if (good_osts < stripe_cnt_min)
GOTO(out, rc = -EAGAIN);
/* On average, this will hit larger-weighted osts more often.
0-weight osts will always get used last (only when rand=0).*/
- for (i = 0; i < ost_count; i++) {
- if (!lov->lov_tgts[i] ||
- !lov->lov_tgts[i]->ltd_qos.ltq_usable)
+ for (i = 0; i < osts->op_count; i++) {
+ if (!lov->lov_tgts[osts->op_array[i]] ||
+ !lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_usable)
continue;
- cur_weight += lov->lov_tgts[i]->ltd_qos.ltq_weight;
+ cur_weight += lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_weight;
+#ifdef QOS_DEBUG
+ CDEBUG(D_QOS, "stripe_cnt=%d nfound=%d cur_weight="LPU64
+ " rand="LPU64" total_weight="LPU64"\n",
+ *stripe_cnt, nfound, cur_weight, rand, total_weight);
+#endif
if (cur_weight >= rand) {
#ifdef QOS_DEBUG
CDEBUG(D_QOS, "assigned stripe=%d to idx=%d\n",
- nfound, i);
+ nfound, osts->op_array[i]);
#endif
- idx_arr[nfound++] = i;
- qos_used(lov, i, &total_weight);
+ idx_arr[nfound++] = osts->op_array[i];
+ qos_used(lov, osts, osts->op_array[i], &total_weight);
rc = 0;
break;
}
LASSERT(nfound == *stripe_cnt);
out:
+ if (pool != NULL)
+ read_unlock(&pool_tgt_rwlock(pool));
+
up_write(&lov->lov_qos.lq_rw_sem);
out_nolock:
if (rc == -EAGAIN)
- rc = alloc_rr(lov, idx_arr, stripe_cnt, flags);
+ rc = alloc_rr(lov, idx_arr, stripe_cnt, poolname, flags);
lov_putref(exp->exp_obd);
RETURN(rc);
if (newea ||
lsm->lsm_oinfo[0]->loi_ost_idx >= lov->desc.ld_tgt_count)
- rc = alloc_qos(exp, tmp_arr, &stripe_cnt, flags);
+ rc = alloc_qos(exp, tmp_arr, &stripe_cnt,
+ lsm->lsm_pool_name, flags);
else
rc = alloc_specific(lov, lsm, tmp_arr);
LASSERT(S_ISDIR(mdd_object_type(obj)));
lum = (struct lov_user_md*)buf->lb_buf;
- /* if { size, offset, count } = { 0, -1, 0 } (i.e. all default
+ /* if { size, offset, count } = { 0, -1, 0 } and no pool (i.e. all default
* values specified) then delete default striping from dir. */
- if ((lum->lmm_stripe_size == 0 && lum->lmm_stripe_count == 0 &&
- lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1)) ||
- /* lmm_stripe_size == -1 is deprecated in 1.4.6 */
- lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1)){
+ if (lum->lmm_stripe_size == 0 && lum->lmm_stripe_count == 0 &&
+ lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1) &&
+ lum->lmm_magic != LOV_USER_MAGIC_V3) {
rc = mdd_xattr_set_txn(env, obj, &LU_BUF_NULL,
MDS_LOV_MD_NAME, 0, handle);
if (rc == -ENODATA)
if (lmmp == NULL && lmm_size == 0) {
struct mdd_device *mdd = mdd_obj2mdd_dev(child);
struct lov_mds_md *lmm = mdd_max_lmm_get(env, mdd);
- int size = sizeof(*lmm);
+ int size = sizeof(struct lov_mds_md_v3);
/* Get parent dir stripe and set */
if (pobj != NULL)
{
struct mds_obd *mds = &obd->u.mds;
int j;
+ struct lov_ost_data_v1 *lmm_objects;
ENTRY;
/* if we create file without objects - lmm is NULL */
if (lmm == NULL)
return;
+ if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V3)
+ lmm_objects = ((struct lov_mds_md_v3 *)lmm)->lmm_objects;
+ else
+ lmm_objects = lmm->lmm_objects;
+
for (j = 0; j < le32_to_cpu(lmm->lmm_stripe_count); j++) {
- int i = le32_to_cpu(lmm->lmm_objects[j].l_ost_idx);
- obd_id id = le64_to_cpu(lmm->lmm_objects[j].l_object_id);
+ int i = le32_to_cpu(lmm_objects[j].l_ost_idx);
+ obd_id id = le64_to_cpu(lmm_objects[j].l_object_id);
int page = i / OBJID_PER_PAGE();
int idx = i % OBJID_PER_PAGE();
obd_id *data = mds->mds_lov_page_array[page];
if (rc || !(ma->ma_valid & MA_LOV))
RETURN(rc);
- LASSERT(le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC);
+ LASSERTF(le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC_V1 ||
+ le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC_V3,
+ "%08x", le32_to_cpu(ma->ma_lmm->lmm_magic));
+
if ((int)le32_to_cpu(ma->ma_lmm->lmm_stripe_count) < 0)
stripe = mdd2obd_dev(mdd)->u.mds.mds_lov_desc.ld_tgt_count;
else
if (rc)
GOTO(err_objects, rc);
- mds->mds_max_mdsize = sizeof(struct lov_mds_md);
+ mds->mds_max_mdsize = sizeof(struct lov_mds_md_v3);
mds->mds_max_cookiesize = sizeof(struct llog_cookie);
err_pop:
int mds_notify(struct obd_device *obd, struct obd_device *watched,
enum obd_notify_event ev, void *data);
int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
- struct lov_mds_md *lmm, int lmm_size);
+ struct lov_mds_md *lmm, int lmm_size,
+ __u64 connect_flags);
int mds_init_lov_desc(struct obd_device *obd, struct obd_export *osc_exp);
int mds_obd_create(struct obd_export *exp, struct obdo *oa,
stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
mds->mds_lov_desc.ld_tgt_count);
- mds->mds_max_mdsize = lov_mds_md_size(stripes);
+ mds->mds_max_mdsize = lov_mds_md_size(stripes, LOV_MAGIC_V3);
mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize for %d stripes: "
"%d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize,
RETURN(0);
}
+/*
+ * Revoke the config lock of filesystem @fsname so everyone updates: the
+ * lock is acquired through mgs_get_cfg_lock() and, when that succeeds,
+ * dropped again straight away with mgs_put_cfg_lock(). A failure to get
+ * the lock is only logged. Nothing is done for an empty @fsname.
+ */
+static void mgs_revoke_lock(struct obd_device *obd, char *fsname,
+                            struct lustre_handle *lockh)
+{
+        int lockrc;
+
+        if (!fsname[0])
+                return;
+
+        lockrc = mgs_get_cfg_lock(obd, fsname, lockh);
+        if (lockrc == ELDLM_OK)
+                mgs_put_cfg_lock(lockh);
+        else
+                CERROR("lock error %d for fs %s\n", lockrc,
+                       fsname);
+}
+
/* rc=0 means ok
1 means update
<0 means error */
struct obd_device *obd = req->rq_export->exp_obd;
struct mgs_send_param *msp, *rep_msp;
struct lustre_handle lockh;
- int lockrc, rc;
+ int rc;
struct lustre_cfg_bufs bufs;
struct lustre_cfg *lcfg;
char fsname[MTI_NAME_MAXLEN];
RETURN(rc);
}
- /* Revoke lock so everyone updates. Should be alright if
- * someone was already reading while we were updating the logs,
- * so we don't really need to hold the lock while we're
- * writing.
- */
- if (fsname[0]) {
- lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
- if (lockrc != ELDLM_OK)
- CERROR("lock error %d for fs %s\n", lockrc,
- fsname);
- else
- mgs_put_cfg_lock(&lockh);
- }
+ /* request for update */
+ mgs_revoke_lock(obd, fsname, &lockh);
+
lustre_cfg_free(lcfg);
rc = req_capsule_server_pack(&req->rq_pill);
RETURN(0);
}
+/*
+ * Split @arg, which has the form "<fsname>.<poolname>", at the first '.'
+ * and copy the two parts into @fsname and @poolname.
+ *
+ * @fsname must provide at least MTI_NAME_MAXLEN bytes and @poolname at
+ * least MAXPOOLNAME + 1 bytes (the sizes the caller in this file
+ * allocates). Both outputs are always left NUL-terminated.
+ *
+ * Returns 0 on success, -EINVAL if @arg is NULL or contains no '.', and
+ * -ENAMETOOLONG if either part does not fit its buffer.
+ */
+static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname)
+{
+        char *ptr;
+        int len;
+        ENTRY;
+
+        /* hand back valid strings even when we bail out early */
+        fsname[0] = '\0';
+        poolname[0] = '\0';
+
+        if (arg == NULL)
+                RETURN(-EINVAL);
+
+        /* copy everything up to the first '.' into fsname, bounded */
+        len = 0;
+        for (ptr = arg; (*ptr != '\0') && (*ptr != '.'); ptr++) {
+                if (len >= MTI_NAME_MAXLEN - 1) {
+                        fsname[0] = '\0';
+                        RETURN(-ENAMETOOLONG);
+                }
+                fsname[len++] = *ptr;
+        }
+        fsname[len] = '\0';
+
+        /* no separator: there is no pool name to extract */
+        if (*ptr == '\0')
+                RETURN(-EINVAL);
+
+        /* skip the '.' and copy the remainder into poolname, bounded */
+        ptr++;
+        for (len = 0; ptr[len] != '\0'; len++) {
+                if (len >= MAXPOOLNAME) {
+                        poolname[0] = '\0';
+                        RETURN(-ENAMETOOLONG);
+                }
+                poolname[len] = ptr[len];
+        }
+        poolname[len] = '\0';
+
+        RETURN(0);
+}
+
+/*
+ * Handle an OBD_IOC_POOL ioctl.
+ *
+ * The lustre_cfg record is copied from user space, its first string is
+ * split into <fsname>.<poolname>, and the requested pool command
+ * (LCFG_POOL_NEW/ADD/REM/DEL) is executed through mgs_pool_cmd(). On
+ * success the config lock of the filesystem is revoked so everyone
+ * updates.
+ *
+ * Returns 0 on success or a negative errno. All temporary buffers are
+ * released on every exit path.
+ */
+static int mgs_iocontrol_pool(struct obd_device *obd,
+                              struct obd_ioctl_data *data)
+{
+        int rc;
+        struct lustre_handle lockh;
+        struct lustre_cfg *lcfg = NULL;
+        struct llog_rec_hdr rec;
+        char *fsname = NULL;
+        char *poolname = NULL;
+        char *arg;
+        char *ostname = NULL;
+        ENTRY;
+
+        OBD_ALLOC(fsname, MTI_NAME_MAXLEN);
+        if (fsname == NULL)
+                RETURN(-ENOMEM);
+
+        OBD_ALLOC(poolname, MAXPOOLNAME + 1);
+        if (poolname == NULL) {
+                rc = -ENOMEM;
+                GOTO(out_pool, rc);
+        }
+        rec.lrh_len = llog_data_len(data->ioc_plen1);
+
+        if (data->ioc_type == LUSTRE_CFG_TYPE) {
+                rec.lrh_type = OBD_CFG_REC;
+        } else {
+                CERROR("unknown cfg record type:%d \n", data->ioc_type);
+                rc = -EINVAL;
+                GOTO(out_pool, rc);
+        }
+
+        if (data->ioc_plen1 > CFS_PAGE_SIZE) {
+                rc = -E2BIG;
+                GOTO(out_pool, rc);
+        }
+
+        OBD_ALLOC(lcfg, data->ioc_plen1);
+        if (lcfg == NULL) {
+                rc = -ENOMEM;
+                GOTO(out_pool, rc);
+        }
+        /* copy_from_user() returns the number of bytes NOT copied, which
+         * is not an errno; report a proper -EFAULT instead */
+        if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) {
+                rc = -EFAULT;
+                GOTO(out_pool, rc);
+        }
+
+        if (lcfg->lcfg_bufcount < 2) {
+                rc = -EINVAL;
+                GOTO(out_pool, rc);
+        }
+
+        /* first arg is always <fsname>.<poolname> */
+        arg = lustre_cfg_string(lcfg, 1);
+        if (arg == NULL) {
+                rc = -EINVAL;
+                GOTO(out_pool, rc);
+        }
+        rc = mgs_extract_fs_pool(arg, fsname, poolname);
+        if (rc)
+                GOTO(out_pool, rc);
+
+        /* validate the buffer count for the command; never leave through
+         * RETURN() here, the buffers above still have to be freed */
+        switch (lcfg->lcfg_command) {
+        case LCFG_POOL_NEW:
+        case LCFG_POOL_DEL: {
+                if (lcfg->lcfg_bufcount != 2) {
+                        rc = -EINVAL;
+                        GOTO(out_pool, rc);
+                }
+                break;
+        }
+        case LCFG_POOL_ADD:
+        case LCFG_POOL_REM: {
+                if (lcfg->lcfg_bufcount != 3) {
+                        rc = -EINVAL;
+                        GOTO(out_pool, rc);
+                }
+                /* second arg is the ost name and must be present */
+                ostname = lustre_cfg_string(lcfg, 2);
+                if (ostname == NULL) {
+                        rc = -EINVAL;
+                        GOTO(out_pool, rc);
+                }
+                break;
+        }
+        default: {
+                rc = -EINVAL;
+                GOTO(out_pool, rc);
+        }
+        }
+
+        rc = mgs_pool_cmd(obd, lcfg->lcfg_command, fsname, poolname,
+                          ostname);
+        if (rc) {
+                CERROR("OBD_IOC_POOL err %d, cmd %X for pool %s.%s\n",
+                       rc, lcfg->lcfg_command, fsname, poolname);
+                GOTO(out_pool, rc);
+        }
+
+        /* request for update */
+        mgs_revoke_lock(obd, fsname, &lockh);
+
+out_pool:
+        if (lcfg != NULL)
+                OBD_FREE(lcfg, data->ioc_plen1);
+
+        if (fsname != NULL)
+                OBD_FREE(fsname, MTI_NAME_MAXLEN);
+
+        if (poolname != NULL)
+                OBD_FREE(poolname, MAXPOOLNAME + 1);
+
+        RETURN(rc);
+}
+
/* from mdt_iocontrol */
int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void *uarg)
struct lustre_cfg *lcfg;
struct llog_rec_hdr rec;
char fsname[MTI_NAME_MAXLEN];
- int lockrc;
rec.lrh_len = llog_data_len(data->ioc_plen1);
someone was already reading while we were updating the logs,
so we don't really need to hold the lock while we're
writing (above). */
- if (fsname[0]) {
- lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
- if (lockrc != ELDLM_OK)
- CERROR("lock error %d for fs %s\n", lockrc,
- fsname);
- else
- mgs_put_cfg_lock(&lockh);
- }
+ mgs_revoke_lock(obd, fsname, &lockh);
out_free:
OBD_FREE(lcfg, data->ioc_plen1);
RETURN(rc);
}
+ case OBD_IOC_POOL: {
+ RETURN(mgs_iocontrol_pool(obd, data));
+ }
+
case OBD_IOC_DUMP_LOG: {
struct llog_ctxt *ctxt;
ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
int mgs_erase_logs(struct obd_device *obd, char *fsname);
int mgs_setparam(struct obd_device *obd, struct lustre_cfg *lcfg, char *fsname);
+/* Parameter names follow the definition: filesystem name first, then pool
+ * name, then the optional OST name. */
+int mgs_pool_cmd(struct obd_device *obd, enum lcfg_command_type cmd,
+                 char *fsname, char *poolname, char *ostname);
+
/* mgs_fs.c */
int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt);
int mgs_fs_cleanup(struct obd_device *obddev);
RETURN(rc);
}
+/*
+ * Append one pool command record to the config log @logname.
+ *
+ * The record (@cmd with @fsname, @poolname and @ostname as its string
+ * buffers) is bracketed by a CM_START and a CM_END marker for @lovname
+ * carrying @comment. Callers pass the filesystem name before the pool
+ * name, so the parameters are named in that order.
+ *
+ * Returns 0 on success or the first error met while writing. The log is
+ * always closed once it has been opened.
+ */
+static int mgs_write_log_pool(struct obd_device *obd, char *logname, struct fs_db *fsdb,
+                              char *lovname,
+                              enum lcfg_command_type cmd,
+                              char *fsname, char *poolname,
+                              char *ostname, char *comment)
+{
+        struct llog_handle *llh = NULL;
+        int rc, rc2;
+
+        rc = record_start_log(obd, &llh, logname);
+        if (rc)
+                return rc;
+
+        rc = record_marker(obd, llh, fsdb, CM_START, lovname, comment);
+        if (rc == 0) {
+                rc = record_base(obd, llh, lovname, 0, cmd,
+                                 fsname, poolname, ostname, 0);
+                /* close the marker pair even if the record failed, but
+                 * keep reporting the first error */
+                rc2 = record_marker(obd, llh, fsdb, CM_END, lovname,
+                                    comment);
+                if (rc == 0)
+                        rc = rc2;
+        }
+
+        rc2 = record_end_log(obd, &llh);
+        if (rc == 0)
+                rc = rc2;
+
+        return rc;
+}
+
+/*
+ * Record a pool command in the config logs of filesystem @fsname.
+ *
+ * @cmd is one of LCFG_POOL_NEW/ADD/REM/DEL, @poolname the pool it applies
+ * to and @ostname the OST for add/rem (NULL otherwise). The command is
+ * written to the "<fsname>-MDT0000" log and to the "<fsname>-client" log.
+ * For rem/del the matching earlier "add"/"new" record is first marked
+ * CM_SKIP through mgs_modify().
+ *
+ * Returns 0 on success, -EINVAL if the filesystem log is empty, if
+ * @ostname is missing for add/rem or does not belong to @fsname, -ENOMEM
+ * on allocation failure, or the first error from writing the logs.
+ */
+int mgs_pool_cmd(struct obd_device *obd, enum lcfg_command_type cmd,
+                 char *fsname, char *poolname, char *ostname)
+{
+        struct fs_db *fsdb;
+        char mdt_index[16];
+        char *lovname;
+        char *logname;
+        char *label = NULL, *canceled_label = NULL;
+        int label_sz;
+        struct mgs_target_info *mti = NULL;
+        int rc, rc2;
+        ENTRY;
+
+        rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
+        if (rc) {
+                CERROR("Can't get db for %s\n", fsname);
+                RETURN(rc);
+        }
+        if (fsdb->fsdb_flags & FSDB_LOG_EMPTY) {
+                CERROR("%s is not defined\n", fsname);
+                mgs_free_fsdb(obd, fsdb);
+                RETURN(-EINVAL);
+        }
+
+        /* add/rem format the ost name into the label; a NULL name would
+         * be printed as text that label_sz does not account for */
+        if ((cmd == LCFG_POOL_ADD || cmd == LCFG_POOL_REM) &&
+            ostname == NULL)
+                RETURN(-EINVAL);
+
+        label_sz = 10 + strlen(fsname) + strlen(poolname);
+
+        /* check if ostname match fsname */
+        if (ostname != NULL) {
+                char *ptr;
+
+                ptr = strrchr(ostname, '-');
+                /* the part before the last '-' must be exactly fsname,
+                 * not merely a prefix of it */
+                if ((ptr == NULL) ||
+                    (strlen(fsname) != (size_t)(ptr - ostname)) ||
+                    (strncmp(fsname, ostname, ptr-ostname) != 0))
+                        RETURN(-EINVAL);
+                label_sz += strlen(ostname);
+        }
+
+        OBD_ALLOC(label, label_sz);
+        if (label == NULL)
+                RETURN(-ENOMEM);
+
+        switch(cmd) {
+        case LCFG_POOL_NEW: {
+                sprintf(label,
+                        "new %s.%s", fsname, poolname);
+                break;
+        }
+        case LCFG_POOL_ADD: {
+                sprintf(label,
+                        "add %s.%s.%s", fsname, poolname, ostname);
+                break;
+        }
+        case LCFG_POOL_REM: {
+                OBD_ALLOC(canceled_label, label_sz);
+                if (canceled_label == NULL)
+                        GOTO(out_label, rc = -ENOMEM);
+                sprintf(label,
+                        "rem %s.%s.%s", fsname, poolname, ostname);
+                sprintf(canceled_label,
+                        "add %s.%s.%s", fsname, poolname, ostname);
+                break;
+        }
+        case LCFG_POOL_DEL: {
+                OBD_ALLOC(canceled_label, label_sz);
+                if (canceled_label == NULL)
+                        GOTO(out_label, rc = -ENOMEM);
+                sprintf(label,
+                        "del %s.%s", fsname, poolname);
+                sprintf(canceled_label,
+                        "new %s.%s", fsname, poolname);
+                break;
+        }
+        default: {
+                break;
+        }
+        }
+
+        down(&fsdb->fsdb_sem);
+
+        sprintf(mdt_index, "-MDT%04x", 0);
+        name_create(&logname, fsname, mdt_index);
+        name_create(&lovname, logname, "-mdtlov");
+
+        /* mti is only needed to cancel an earlier record; without it the
+         * cancel step is skipped for both logs */
+        if (canceled_label != NULL) {
+                OBD_ALLOC(mti, sizeof(*mti));
+                if (mti != NULL) {
+                        strcpy(mti->mti_svname, "lov pool");
+                        mgs_modify(obd, fsdb, mti, logname, lovname,
+                                   canceled_label, CM_SKIP);
+                }
+        }
+
+        rc = mgs_write_log_pool(obd, logname, fsdb, lovname,
+                                cmd, fsname, poolname, ostname, label);
+        name_destroy(&logname);
+
+        name_create(&logname, fsname, "-client");
+        if (canceled_label != NULL && mti != NULL) {
+                mgs_modify(obd, fsdb, mti, logname, lovname,
+                           canceled_label, CM_SKIP);
+        }
+        rc2 = mgs_write_log_pool(obd, logname, fsdb, fsdb->fsdb_clilov,
+                                 cmd, fsname, poolname, ostname, label);
+        /* report the first log-writing failure to the caller */
+        if (rc == 0)
+                rc = rc2;
+        name_destroy(&logname);
+        name_destroy(&lovname);
+
+        up(&fsdb->fsdb_sem);
+
+out_label:
+        OBD_FREE(label, label_sz);
+        if (canceled_label != NULL)
+                OBD_FREE(canceled_label, label_sz);
+
+        if (mti != NULL)
+                OBD_FREE(mti, sizeof(*mti));
+
+        RETURN(rc);
+}
+
#if 0
/******************** unused *********************/
static int mgs_backup_llog(struct obd_device *obd, char* fsname)
void dump_lsm(int level, struct lov_stripe_md *lsm)
{
CDEBUG(level, "lsm %p, objid "LPX64", maxbytes "LPX64", magic 0x%08X, "
- "stripe_size %u, stripe_count %u\n", lsm,
+ "stripe_size %u, stripe_count %u pool "POOLNAMEF"\n", lsm,
lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
- lsm->lsm_stripe_size, lsm->lsm_stripe_count);
+ lsm->lsm_stripe_size, lsm->lsm_stripe_count,
+ lsm->lsm_pool_name);
}
/* XXX assumes only a single page in request */
return n;
}
-int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
- read_proc_t *read_proc, write_proc_t *write_proc,
- void *data)
-{
- struct proc_dir_entry *proc;
+cfs_proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+ char *name,
+ read_proc_t *read_proc,
+ write_proc_t *write_proc,
+ void *data,
+ struct file_operations *fops)
+{
+ cfs_proc_dir_entry_t *proc;
mode_t mode = 0;
if (root == NULL || name == NULL)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
if (read_proc)
mode = 0444;
if (write_proc)
mode |= 0200;
+ if (fops)
+ mode = 0644;
proc = create_proc_entry(name, mode, root);
if (!proc) {
CERROR("LprocFS: No memory to create /proc entry %s", name);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
proc->read_proc = read_proc;
proc->write_proc = write_proc;
proc->data = data;
- return 0;
+ if (fops)
+ proc->proc_fops = fops;
+ return proc;
}
struct proc_dir_entry *lprocfs_add_symlink(const char *name,
"change_qunit_size",
"alt_checksum_algorithm",
"fid_is_enabled",
+ "version_recovery",
+ "pools",
NULL
};
LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_lock_cancel_cb);
LPROCFS_OBD_OP_INIT(num_private_stats, stats,unregister_lock_cancel_cb);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_new);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_rem);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_add);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_del);
}
int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
int rc = 0;
struct nid_stat *tmp = NULL, *tmp1;
struct obd_device *obd = NULL;
+ cfs_proc_dir_entry_t *entry;
ENTRY;
*newnid = 0;
GOTO(destroy_new, rc = -ENOMEM);
}
- rc = lprocfs_add_simple(tmp->nid_proc, "uuid",
- lprocfs_exp_rd_uuid, NULL, tmp);
- if (rc)
+ entry = lprocfs_add_simple(tmp->nid_proc, "uuid",
+ lprocfs_exp_rd_uuid, NULL, tmp, NULL);
+ if (IS_ERR(entry)) {
CWARN("Error adding the uuid file\n");
+ rc = PTR_ERR(entry);
+ }
- rc = lprocfs_add_simple(tmp->nid_proc, "hash",
- lprocfs_exp_rd_hash, NULL, tmp);
- if (rc)
+ entry = lprocfs_add_simple(tmp->nid_proc, "hash",
+ lprocfs_exp_rd_hash, NULL, tmp, NULL);
+ if (IS_ERR(entry)) {
CWARN("Error adding the hash file\n");
+ rc = PTR_ERR(entry);
+ }
exp->exp_nid_stats = tmp;
*newnid = 1;
err = class_del_conn(obd, lcfg);
GOTO(out, err = 0);
}
+ case LCFG_POOL_NEW: {
+ err = obd_pool_new(obd, lustre_cfg_string(lcfg, 2));
+ GOTO(out, err = 0);
+ break;
+ }
+ case LCFG_POOL_ADD: {
+ err = obd_pool_add(obd, lustre_cfg_string(lcfg, 2),
+ lustre_cfg_string(lcfg, 3));
+ GOTO(out, err = 0);
+ break;
+ }
+ case LCFG_POOL_REM: {
+ err = obd_pool_rem(obd, lustre_cfg_string(lcfg, 2),
+ lustre_cfg_string(lcfg, 3));
+ GOTO(out, err = 0);
+ break;
+ }
+ case LCFG_POOL_DEL: {
+ err = obd_pool_del(obd, lustre_cfg_string(lcfg, 2));
+ GOTO(out, err = 0);
+ break;
+ }
default: {
err = obd_process_config(obd, sizeof(*lcfg), lcfg);
GOTO(out, err);
if (obd->obd_proc_exports_entry)
lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
lprocfs_nid_stats_clear_read,
- lprocfs_nid_stats_clear_write, obd);
+ lprocfs_nid_stats_clear_write, obd, NULL);
memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
LUSTRE_CFG_BUFLEN(lcfg, 4));
*/
static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
{
- struct lov_user_md lum, *lumk;
+ /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+ struct lov_user_md_v3 lum, *lumk;
+ struct lov_user_ost_data_v1 *lmm_objects;
int rc = 0, lum_size;
ENTRY;
if (!lsm)
RETURN(-ENODATA);
- if (copy_from_user(&lum, lump, sizeof(lum)))
+ /* we only need the header part from user space to get lmm_magic and
+ * lmm_stripe_count, (the header part is common to v1 and v3) */
+ lum_size = sizeof(struct lov_user_md_v1);
+ if (copy_from_user(&lum, lump, lum_size))
RETURN(-EFAULT);
- if (lum.lmm_magic != LOV_USER_MAGIC)
+ if ((lum.lmm_magic != LOV_USER_MAGIC_V1) &&
+ (lum.lmm_magic != LOV_USER_MAGIC_V3))
RETURN(-EINVAL);
+ /* lov_user_md_vX and lov_mds_md_vX must have the same size */
+ LASSERT(sizeof(struct lov_user_md_v1) == sizeof(struct lov_mds_md_v1));
+ LASSERT(sizeof(struct lov_user_md_v3) == sizeof(struct lov_mds_md_v3));
+ LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lumk->lmm_objects[0]));
+
+ /* we can use lov_mds_md_size() to compute lum_size
+ * because lov_user_md_vX and lov_mds_md_vX have the same size */
if (lum.lmm_stripe_count > 0) {
- lum_size = sizeof(lum) + sizeof(lum.lmm_objects[0]);
+ lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic);
OBD_ALLOC(lumk, lum_size);
if (!lumk)
RETURN(-ENOMEM);
- lumk->lmm_objects[0].l_object_id = lsm->lsm_object_id;
- lumk->lmm_objects[0].l_object_gr = lsm->lsm_object_gr;
+ if (lum.lmm_magic == LOV_USER_MAGIC_V1)
+ lmm_objects = &(((struct lov_user_md_v1 *)lumk)->lmm_objects[0]);
+ else
+ lmm_objects = &(lumk->lmm_objects[0]);
+ lmm_objects->l_object_id = lsm->lsm_object_id;
} else {
- lum_size = sizeof(lum);
+ lum_size = lov_mds_md_size(0, lum.lmm_magic);
lumk = &lum;
}
CDEBUG(D_OTHER, "\tlmm_stripe_offset: %#x\n", lum->lmm_stripe_offset);
}
-void lustre_swab_lov_user_md(struct lov_user_md *lum)
+static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
{
ENTRY;
- CDEBUG(D_IOCTL, "swabbing lov_user_md\n");
__swab32s(&lum->lmm_magic);
__swab32s(&lum->lmm_pattern);
__swab64s(&lum->lmm_object_id);
CDEBUG(D_OTHER, "\tlmm_extent_count: %#x\n", lumj->lmm_extent_count);
}
+/*
+ * Byte-swap a v1 lov_user_md in place.
+ * All the work is delegated to lustre_swab_lov_user_md_common(); this
+ * wrapper only adds the D_IOCTL debug trace identifying the v1 layout.
+ */
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
+{
+ ENTRY;
+ CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n");
+ lustre_swab_lov_user_md_common(lum);
+ EXIT;
+}
+
+/*
+ * Byte-swap a v3 lov_user_md in place.
+ * The structure is handed to lustre_swab_lov_user_md_common() through a
+ * cast to the v1 type, i.e. the fields swabbed there are treated as a
+ * header shared by both layouts.
+ */
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
+{
+ ENTRY;
+ CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n");
+ lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum);
+ /* lmm_pool_name is a char string: there is nothing to byte-swap */
+ EXIT;
+}
+
void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj)
{
ENTRY;
EXIT;
}
-static void print_lum_objs(struct lov_user_md *lum)
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count)
{
- struct lov_user_ost_data *lod;
int i;
ENTRY;
- if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */
- return;
- CDEBUG(D_OTHER, "lov_user_md_objects: %p\n", lum);
- for (i = 0; i < lum->lmm_stripe_count; i++) {
- lod = &lum->lmm_objects[i];
- CDEBUG(D_OTHER, "(%i) lod->l_object_id: "LPX64"\n", i, lod->l_object_id);
- CDEBUG(D_OTHER, "(%i) lod->l_object_gr: "LPX64"\n", i, lod->l_object_gr);
- CDEBUG(D_OTHER, "(%i) lod->l_ost_gen: %#x\n", i, lod->l_ost_gen);
- CDEBUG(D_OTHER, "(%i) lod->l_ost_idx: %#x\n", i, lod->l_ost_idx);
+ for (i = 0; i < stripe_count; i++) {
+ __swab64s(&(lod[i].l_object_id));
+ __swab64s(&(lod[i].l_object_gr));
+ __swab32s(&(lod[i].l_ost_gen));
+ __swab32s(&(lod[i].l_ost_idx));
}
EXIT;
}
-void lustre_swab_lov_user_md_objects(struct lov_user_md *lum)
-{
- struct lov_user_ost_data *lod;
- int i;
- ENTRY;
- for (i = 0; i < lum->lmm_stripe_count; i++) {
- lod = &lum->lmm_objects[i];
- __swab64s(&lod->l_object_id);
- __swab64s(&lod->l_object_gr);
- __swab32s(&lod->l_ost_gen);
- __swab32s(&lod->l_ost_idx);
- }
- print_lum_objs(lum);
- EXIT;
-}
-
-
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
-{
- struct lov_ost_data *lod;
- int i;
- ENTRY;
- for (i = 0; i < lmm->lmm_stripe_count; i++) {
- lod = &lmm->lmm_objects[i];
- __swab64s(&lod->l_object_id);
- __swab64s(&lod->l_object_gr);
- __swab32s(&lod->l_ost_gen);
- __swab32s(&lod->l_ost_idx);
- }
- __swab32s(&lmm->lmm_magic);
- __swab32s(&lmm->lmm_pattern);
- __swab64s(&lmm->lmm_object_id);
- __swab64s(&lmm->lmm_object_gr);
- __swab32s(&lmm->lmm_stripe_size);
- __swab32s(&lmm->lmm_stripe_count);
-
- EXIT;
-}
-
void lustre_swab_ldlm_res_id (struct ldlm_res_id *id)
{
EXPORT_SYMBOL(lustre_swab_mds_rec_rename);
EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
EXPORT_SYMBOL(lustre_swab_lov_desc);
-EXPORT_SYMBOL(lustre_swab_lov_user_md);
-EXPORT_SYMBOL(lustre_swab_lov_mds_md);
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v1);
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
EXPORT_SYMBOL(lustre_swab_lov_user_md_join);
EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
return rc;
}
- lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT);
+ lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC);
if ((lum_dir = (struct lov_user_md *)malloc(lum_size)) == NULL) {
rc = ENOMEM;
llapi_err(LLAPI_MSG_ERROR, "error: can't allocate %d bytes "
return 1;
}
- lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT);
+ lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC);
if ((lum_file = (struct lov_user_md *)malloc(lum_size)) == NULL) {
fprintf(stderr, "unable to allocate memory for ioctl's");
init_test_env $@
. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+[ -n "$LOAD" ] && load_modules && exit 0
[ -z "$NOFORMAT" ] && formatall
[ -z "$NOSETUP" ] && setupall
run_test 99a "cvs init ========================================="
test_99b() {
+ [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
[ ! -d $DIR/d99cvsroot ] && test_99a
cd /etc/init.d
# some versions of cvs import exit(1) when asked to import links or
run_test 99b "cvs import ======================================="
test_99c() {
+ [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
[ ! -d $DIR/d99cvsroot ] && test_99b
cd $DIR
mkdir -p $DIR/d99reposname
run_test 99c "cvs checkout ====================================="
test_99d() {
+ [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
[ ! -d $DIR/d99cvsroot ] && test_99c
cd $DIR/d99reposname
$RUNAS touch foo99
run_test 99d "cvs add =========================================="
test_99e() {
+ [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
[ ! -d $DIR/d99cvsroot ] && test_99c
cd $DIR/d99reposname
$RUNAS cvs update
run_test 99e "cvs update ======================================="
test_99f() {
+ [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
[ ! -d $DIR/d99cvsroot ] && test_99d
cd $DIR/d99reposname
$RUNAS cvs commit -m 'nomsg' foo99
run_test 121 "read cancel race ========="
test_123a() { # was test 123, statahead(bug 11401)
+ SLOWOK=0
if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
log "testing on UP system. Performance may be not as good as expected."
+ SLOWOK=1
fi
remount_client $MOUNT
lctl get_param -n llite.*.statahead_stats
# wait for commitment of removal
sleep 2
- [ $error -ne 0 ] && error "statahead is slow!"
+ [ $error -ne 0 -a $SLOWOK -eq 0 ] && error "statahead is slow!"
return 0
}
run_test 123a "verify statahead work"
}
run_test 130e "FIEMAP (test continuation FIEMAP calls)"
+POOL=${POOL:-cea1}
+TGT_COUNT=$OSTCOUNT
+TGTPOOL_FIRST=1
+TGTPOOL_MAX=$(($TGT_COUNT - 1))
+TGTPOOL_STEP=2
+TGTPOOL_LIST=`seq $TGTPOOL_FIRST $TGTPOOL_STEP $TGTPOOL_MAX`
+POOL_ROOT=${POOL_ROOT:-$DIR/d200.pools}
+POOL_DIR=$POOL_ROOT/dir_tst
+POOL_FILE=$POOL_ROOT/file_tst
+
+# check_file_in_pool <file>
+# Take the second tab-separated field of every "0x" line printed by
+# $GETSTRIPE for <file> (presumably the OST index of each object -- confirm
+# against the getstripe output format) and verify that each value is a
+# member of $TGTPOOL_LIST.
+# Returns 0 when every value is in the list; otherwise prints the pool list
+# and the striping, calls error() and returns 1.
+check_file_in_pool()
+{
+ file=$1
+ res=$($GETSTRIPE $file | grep 0x | cut -f2)
+ for i in $res
+ do
+ # wrap the list in ':' separators so that only whole values match
+ found=$(echo :$TGTPOOL_LIST: | tr " " ":" | grep :$i:)
+ if [[ "$found" == "" ]]
+ then
+ echo "pool list: $TGTPOOL_LIST"
+ echo "striping: $res"
+ error "$file not allocated in $POOL"
+ return 1
+ fi
+ done
+ return 0
+}
+
+# Create pool $FSNAME.$POOL on the mgs facet, then read back its entry under
+# lov.$FSNAME-MDT0000-mdtlov.pools; a failing get_param means the pool was
+# not created.
+test_200() {
+ do_facet mgs $LCTL pool_new $FSNAME.$POOL
+ do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL
+ [ $? == 0 ] || error "Pool creation of $POOL failed"
+}
+run_test 200 "Create new pool =========================================="
+
+# Add the OSTs selected by TGTPOOL_FIRST/TGTPOOL_MAX/TGTPOOL_STEP to the pool
+# using the indexed-list syntax of pool_add, then compare the sorted pool
+# content with the list of UUIDs generated locally by seq.
+# NOTE(review): seq formats the index in decimal (%04g); if OST names use a
+# hexadecimal index the comparison would differ from index 10 on -- confirm.
+test_201() {
+ TGT=$(seq -f $FSNAME-OST%04g_UUID $TGTPOOL_FIRST $TGTPOOL_STEP \
+ $TGTPOOL_MAX | tr '\n' ' ')
+ do_facet mgs $LCTL pool_add $FSNAME.$POOL \
+ $FSNAME-OST[$TGTPOOL_FIRST-$TGTPOOL_MAX/$TGTPOOL_STEP]_UUID
+ res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | sort \
+ | tr '\n' ' ')
+ [ "$res" = "$TGT" ] || error "Pool content ($res) do not match requested ($TGT)"
+}
+run_test 201 "Add targets to a pool ===================================="
+
+# Create $POOL_DIR and set its default striping to 2 stripes taken from
+# $POOL; fail the test if setstripe returns an error.
+test_202a() {
+ mkdir -p $POOL_DIR
+ $SETSTRIPE -c 2 -p $POOL $POOL_DIR
+ [ $? = 0 ] || error "Cannot set pool $POOL to $POOL_DIR"
+}
+run_test 202a "Set pool on a directory ================================="
+
+# Read the pool back from $POOL_DIR: the 8th space-separated field of the
+# "pool:" line printed by $GETSTRIPE must be equal to $POOL.
+test_202b() {
+ res=$($GETSTRIPE $POOL_DIR | grep pool: | cut -f8 -d " ")
+ [ "$res" = $POOL ] || error "Pool on $POOL_DIR is not $POOL"
+}
+run_test 202b "Check pool on a directory ==============================="
+
+# Create 3 * $TGT_COUNT files in $POOL_DIR with touch (no explicit striping)
+# and count how many of them fail check_file_in_pool; any failure fails the
+# test.
+test_202c() {
+ failed=0
+ for i in $(seq -w 1 $(($TGT_COUNT * 3)))
+ do
+ file=$POOL_DIR/file-$i
+ touch $file
+ check_file_in_pool $file
+ if [[ $? != 0 ]]
+ then
+ failed=$(($failed + 1))
+ fi
+ done
+ [ "$failed" = 0 ] || error "$failed files not allocated in $POOL"
+}
+run_test 202c "Check files allocation from directory pool =============="
+
+# Create 3 * $TGT_COUNT files under $POOL_FILE, each one created directly
+# with "$SETSTRIPE -p $POOL", and count how many of them fail
+# check_file_in_pool; any failure fails the test.
+test_203() {
+ mkdir -p $POOL_FILE
+ failed=0
+ for i in $(seq -w 1 $(($TGT_COUNT * 3)))
+ do
+ file=$POOL_FILE/spoo-$i
+ $SETSTRIPE -p $POOL $file
+ check_file_in_pool $file
+ if [[ $? != 0 ]]
+ then
+ failed=$(($failed + 1))
+ fi
+ done
+ [ "$failed" = 0 ] || error "$failed files not allocated in $POOL"
+}
+run_test 203 "Create files in a pool ==================================="
+
+# Remove the first target listed in the pool with pool_remove and check that
+# it no longer appears in the pool listing.
+test_210a() {
+ TGT=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | head -1)
+ do_facet mgs $LCTL pool_remove $FSNAME.$POOL $TGT
+ res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | grep $TGT)
+ [ "$res" = "" ] || error "$TGT not removed from $FSNAME.$POOL"
+}
+run_test 210a "Remove a target from a pool ============================="
+
+# Remove every target still listed in the pool, one pool_remove call per
+# target, then check that the pool listing is empty.
+test_210b() {
+ for TGT in $(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL)
+ do
+ do_facet mgs $LCTL pool_remove $FSNAME.$POOL $TGT
+ done
+ res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL)
+ [ "$res" = "" ] || error "Pool $FSNAME.$POOL cannot be drained"
+}
+run_test 210b "Remove all targets from a pool =========================="
+
+# Destroy the pool and check that reading its entry prints nothing; the
+# stderr of get_param is discarded so that only real output counts.
+test_211() {
+ do_facet mgs $LCTL pool_destroy $FSNAME.$POOL
+ res=$(do_facet mgs "$LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL 2>/dev/null")
+ [ "$res" = "" ] || error "Pool $FSNAME.$POOL is not destroyed"
+}
+run_test 211 "Remove a pool ============================================"
+
TMPDIR=$OLDTMPDIR
TMP=$OLDTMP
HOME=$OLDHOME
export LUSTRE=`absolute_path $LUSTRE`
export TESTSUITE=`basename $0 .sh`
- [ -d /r ] && export ROOT=${ROOT:-/r}
+ #[ -d /r ] && export ROOT=${ROOT:-/r}
export TMP=${TMP:-$ROOT/tmp}
export TESTSUITELOG=${TMP}/${TESTSUITE}.log
export HOSTNAME=${HOSTNAME:-`hostname`}
do_node $client "lctl set_param debug=$PTLDEBUG;
lctl set_param subsystem_debug=${SUBSYSTEM# };
lctl set_param debug_mb=${DEBUG_SIZE}"
- [ -d /r ] && $LCTL modules > /r/tmp/ogdb-$HOSTNAME
+
return 0
}
lib_LIBRARIES = liblustreapi.a libiam.a
lctl_SOURCES = obd.c lustre_cfg.c lctl.c obdctl.h
-lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
+lctl_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL)
lctl_DEPENDENCIES := $(LIBPTLCTL)
lfs_SOURCES = lfs.c obd.c lustre_cfg.c
lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a
loadgen_SOURCES = loadgen.c lustre_cfg.c obd.c
-loadgen_LDADD := $(LIBREADLINE) $(LIBPTLCTL) $(PTHREAD_LIBS)
+loadgen_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) $(PTHREAD_LIBS)
loadgen_DEPENDENCIES := $(LIBPTLCTL)
if EXT2FS_DEVEL
"get the device info of a attached file\n"
"usage: blockdev_info <device_name>"},
+ /* Pool commands */
+ {"=== Pools ==", jt_noop, 0, "pool management"},
+ {"pool_new", jt_pool_cmd, 0,
+ "add a new pool\n"
+ "usage pool_new <fsname>.<poolname>"},
+ {"pool_add", jt_pool_cmd, 0,
+ "add the named OSTs to the pool\n"
+ "usage pool_add <fsname>.<poolname> <ostname indexed list>"},
+ {"pool_remove", jt_pool_cmd, 0,
+ "remove the named OST from the pool\n"
+ "usage pool_remove <fsname>.<poolname> <ostname indexed list>"},
+ {"pool_destroy", jt_pool_cmd, 0,
+ "destroy a pool\n"
+ "usage pool_destroy <fsname>.<poolname>"},
+ {"pool_list", jt_pool_cmd, 0,
+ "list pools and pools members\n"
+ "usage pool_list <fsname>[.<poolname>] | <pathname>"},
+
/* Test only commands */
{"==== testing (DANGEROUS) ====", jt_noop, 0, "testing (DANGEROUS)"},
{"--threads", jt_opt_threads, 0,
* Author: Robert Read <rread@clusterfs.com>
*/
+/* for O_DIRECTORY */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
#include <stdlib.h>
#include <stdio.h>
#include <getopt.h>
static int lfs_rgetfacl(int argc, char **argv);
static int lfs_cp(int argc, char **argv);
static int lfs_ls(int argc, char **argv);
+static int lfs_poollist(int argc, char **argv);
/* all avaialable commands */
command_t cmdlist[] = {
"Create a new file with a specific striping pattern or\n"
"set the default striping pattern on an existing directory or\n"
"delete the default striping pattern from an existing directory\n"
- "usage: setstripe <filename|dirname> <stripe_size> <stripe_index> <stripe_count>\n"
- " or \n"
- " setstripe <filename|dirname> [--size|-s stripe_size]\n"
- " [--index|-i stripe_index]\n"
- " [--count|-c stripe_count]\n"
+ "usage: setstripe [--size|-s stripe_size] [--offset|-o start_ost]\n"
+ " [--count|-c stripe_count] [--pool|-p pool_name]\n"
+ " <dir|filename>\n"
" or \n"
- " setstripe -d <dirname> (to delete default striping)\n"
+ " setstripe -d <dir> (to delete default striping)\n"
"\tstripe_size: Number of bytes on each OST (0 filesystem default)\n"
- "\t Can be specified with k, m or g (in KB, MB and GB respectively)\n"
- "\tstripe_index: OST index of first stripe (-1 filesystem default)\n"
- "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)"},
+ "\t Can be specified with k, m or g (in KB, MB and GB\n"
+ "\t respectively)\n"
+ "\tstart_ost: OST index of first stripe (-1 filesystem default)\n"
+ "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n"
+ "\tpool_name: Name of OST pool"},
{"getstripe", lfs_getstripe, 0,
- "To list the striping info for a given filename or files in a\n"
+ "To list the striping info for a given file or files in a\n"
"directory or recursively for all files in a directory tree.\n"
"usage: getstripe [--obd|-O <uuid>] [--quiet | -q] [--verbose | -v]\n"
" [--recursive | -r] <dir|file> ..."},
+ {"poollist", lfs_poollist, 0,
+ "List pools or pool OSTs\n"
+ "usage: poollist <fsname>[.<poolname>] | <pathname>\n"},
{"find", lfs_find, 0,
"To find files that match given parameters recursively in a directory tree.\n"
- "usage: find <dir/file> ... \n"
+ "usage: find <dir|file> ... \n"
" [[!] --atime|-A [+-]N] [[!] --mtime|-M [+-]N] [[!] --ctime|-C [+-]N]\n"
" [--maxdepth|-D N] [[!] --name|-n <pattern>] [--print0|-P]\n"
" [--print|-p] [--obd|-O <uuid[s]>] [[!] --size|-s [+-]N[bkMGTP]]\n"
" [[!] --type|-t <filetype>] [[!] --gid|-g N] [[!] --group|-G <name>]\n"
" [[!] --uid|-u N] [[!] --user|-U <name>]\n"
+ " [[!] --pool <name>]\n"
"\t !: used before an option indicates 'NOT' the requested attribute\n"
"\t -: used before an value indicates 'AT MOST' the requested value\n"
"\t +: used before an option indicates 'AT LEAST' the requested value\n"},
char *stripe_size_arg = NULL;
char *stripe_off_arg = NULL;
char *stripe_count_arg = NULL;
+ char *pool_name_arg = NULL;
unsigned long long size_units;
struct option long_opts[] = {
{"size", required_argument, 0, 's'},
{"count", required_argument, 0, 'c'},
{"index", required_argument, 0, 'i'},
+ {"offset", required_argument, 0, 'o'},
+ {"pool", required_argument, 0, 'p'},
{"delete", no_argument, 0, 'd'},
{0, 0, 0, 0}
};
* usage */
fname = argv[2];
optind = 2;
- } else if (argc == 5 &&
+ } else if (argc == 5 &&
(argv[2][0] != '-' || isdigit(argv[2][1])) &&
(argv[3][0] != '-' || isdigit(argv[3][1])) &&
(argv[4][0] != '-' || isdigit(argv[4][1])) ) {
optind = 4;
} else {
optind = 0;
- while ((c = getopt_long(argc, argv, "c:di:s:",
+ while ((c = getopt_long(argc, argv, "c:di:o:s:p:",
long_opts, NULL)) >= 0) {
switch (c) {
case 0:
delete = 1;
break;
case 'i':
+ case 'o':
stripe_off_arg = optarg;
break;
case 's':
stripe_size_arg = optarg;
break;
+ case 'p':
+ pool_name_arg = optarg;
+ break;
case '?':
return CMD_HELP;
default:
return CMD_HELP;
- if (delete &&
- (stripe_size_arg != NULL || stripe_off_arg != NULL ||
- stripe_count_arg != NULL)) {
+ if (delete &&
+ (stripe_size_arg != NULL || stripe_off_arg != NULL ||
+ stripe_count_arg != NULL || pool_name_arg != NULL)) {
fprintf(stderr, "error: %s: cannot specify -d with "
- "-s, -c or -i options\n",
+ "-s, -c -o or -p options\n",
argv[0]);
return CMD_HELP;
}
}
}
- result = llapi_file_create(fname, st_size, st_offset, st_count, 0);
+ if (pool_name_arg == NULL)
+ result = llapi_file_create(fname, st_size, st_offset, st_count, 0);
+ else
+ result = llapi_file_create_pool(fname, st_size, st_offset,
+ st_count, 0, pool_name_arg);
+
if (result)
fprintf(stderr, "error: %s: create stripe file failed\n",
argv[0]);
return result;
}
+/*
+ * "lfs poollist" command: expects exactly one argument after the command
+ * name and passes it to llapi_poollist(); any other argument count asks the
+ * command parser to print the help text.
+ */
+static int lfs_poollist(int argc, char **argv)
+{
+        return argc == 2 ? llapi_poollist(argv[1]) : CMD_HELP;
+}
+
static int set_time(time_t *time, time_t *set, char *str)
{
time_t t;
int res = 0;
-
+
if (str[0] == '+')
res = 1;
else if (str[0] == '-')
return 0;
}
+#define FIND_POOL_OPT 3
static int lfs_find(int argc, char **argv)
{
int new_fashion = 1;
{"uid", required_argument, 0, 'u'},
{"user", required_argument, 0, 'U'},
{"name", required_argument, 0, 'n'},
+ /* no short option for pool, p/P already used */
+ {"pool", required_argument, 0, FIND_POOL_OPT},
/* --obd is considered as a new option. */
{"obd", required_argument, 0, 'O'},
{"ost", required_argument, 0, 'O'},
new_fashion = 1;
param.gid = strtol(optarg, &endptr, 10);
if (optarg == endptr) {
- ret = name2id(&param.gid, optarg, GRPQUOTA);
- if (ret != 0) {
+ ret = name2id(&param.gid, optarg, GRPQUOTA);
+ if (ret != 0) {
fprintf(stderr, "Group/GID: %s cannot "
"be found.\n", optarg);
return -1;
new_fashion = 1;
param.uid = strtol(optarg, &endptr, 10);
if (optarg == endptr) {
- ret = name2id(&param.uid, optarg, USRQUOTA);
- if (ret != 0) {
+ ret = name2id(&param.uid, optarg, USRQUOTA);
+ if (ret != 0) {
fprintf(stderr, "User/UID: %s cannot "
"be found.\n", optarg);
return -1;
param.exclude_uid = !!neg_opt;
param.check_uid = 1;
break;
+ case FIND_POOL_OPT:
+ new_fashion = 1;
+ if (strlen(optarg) > MAXPOOLNAME) {
+ fprintf(stderr,
+ "Pool name %s is too long"
+ " (max is %d)\n", optarg,
+ MAXPOOLNAME);
+ return -1;
+ }
+ /* we do not check for an empty pool name because an empty
+ * pool name is used to find V1 lov attributes */
+ strncpy(param.poolname, optarg, MAXPOOLNAME);
+ param.poolname[MAXPOOLNAME] = '\0';
+ param.exclude_pool = !!neg_opt;
+ param.check_pool = 1;
+ break;
case 'n':
new_fashion = 1;
param.pattern = (char *)optarg;
return CMD_HELP;
};
}
-
+
if (pathstart == -1) {
fprintf(stderr, "error: %s: no filename|pathname\n",
argv[0]);
if (!param.recursive && param.maxdepth == -1)
param.maxdepth = 1;
}
-
+
do {
if (new_fashion)
 ret = llapi_find(argv[pathstart], &param);
} while (++optind < argc && !rc);
if (rc)
- fprintf(stderr, "error: %s failed for %s.\n",
+ fprintf(stderr, "error: %s failed for %s.\n",
argv[0], argv[optind - 1]);
return rc;
}
* 2. specifiers may be encountered multiple times (2s3s is 5 seconds)
* 3. empty integer value is interpreted as 0
*/
-
+
static unsigned long str2sec(const char* timestr) {
const char spec[] = "smhdw";
const unsigned long mult[] = {1, 60, 60*60, 24*60*60, 7*24*60*60};
v = strtoul(timestr, &tail, 10);
if (v == ULONG_MAX || *tail == '\0')
- /* value too large (ULONG_MAX or more)
+ /* value too large (ULONG_MAX or more)
or missing specifier */
goto error;
#include <sys/types.h>
#include <sys/syscall.h>
#include <fnmatch.h>
+#include <glob.h>
#ifdef HAVE_LINUX_UNISTD_H
#include <linux/unistd.h>
#else
return 0;
}
-int llapi_file_open(const char *name, int flags, int mode,
- unsigned long stripe_size, int stripe_offset,
- int stripe_count, int stripe_pattern)
+int llapi_stripe_limit_check(unsigned long stripe_size, int stripe_offset,
+ int stripe_count, int stripe_pattern)
{
- struct lov_user_md lum = { 0 };
- int fd, rc = 0;
- int isdir = 0;
int page_size;
- fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
- if (fd < 0 && errno == EISDIR) {
- fd = open(name, O_DIRECTORY | O_RDONLY);
- isdir++;
- }
-
- if (fd < 0) {
- rc = -errno;
- llapi_err(LLAPI_MSG_ERROR, "unable to open '%s'", name);
- return rc;
- }
-
/* 64 KB is the largest common page size I'm aware of (on ia64), but
* check the local page size just in case. */
page_size = LOV_MIN_STRIPE_SIZE;
if (getpagesize() > page_size) {
page_size = getpagesize();
- llapi_err_noerrno(LLAPI_MSG_WARN,
+ llapi_err_noerrno(LLAPI_MSG_WARN,
"warning: your page size (%u) is "
- "larger than expected (%u)", page_size,
+ "larger than expected (%u)", page_size,
LOV_MIN_STRIPE_SIZE);
}
if (stripe_size < 0 || (stripe_size & (LOV_MIN_STRIPE_SIZE - 1))) {
- errno = rc = -EINVAL;
llapi_err(LLAPI_MSG_ERROR, "error: bad stripe_size %lu, "
- "must be an even multiple of %d bytes",
+ "must be an even multiple of %d bytes",
stripe_size, page_size);
- goto out;
+ return -EINVAL;
}
if (stripe_offset < -1 || stripe_offset > MAX_OBD_DEVICES) {
- errno = rc = -EINVAL;
- llapi_err(LLAPI_MSG_ERROR, "error: bad stripe offset %d",
+ llapi_err(LLAPI_MSG_ERROR, "error: bad stripe offset %d",
stripe_offset);
- goto out;
+ return -EINVAL;
}
if (stripe_count < -1 || stripe_count > LOV_MAX_STRIPE_COUNT) {
- errno = rc = -EINVAL;
- llapi_err(LLAPI_MSG_ERROR, "error: bad stripe count %d",
+ llapi_err(LLAPI_MSG_ERROR, "error: bad stripe count %d",
stripe_count);
- goto out;
+ return -EINVAL;
}
if (stripe_count > 0 && (__u64)stripe_size * stripe_count > 0xffffffff){
- errno = rc = -EINVAL;
llapi_err(LLAPI_MSG_ERROR, "error: stripe_size %lu * "
- "stripe_count %u exceeds 4GB", stripe_size,
+ "stripe_count %u exceeds 4GB", stripe_size,
stripe_count);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int llapi_file_open(const char *name, int flags, int mode,
+ unsigned long stripe_size, int stripe_offset,
+ int stripe_count, int stripe_pattern)
+{
+ struct lov_user_md lum = { 0 };
+ int fd, rc = 0;
+ int isdir = 0;
+
+ fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
+ if (fd < 0 && errno == EISDIR) {
+ fd = open(name, O_DIRECTORY | O_RDONLY);
+ isdir++;
+ }
+
+ if (fd < 0) {
+ rc = -errno;
+ llapi_err(LLAPI_MSG_ERROR, "unable to open '%s'", name);
+ return rc;
+ }
+
+ if ((rc = llapi_stripe_limit_check(stripe_size, stripe_offset,
+ stripe_count, stripe_pattern)) != 0) {
+ errno = rc;
goto out;
}
return fd;
}
+static int poolpath(char *fsname, char *pathname, char *pool_pathname);
+
+/**
+ * Open \a name (a directory is reopened read-only with O_DIRECTORY when
+ * open() fails with EISDIR) and set its striping, including the OST pool,
+ * with the LL_IOC_LOV_SETSTRIPE ioctl on a v3 lov_user_md.
+ *
+ * \param name            file or directory to open
+ * \param flags, mode     passed to open(); O_LOV_DELAY_CREATE is added
+ * \param stripe_size, stripe_offset, stripe_count, stripe_pattern
+ *                        striping, validated by llapi_stripe_limit_check()
+ * \param pool_name       "<poolname>" or "<fsname>.<poolname>"; the fsname
+ *                        prefix is skipped only when poolpath() knows it
+ *
+ * \retval the open file descriptor on success
+ * \retval negative errno on failure (the descriptor is closed)
+ */
+int llapi_file_open_pool(const char *name, int flags, int mode,
+                         unsigned long stripe_size, int stripe_offset,
+                         int stripe_count, int stripe_pattern, char *pool_name)
+{
+        struct lov_user_md_v3 lum = { 0 };
+        int fd, rc = 0;
+        char fsname[MAX_OBD_NAME + 1], *ptr;
+
+        /* strchr()/strncpy() below would dereference a NULL pool name */
+        if (pool_name == NULL)
+                return -EINVAL;
+
+        fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
+        if (fd < 0 && errno == EISDIR)
+                fd = open(name, O_DIRECTORY | O_RDONLY);
+
+        if (fd < 0) {
+                rc = -errno;
+                llapi_err(LLAPI_MSG_ERROR, "unable to open '%s'", name);
+                return rc;
+        }
+
+        if ((rc = llapi_stripe_limit_check(stripe_size, stripe_offset,
+                                           stripe_count, stripe_pattern)) != 0) {
+                /* rc is a negative errno, errno itself must be positive */
+                errno = -rc;
+                goto out;
+        }
+
+        /* in case user give the full pool name <fsname>.<poolname>, skip
+         * the fsname */
+        ptr = strchr(pool_name, '.');
+        /* a prefix that does not fit in fsname[] cannot be copied safely;
+         * in that case the pool name is kept as is */
+        if (ptr != NULL && (size_t)(ptr - pool_name) < sizeof(fsname)) {
+                memcpy(fsname, pool_name, ptr - pool_name);
+                fsname[ptr - pool_name] = '\0';
+                /* if fsname matches a fs skip it
+                 * if not keep the poolname as is */
+                if (poolpath(fsname, NULL, NULL) == 0)
+                        pool_name = ptr + 1;
+        }
+
+        /* Initialize IOCTL striping pattern structure */
+        lum.lmm_magic = LOV_USER_MAGIC_V3;
+        lum.lmm_pattern = stripe_pattern;
+        lum.lmm_stripe_size = stripe_size;
+        lum.lmm_stripe_count = stripe_count;
+        lum.lmm_stripe_offset = stripe_offset;
+        /* NOTE(review): relies on lum being zero-filled and on lmm_pool_name
+         * holding more than MAXPOOLNAME bytes for NUL termination; a longer
+         * pool name is silently truncated -- confirm */
+        strncpy(lum.lmm_pool_name, pool_name, MAXPOOLNAME);
+
+        if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, &lum)) {
+                char *errmsg = "stripe already set";
+                rc = -errno;
+                if (errno != EEXIST && errno != EALREADY)
+                        errmsg = strerror(errno);
+
+                llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                  "error on ioctl "LPX64" for '%s' (%d): %s",
+                                  (__u64)LL_IOC_LOV_SETSTRIPE, name, fd, errmsg);
+        }
+out:
+        if (rc) {
+                close(fd);
+                fd = rc;
+        }
+
+        return fd;
+}
+
int llapi_file_create(const char *name, unsigned long stripe_size,
int stripe_offset, int stripe_count, int stripe_pattern)
{
return 0;
}
+/*
+ * Create \a name with the given striping and pool: the file is opened
+ * through llapi_file_open_pool() with O_CREAT | O_WRONLY and mode 0644, and
+ * the descriptor is closed immediately.
+ * Returns 0 on success or the negative value returned by
+ * llapi_file_open_pool().
+ */
+int llapi_file_create_pool(const char *name, unsigned long stripe_size,
+                           int stripe_offset, int stripe_count,
+                           int stripe_pattern, char *pool_name)
+{
+        int rc = llapi_file_open_pool(name, O_CREAT | O_WRONLY, 0644,
+                                      stripe_size, stripe_offset,
+                                      stripe_count, stripe_pattern,
+                                      pool_name);
+
+        /* a non-negative value is an open descriptor we do not need */
+        if (rc >= 0) {
+                close(rc);
+                rc = 0;
+        }
+        return rc;
+}
+
+
+/*
+ * Print the header "Pool: <fs>.<pool_file>" followed by the content of the
+ * file <pool_dir>/<pool_file>, line by line.
+ *
+ * Returns 0 on success, -ENAMETOOLONG when the path does not fit in the
+ * local buffer and -EINVAL when the file cannot be opened.
+ */
+static int print_pool_members(char *fs, char *pool_dir, char *pool_file)
+{
+        char path[PATH_MAX + 1];
+        char buf[1024];
+        FILE *fp;
+        int len;
+
+        llapi_printf(LLAPI_MSG_NORMAL, "Pool: %s.%s\n", fs, pool_file);
+        /* bounded formatting: pool_file comes from the command line */
+        len = snprintf(path, sizeof(path), "%s/%s", pool_dir, pool_file);
+        if (len < 0 || (size_t)len >= sizeof(path))
+                return -ENAMETOOLONG;
+
+        if ((fp = fopen(path, "r")) == NULL) {
+                llapi_err(LLAPI_MSG_ERROR, "Cannot open %s\n", path);
+                return -EINVAL;
+        }
+        /* never use file content as a format string: a '%' in a line would
+         * be interpreted as a conversion specification */
+        while (fgets(buf, sizeof(buf), fp) != NULL)
+                llapi_printf(LLAPI_MSG_NORMAL, "%s", buf);
+
+        fclose(fp);
+        return 0;
+}
+
+/*
+ * search lustre fsname from pathname
+ *
+ */
+/*
+ * Walk the mount table (MOUNTED) and, for the first Lustre mount whose
+ * mount directory is a prefix of \a pathname, copy into \a fsname the part
+ * of the mounted device name that follows its first '/'.
+ *
+ * Returns 0 on success, -EIO when the mount table cannot be opened,
+ * -EINVAL when the device name of the matching mount has no '/', and
+ * -ENOENT when no Lustre mount matches.
+ *
+ * NOTE(review): the match is a plain string prefix, so "/mnt/lustre" also
+ * matches a path under "/mnt/lustre2"; and \a fsname must be large enough
+ * for the copied name, its size is not known here.
+ */
+static int search_fsname(char *pathname, char *fsname)
+{
+        struct mntent *mnt;
+        FILE *fp;
+        char *ptr;
+        int rc = -ENOENT;
+
+        /* get the mount point */
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL) {
+                llapi_err(LLAPI_MSG_ERROR,
+                          "setmntent(%s) failed: %s:", MOUNTED,
+                          strerror (errno));
+                return -EIO;
+        }
+
+        /* getmntent() returns NULL at end of table or on error, so no
+         * NULL entry is ever handed to llapi_is_lustre_mnt() */
+        while ((mnt = getmntent(fp)) != NULL) {
+                if (!llapi_is_lustre_mnt(mnt))
+                        continue;
+                /* search by pathname */
+                if (strncmp(mnt->mnt_dir, pathname,
+                            strlen(mnt->mnt_dir)) != 0)
+                        continue;
+
+                ptr = strchr(mnt->mnt_fsname, '/');
+                if (ptr == NULL) {
+                        rc = -EINVAL;
+                } else {
+                        strcpy(fsname, ptr + 1);
+                        rc = 0;
+                }
+                break;
+        }
+
+        /* single exit: the mount table stream is closed on every path */
+        endmntent(fp);
+        return rc;
+}
+
+/*
+ * find the pool directory path under /proc
+ * (can be also used to test if a fsname is known)
+ */
+/*
+ * find the pool directory path under /proc
+ * (can be also used to test if a fsname is known)
+ *
+ * fsname:        filesystem name, or NULL to look it up from \a pathname
+ *                with search_fsname(); in that case \a pathname is
+ *                overwritten with the filesystem name that was found
+ * pathname:      path inside the filesystem, only used when fsname is NULL
+ * pool_pathname: receives the first match of
+ *                /proc/fs/lustre/lov/<fsname>-*/pools, or NULL when the
+ *                caller only wants to test the filesystem name
+ *
+ * Returns 0 on success, -EINVAL when both fsname and pathname are NULL,
+ * the search_fsname() error, or -ENOENT when nothing matches.
+ */
+static int poolpath(char *fsname, char *pathname, char *pool_pathname)
+{
+        int rc = 0;
+        glob_t glob_info;
+        char pattern[PATH_MAX + 1];
+        char buffer[PATH_MAX];
+
+        if (fsname == NULL) {
+                if (pathname == NULL)
+                        return -EINVAL;
+                rc = search_fsname(pathname, buffer);
+                if (rc != 0)
+                        return rc;
+                fsname = buffer;
+                strcpy(pathname, fsname);
+        }
+
+        snprintf(pattern, PATH_MAX,
+                 "/proc/fs/lustre/lov/%s-*/pools",
+                 fsname);
+        rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
+        if (rc)
+                return -ENOENT;
+
+        if (glob_info.gl_pathc == 0) {
+                globfree(&glob_info);
+                return -ENOENT;
+        }
+
+        /* in fsname test mode, pool_pathname is NULL */
+        if (pool_pathname != NULL)
+                strcpy(pool_pathname, glob_info.gl_pathv[0]);
+
+        /* the result vector is allocated by glob(): release it on the
+         * success path too */
+        globfree(&glob_info);
+        return 0;
+}
+
+/**
+ * List the pools of a filesystem, or the members of one pool.
+ *
+ * \param name  either an absolute pathname inside a Lustre filesystem (all
+ *              the pools of that filesystem are listed), or
+ *              FSNAME[.POOLNAME] (the pools of FSNAME, or the members of
+ *              FSNAME.POOLNAME). The string is split in place at the '.'
+ *              while the function runs and is restored before returning.
+ *
+ * \retval 0 on success
+ * \retval negative errno on failure
+ */
+int llapi_poollist(char *name)
+{
+        char rname[PATH_MAX + 1], pathname[PATH_MAX + 1];
+        char *poolname = NULL;
+        char *fsname;
+        int rc = 0;
+
+        /* is name a pathname ? */
+        if (strchr(name, '/') != NULL) {
+                /* only absolute pathname is supported */
+                if (*name != '/')
+                        return -EINVAL;
+                if (!realpath(name, rname)) {
+                        rc = -errno;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: invalid path '%s'",
+                                  name);
+                        return rc;
+                }
+
+                /* on success poolpath() replaces rname by the fsname */
+                rc = poolpath(NULL, rname, pathname);
+                if (rc != 0) {
+                        errno = -rc;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: '%s' is not"
+                                  " a Lustre filesystem",
+                                  name);
+                        return rc;
+                }
+                fsname = rname;
+        } else {
+                /* name is FSNAME[.POOLNAME] */
+                fsname = name;
+                poolname = strchr(name, '.');
+                if (poolname != NULL) {
+                        *poolname = '\0';
+                        poolname++;
+                }
+                rc = poolpath(fsname, NULL, pathname);
+                if (rc != 0) {
+                        errno = -rc;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: Lustre filesystem '%s'"
+                                  " not found", name);
+                        goto out;
+                }
+        }
+
+        if (poolname != NULL) {
+                rc = print_pool_members(fsname, pathname, poolname);
+        } else {
+                DIR *dir;
+                struct dirent *pool;
+
+                llapi_printf(LLAPI_MSG_NORMAL, "Pools from %s:\n", fsname);
+                if ((dir = opendir(pathname)) == NULL) {
+                        rc = -errno;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: cannot open '%s'",
+                                  pathname);
+                        goto out;
+                }
+                while ((pool = readdir(dir)) != NULL) {
+                        /* skip the "." and ".." entries */
+                        if (strcmp(pool->d_name, ".") == 0 ||
+                            strcmp(pool->d_name, "..") == 0)
+                                continue;
+                        llapi_printf(LLAPI_MSG_NORMAL, " %s.%s\n",
+                                     fsname, pool->d_name);
+                }
+                closedir(dir);
+        }
+out:
+        /* give the caller its string back, on failure as well */
+        if (poolname != NULL)
+                poolname[-1] = '.';
+        return rc;
+}
+
typedef int (semantic_func_t)(char *path, DIR *parent, DIR *d,
void *data, cfs_dirent_t *de);
static int common_param_init(struct find_param *param)
{
- param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT);
+ param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC_V3);
if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: allocation of %d bytes for ioctl",
sizeof(lstat_t) + param->lumlen);
return -ENOMEM;
free(param->lmd);
}
-int llapi_file_get_lov_fuuid(int fd, struct obd_uuid *lov_name)
+int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_name)
{
int rc = ioctl(fd, OBD_IOC_GETNAME, lov_name);
if (rc) {
fd = open(path, O_RDONLY);
if (fd < 0) {
rc = errno;
- llapi_err(LLAPI_MSG_ERROR, "error opening %s\n", path);
+ llapi_err(LLAPI_MSG_ERROR, "error opening %s", path);
return rc;
}
- rc = llapi_file_get_lov_fuuid(fd, lov_uuid);
+ rc = llapi_file_fget_lov_uuid(fd, lov_uuid);
close(fd);
int rc = 0, index = 0;
/* Get the lov name */
- rc = llapi_file_get_lov_fuuid(fd, &lov_name);
+ rc = llapi_file_fget_lov_uuid(fd, &lov_name);
if (rc)
return rc;
int rc = 0, index;
/* Get the lov name */
- rc = llapi_file_get_lov_fuuid(dirfd(dir), &lov_uuid);
+ rc = llapi_file_fget_lov_uuid(dirfd(dir), &lov_uuid);
if (rc) {
if (errno != ENOTTY) {
rc = errno;
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: can't get lov name: %s", dname);
} else {
rc = 0;
if (!param->quiet && param->obduuid &&
(param->obdindex == OBD_NOT_FOUND)) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
+ llapi_err_noerrno(LLAPI_MSG_ERROR,
"error: %s: unknown obduuid: %s",
__FUNCTION__, param->obduuid->uuid);
//rc = EINVAL;
return 0;
}
-void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *path, int is_dir,
- int obdindex, int quiet, int header, int body)
+void lov_dump_user_lmm_v1v3(struct lov_user_md *lum, char *pool_name,
+ struct lov_user_ost_data_v1 *objects,
+ char *path, int is_dir,
+ int obdindex, int quiet, int header, int body)
{
int i, obdstripe = 0;
if (obdindex != OBD_NOT_FOUND) {
for (i = 0; !is_dir && i < lum->lmm_stripe_count; i++) {
- if (obdindex == lum->lmm_objects[i].l_ost_idx) {
+ if (obdindex == objects[i].l_ost_idx) {
llapi_printf(LLAPI_MSG_NORMAL, "%s\n", path);
obdstripe = 1;
break;
llapi_printf(LLAPI_MSG_NORMAL, "(Default) ");
lum->lmm_object_gr = LOV_OBJECT_GROUP_CLEAR;
}
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"stripe_count: %d stripe_size: %u "
- "stripe_offset: %d\n",
+ "stripe_offset: %d%s%s\n",
lum->lmm_stripe_count == (__u16)-1 ? -1 :
- lum->lmm_stripe_count,
+ lum->lmm_stripe_count,
lum->lmm_stripe_size,
lum->lmm_stripe_offset == (__u16)-1 ? -1 :
- lum->lmm_stripe_offset);
+ lum->lmm_stripe_offset,
+ pool_name != NULL ? " pool: " : "",
+ pool_name != NULL ? pool_name : "");
}
return;
}
if (header && (obdstripe == 1)) {
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"lmm_magic: 0x%08X\n", lum->lmm_magic);
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"lmm_object_gr: "LPX64"\n", lum->lmm_object_gr);
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"lmm_object_id: "LPX64"\n", lum->lmm_object_id);
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"lmm_stripe_count: %u\n", (int)lum->lmm_stripe_count);
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"lmm_stripe_size: %u\n", lum->lmm_stripe_size);
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"lmm_stripe_pattern: %x\n", lum->lmm_pattern);
+ if (pool_name != NULL)
+ llapi_printf(LLAPI_MSG_NORMAL,
+ "lmm_pool_name: %s\n", pool_name);
}
if (body) {
if ((!quiet) && (obdstripe == 1))
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"\tobdidx\t\t objid\t\tobjid\t\t group\n");
for (i = 0; i < lum->lmm_stripe_count; i++) {
- int idx = lum->lmm_objects[i].l_ost_idx;
- long long oid = lum->lmm_objects[i].l_object_id;
- long long gr = lum->lmm_objects[i].l_object_gr;
+ int idx = objects[i].l_ost_idx;
+ long long oid = objects[i].l_object_id;
+ long long gr = objects[i].l_object_gr;
if ((obdindex == OBD_NOT_FOUND) || (obdindex == idx))
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"\t%6u\t%14llu\t%#13llx\t%14llu%s\n",
idx, oid, oid, gr,
obdindex == idx ? " *" : "");
}
if (header && obdstripe == 1) {
- llapi_printf(LLAPI_MSG_NORMAL, "lmm_magic: 0x%08X\n",
+ llapi_printf(LLAPI_MSG_NORMAL, "lmm_magic: 0x%08X\n",
lumj->lmm_magic);
- llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_gr: "LPX64"\n",
+ llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_gr: "LPX64"\n",
lumj->lmm_object_gr);
- llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_id: "LPX64"\n",
+ llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_id: "LPX64"\n",
lumj->lmm_object_id);
- llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_count: %u\n",
+ llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_count: %u\n",
(int)lumj->lmm_stripe_count);
llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_size: %u\n",
lumj->lmm_stripe_size);
if (body) {
unsigned long long start = -1, end = 0;
if (!quiet && obdstripe == 1)
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"joined\tobdidx\t\t objid\t\tobjid\t\t group"
"\t\tstart\t\tend\n");
for (i = 0; i < lumj->lmm_stripe_count; i++) {
long long oid = lumj->lmm_objects[i].l_object_id;
long long gr = lumj->lmm_objects[i].l_object_gr;
if (obdindex == OBD_NOT_FOUND || obdindex == idx)
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"\t%6u\t%14llu\t%#13llx\t%14llu%s",
idx, oid, oid, gr,
obdindex == idx ? " *" : "");
llapi_printf(LLAPI_MSG_NORMAL, "\t%14llu", start);
end = lumj->lmm_objects[i].l_extent_end;
if (end == (unsigned long long)-1)
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"\t\tEOF\n");
else
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"\t\t%llu\n", end);
} else {
llapi_printf(LLAPI_MSG_NORMAL, "\t\t\t\t\n");
{
switch(*(__u32 *)¶m->lmd->lmd_lmm) { /* lum->lmm_magic */
case LOV_USER_MAGIC_V1:
- lov_dump_user_lmm_v1(¶m->lmd->lmd_lmm, path, is_dir,
- param->obdindex, param->quiet,
- param->verbose,
- (param->verbose || !param->obduuid));
+ lov_dump_user_lmm_v1v3(¶m->lmd->lmd_lmm, NULL,
+ param->lmd->lmd_lmm.lmm_objects,
+ path, is_dir,
+ param->obdindex, param->quiet,
+ param->verbose,
+ (param->verbose || !param->obduuid));
break;
case LOV_USER_MAGIC_JOIN:
lov_dump_user_lmm_join(¶m->lmd->lmd_lmm, path, is_dir,
param->verbose,
(param->verbose || !param->obduuid));
break;
+ case LOV_USER_MAGIC_V3: {
+ char pool_name[MAXPOOLNAME + 1];
+ struct lov_user_ost_data_v1 *objects;
+
+ strncpy(pool_name,
+ ((struct lov_user_md_v3 *)(¶m->lmd->lmd_lmm))->lmm_pool_name,
+ MAXPOOLNAME);
+ pool_name[MAXPOOLNAME] = '\0';
+ objects = ((struct lov_user_md_v3 *)(¶m->lmd->lmd_lmm))->lmm_objects;
+ lov_dump_user_lmm_v1v3(¶m->lmd->lmd_lmm, pool_name,
+ objects, path, is_dir,
+ param->obdindex, param->quiet,
+ param->verbose,
+ (param->verbose || !param->obduuid));
+ break;
+ }
default:
- llapi_printf(LLAPI_MSG_NORMAL,
- "unknown lmm_magic: %#x (expecting %#x)\n",
- *(__u32 *)¶m->lmd->lmd_lmm, LOV_USER_MAGIC_V1);
+ llapi_printf(LLAPI_MSG_NORMAL, "unknown lmm_magic: %#x "
+ "(expecting one of %#x %#x %#x)\n",
+ *(__u32 *)¶m->lmd->lmd_lmm,
+ LOV_USER_MAGIC_V1, LOV_USER_MAGIC_JOIN,
+ LOV_USER_MAGIC_V3);
return;
}
}
fname = (fname == NULL ? path : fname + 1);
/* retrieve needed file info */
- strncpy((char *)lmd, fname, lov_mds_md_size(MAX_LOV_UUID_COUNT));
+ strncpy((char *)lmd, fname,
+ lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC));
ret = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO, (void *)lmd);
if (ret) {
* Do the regular lstat(2) instead. */
ret = lstat_f(path, st);
if (ret) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: %s: lstat failed for %s",
__FUNCTION__, path);
return ret;
}
} else if (errno == ENOENT) {
- llapi_err(LLAPI_MSG_WARN,
- "warning: %s: %s does not exist",
+ llapi_err(LLAPI_MSG_WARN,
+ "warning: %s: %s does not exist",
__FUNCTION__, path);
return -ENOENT;
} else {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: %s: IOC_MDC_GETFILEINFO failed for %s",
__FUNCTION__, path);
return ret;
switch (dent->d_type) {
case DT_UNKNOWN:
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: %s: '%s' is UNKNOWN type %d",
__FUNCTION__, dent->d_name, dent->d_type);
break;
lustre_fs = 0;
ret = lstat_f(path, st);
if (ret) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: %s: lstat failed for %s",
__FUNCTION__, path);
return ret;
}
} else if (errno == ENOENT) {
- llapi_err(LLAPI_MSG_WARN,
+ llapi_err(LLAPI_MSG_WARN,
"warning: %s: %s does not exist",
__FUNCTION__, path);
goto decided;
goto decided;
} else {
int i, j;
+ struct lov_user_ost_data_v1 *lmm_objects;
+
+ if (param->lmd->lmd_lmm.lmm_magic ==
+ LOV_USER_MAGIC_V3) {
+ lmm_objects =
+ ((struct lov_user_md_v3 *)(&(param->lmd->lmd_lmm)))->lmm_objects;
+ } else {
+ lmm_objects = param->lmd->lmd_lmm.lmm_objects;
+ }
+
for (i = 0;
i < param->lmd->lmd_lmm.lmm_stripe_count; i++) {
for (j = 0; j < param->num_obds; j++) {
if (param->obdindexes[j] ==
- param->lmd->lmd_lmm.lmm_objects[i].l_ost_idx)
+ lmm_objects[i].l_ost_idx)
goto obd_matches;
}
}
}
}
+ if (param->check_pool) {
+ /* empty requested pool is taken as no pool search => V1 */
+ if (((param->lmd->lmd_lmm.lmm_magic == LOV_USER_MAGIC_V1) &&
+ (param->poolname[0] == '\0')) ||
+ ((param->lmd->lmd_lmm.lmm_magic == LOV_USER_MAGIC_V3) &&
+ (strncmp(((struct lov_user_md_v3 *)(&(param->lmd->lmd_lmm)))->lmm_pool_name,
+ param->poolname, MAXPOOLNAME) == 0)) ||
+ ((param->lmd->lmd_lmm.lmm_magic == LOV_USER_MAGIC_V3) &&
+ (strcmp(param->poolname, "*") == 0))) {
+ if (param->exclude_pool)
+ goto decided;
+ } else {
+ if (!param->exclude_pool)
+ goto decided;
+ }
+ }
+
/* Check the time on mds. */
if (!decision) {
int for_mds;
if (param->obdindex != OBD_NOT_FOUND) {
/* Check whether the obd is active or not, if it is
* not active, just print the object affected by this
- * failed ost
+ * failed ost
* */
struct obd_statfs stat_buf;
struct obd_uuid uuid_buf;
memset(&stat_buf, 0, sizeof(struct obd_statfs));
memset(&uuid_buf, 0, sizeof(struct obd_uuid));
ret = llapi_obd_statfs(path, LL_STATFS_LOV,
- param->obdindex, &stat_buf,
+ param->obdindex, &stat_buf,
&uuid_buf);
if (ret) {
- if (ret == -ENODATA || ret == -ENODEV
+ if (ret == -ENODATA || ret == -ENODEV
|| ret == -EIO)
errno = EIO;
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"obd_uuid: %s failed %s ",
- param->obduuid->uuid,
+ param->obduuid->uuid,
strerror(errno));
goto print_path;
}
if (ret) {
if (errno == ENOENT) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"warning: %s: %s does not exist",
__FUNCTION__, path);
goto decided;
} else {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"%s: IOC_LOV_GETINFO on %s failed",
__FUNCTION__, path);
return ret;
if (ret) {
if (errno == ENODATA) {
if (!param->obduuid && !param->quiet)
- llapi_printf(LLAPI_MSG_NORMAL,
+ llapi_printf(LLAPI_MSG_NORMAL,
"%s has no stripe info\n", path);
goto out;
} else if (errno == ENOTTY) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"%s: '%s' not on a Lustre fs?",
__FUNCTION__, path);
} else if (errno == ENOENT) {
- llapi_err(LLAPI_MSG_WARN,
+ llapi_err(LLAPI_MSG_WARN,
"warning: %s: %s does not exist",
__FUNCTION__, path);
goto out;
} else {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"error: %s: %s failed for %s",
__FUNCTION__, d ? "LL_IOC_LOV_GETSTRIPE" :
"IOC_MDC_GETFILESTRIPE", path);
int ret = 0, len = strlen(path);
if (len > PATH_MAX) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"%s: Path name '%s' is too long",
__FUNCTION__, path);
return -EINVAL;
data.ioc_plen2 = sizeof(struct obd_uuid);
if ((rc = obd_ioctl_pack(&data, &rawbuf, sizeof(raw))) != 0) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"llapi_obd_statfs: error packing ioctl data");
return rc;
}
if (fd < 0) {
rc = errno ? -errno : -EBADF;
- llapi_err(LLAPI_MSG_ERROR, "error: %s: opening '%s'",
+ llapi_err(LLAPI_MSG_ERROR, "error: %s: opening '%s'",
__FUNCTION__, path);
return rc;
}
if (rc) {
if (errno == ENODATA) {
if (!param->obduuid && !param->quiet)
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"%s has no stripe info", path);
rc = 0;
} else if (errno == ENOENT) {
- llapi_err(LLAPI_MSG_ERROR,
+ llapi_err(LLAPI_MSG_ERROR,
"warning: %s: %s does not exist",
__FUNCTION__, path);
rc = 0;
createtime, canceltime);
break;
}
+ case(LCFG_POOL_NEW):{
+ printf("pool new ");
+ print_1_cfg(lcfg);
+ break;
+ }
+ case(LCFG_POOL_ADD):{
+ printf("pool add ");
+ print_1_cfg(lcfg);
+ break;
+ }
+ case(LCFG_POOL_REM):{
+ printf("pool remove ");
+ print_1_cfg(lcfg);
+ break;
+ }
+ case(LCFG_POOL_DEL):{
+ printf("pool destroy ");
+ print_1_cfg(lcfg);
+ break;
+ }
default:
printf("unsupported cmd_code = %x\n",cmd);
}
#include <stdarg.h>
#include <signal.h>
#include <ctype.h>
+#include <glob.h>
#include "obdctl.h"
#include <lnet/lnetctl.h>
#include <libcfs/libcfsutil.h>
#include <stdio.h>
+#include <lustre/liblustreapi.h>
#define MAX_STRING_SIZE 128
#define DEVICES_LIST "/proc/fs/lustre/devices"
static int do_device(char *func, char *devname);
-int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg)
+/*
+ * Return the device number used to talk to the MGS.
+ *
+ * The number is looked up once and cached in a function-local static.
+ * On the first call the current device is disconnected and the "$MGS"
+ * device is selected through do_device(); cur_device is then remembered.
+ *
+ * Returns the cached device number, or -1 with errno set to ENODEV (and a
+ * message on stderr) when do_device() fails.
+ */
+static int get_mgs_device()
{
- struct obd_ioctl_data data;
- static int mgs_device = -1;
char mgs[] = "$MGS";
- int rc;
+ static int mgs_device = -1;
- /* Always operates on MGS dev */
if (mgs_device == -1) {
+ int rc;
do_disconnect(NULL, 1);
rc = do_device("mgsioc", mgs);
if (rc) {
+ fprintf(stderr,
+ "This command must be run on the MGS.\n");
errno = ENODEV;
return -1;
}
+ /* cache the selected device so later calls skip the lookup */
mgs_device = cur_device;
}
+ return mgs_device;
+}
+
+/*
+ * Send the configuration record lcfg with an OBD_IOC_PARAM ioctl, using the
+ * MGS device number from get_mgs_device() as ioc_dev.
+ *
+ * func is handed to IOC_PACK and dev_id to l_ioctl().
+ *
+ * Returns -1 on error with errno set
+ * (NOTE(review): the -1 comes from get_mgs_device(); the value returned on
+ * an l_ioctl() failure depends on l_ioctl(), which is not visible here).
+ * On any non-zero result a hint is printed for ENOSYS and EINVAL.
+ */
+int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg)
+{
+ struct obd_ioctl_data data;
+ int rc;
IOC_INIT(data);
- data.ioc_dev = mgs_device;
+ /* a negative value means the MGS device could not be selected */
+ rc = data.ioc_dev = get_mgs_device();
+ if (rc < 0)
+ goto out;
data.ioc_type = LUSTRE_CFG_TYPE;
data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
lcfg->lcfg_buflens);
data.ioc_pbuf1 = (void *)lcfg;
IOC_PACK(func, data);
- rc = l_ioctl(dev_id, OBD_IOC_PARAM, buf);
-
- if (rc == ENODEV)
- fprintf(stderr, "Is the MGS running on this node?\n");
- if (rc == ENOSYS)
- fprintf(stderr, "Make sure cfg_device is set first.\n");
- if (rc == EINVAL)
- fprintf(stderr, "cfg_device should be of the form "
- "'lustre-MDT0000'\n");
-
+ rc = l_ioctl(dev_id, OBD_IOC_PARAM, buf);
+out:
+ /* the hints are keyed on errno, not on the ioctl return value */
+ if (rc) {
+ if (errno == ENOSYS)
+ fprintf(stderr, "Make sure cfg_device is set first.\n");
+ if (errno == EINVAL)
+ fprintf(stderr, "cfg_device should be of the form "
+ "'lustre-MDT0000'\n");
+ }
return rc;
}
shmem_stop();
do_disconnect(argv[0], 1);
}
+
+/*
+ * Find the target_obd proc file of the lov whose name starts with
+ * "<fsname>-" and copy its pathname into path.
+ *
+ * path must be able to hold PATH_MAX + 1 bytes; a longer match is refused
+ * instead of overflowing the buffer.
+ *
+ * Returns 0 on success, -EINVAL when nothing matches and -ENAMETOOLONG
+ * when the match does not fit.
+ */
+static int find_target_obdpath(char *fsname, char *path)
+{
+        glob_t glob_info;
+        char pattern[PATH_MAX + 1];
+        int rc;
+
+        snprintf(pattern, sizeof(pattern),
+                 "/proc/fs/lustre/lov/%s-*/target_obd",
+                 fsname);
+        rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
+        if (rc)
+                return -EINVAL;
+
+        if (glob_info.gl_pathc == 0) {
+                rc = -EINVAL;
+        } else if (strlen(glob_info.gl_pathv[0]) > PATH_MAX) {
+                rc = -ENAMETOOLONG;
+        } else {
+                /* only the first match is used */
+                strcpy(path, glob_info.gl_pathv[0]);
+                rc = 0;
+        }
+
+        /* the match list is released on every path, success included */
+        globfree(&glob_info);
+        return rc;
+}
+
+/*
+ * Find the proc entry of pool poolname under the lov whose name starts
+ * with "<fsname>-" and copy its pathname into poolpath.
+ *
+ * poolpath must be able to hold PATH_MAX + 1 bytes; a longer match is
+ * refused instead of overflowing the buffer.
+ *
+ * Returns 0 on success, -EINVAL when nothing matches and -ENAMETOOLONG
+ * when the match does not fit.
+ */
+static int find_poolpath(char *fsname, char *poolname, char *poolpath)
+{
+        glob_t glob_info;
+        char pattern[PATH_MAX + 1];
+        int rc;
+
+        snprintf(pattern, sizeof(pattern),
+                 "/proc/fs/lustre/lov/%s-*/pools/%s",
+                 fsname, poolname);
+        rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
+        if (rc)
+                return -EINVAL;
+
+        if (glob_info.gl_pathc == 0) {
+                rc = -EINVAL;
+        } else if (strlen(glob_info.gl_pathv[0]) > PATH_MAX) {
+                rc = -ENAMETOOLONG;
+        } else {
+                /* only the first match is used */
+                strcpy(poolpath, glob_info.gl_pathv[0]);
+                rc = 0;
+        }
+
+        /* the match list is released on every path, success included */
+        globfree(&glob_info);
+        return rc;
+}
+
+/*
+ * Look for an OST in the target_obd list of a filesystem or in a pool.
+ *
+ * if poolname is NULL, search ostname in target_obd (ostname is required):
+ *   returns 1 if the OST is listed, 0 if not
+ * if poolname is not NULL
+ *   if pool not found returns < 0
+ *   if ostname is NULL, returns 1 if pool is not empty and 0 if pool empty
+ *   if ostname is not NULL, returns 1 if OST is in pool and 0 if not
+ *
+ * A negative value is also returned when the proc file cannot be located
+ * or opened, or when both poolname and ostname are NULL.
+ */
+static int search_ost(char *fsname, char *poolname, char *ostname)
+{
+        FILE *fd;
+        char buffer[PATH_MAX + 1];
+        int len = 0, rc;
+
+        /* nothing to compare against: refuse instead of passing NULL on */
+        if ((poolname == NULL) && (ostname == NULL))
+                return -EINVAL;
+
+        if (ostname != NULL)
+                len = strlen(ostname);
+
+        if (poolname == NULL)
+                rc = find_target_obdpath(fsname, buffer);
+        else
+                rc = find_poolpath(fsname, poolname, buffer);
+        if (rc)
+                return rc;
+
+        if ((fd = fopen(buffer, "r")) == NULL)
+                return -EINVAL;
+
+        while (fgets(buffer, sizeof(buffer), fd) != NULL) {
+                if (poolname == NULL) {
+                        char *ptr;
+
+                        /* we search ostname in target_obd.
+                         * NOTE(review): lines are presumably of the form
+                         * "IDX: name STATUS" -- confirm against the lov
+                         * proc code.  The name is taken after the first
+                         * blank rather than at a fixed offset of 3, which
+                         * only suited one-digit indexes. */
+                        ptr = strchr(buffer, ' ');
+                        if ((ptr != NULL) &&
+                            (strncmp(ptr + 1, ostname, len) == 0)) {
+                                fclose(fd);
+                                return 1;
+                        }
+                } else {
+                        /* we search a non empty pool or
+                           an ostname in a pool */
+                        if ((ostname == NULL) ||
+                            (strncmp(buffer, ostname, len) == 0)) {
+                                fclose(fd);
+                                return 1;
+                        }
+                }
+        }
+        fclose(fd);
+        return 0;
+}
+
+/*
+ * Sanity-check a pool command before it is sent.
+ *
+ * Uses search_ost() to verify that the pool (and the OST, for add/remove)
+ * is in the state the command expects.  Prints a diagnostic on stderr and
+ * returns a negative errno value when it is not; returns 0 otherwise and
+ * for any command that is not a pool command.
+ */
+static int check_pool_cmd(enum lcfg_command_type cmd,
+                          char *fsname, char *poolname,
+                          char *ostname)
+{
+        int found;
+
+        if (cmd == LCFG_POOL_NEW) {
+                /* any non-negative answer means the pool is already there */
+                found = search_ost(fsname, poolname, NULL);
+                if (found < 0)
+                        return 0;
+                fprintf(stderr, "Pool %s.%s already exists\n",
+                        fsname, poolname);
+                return -EEXIST;
+        }
+
+        if (cmd == LCFG_POOL_DEL) {
+                found = search_ost(fsname, poolname, NULL);
+                if (found < 0) {
+                        fprintf(stderr, "Pool %s.%s not found\n",
+                                fsname, poolname);
+                        return -ENOENT;
+                }
+                if (found == 1) {
+                        fprintf(stderr, "Pool %s.%s not empty, "
+                                "please remove all members\n",
+                                fsname, poolname);
+                        return -ENOTEMPTY;
+                }
+                return 0;
+        }
+
+        if (cmd == LCFG_POOL_ADD) {
+                /* the OST must be known to the lov ... */
+                found = search_ost(fsname, NULL, ostname);
+                if (found == 0) {
+                        fprintf(stderr, "OST %s not found in lov of %s\n",
+                                ostname, fsname);
+                        return -ENOENT;
+                }
+                /* ... and not yet a member of an existing pool */
+                found = search_ost(fsname, poolname, ostname);
+                if (found < 0) {
+                        fprintf(stderr, "Pool %s.%s not found\n",
+                                fsname, poolname);
+                        return -ENOENT;
+                }
+                if (found == 1) {
+                        fprintf(stderr, "OST %s already in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                        return -EEXIST;
+                }
+                return 0;
+        }
+
+        if (cmd == LCFG_POOL_REM) {
+                found = search_ost(fsname, poolname, ostname);
+                if (found < 0) {
+                        fprintf(stderr, "Pool %s.%s not found\n",
+                                fsname, poolname);
+                        return -ENOENT;
+                }
+                if (found == 0) {
+                        fprintf(stderr, "OST %s not found in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                        return -ENOENT;
+                }
+                return 0;
+        }
+
+        return 0;
+}
+
+/*
+ * Report on stderr whether a pool command has taken effect.
+ *
+ * The expected state is polled through search_ost() up to 10 times, with a
+ * 2 second sleep after every attempt that does not show it yet.  Commands
+ * other than the four pool commands are ignored.
+ */
+static void check_pool_cmd_result(enum lcfg_command_type cmd,
+                                  char *fsname, char *poolname,
+                                  char *ostname)
+{
+        int tries_left;
+        int state = 0;
+
+        switch (cmd) {
+        case LCFG_POOL_NEW:
+                /* wait for the pool to show up */
+                for (tries_left = 10; tries_left > 0; tries_left--) {
+                        state = search_ost(fsname, poolname, NULL);
+                        if (state >= 0)
+                                break;
+                        sleep(2);
+                }
+                if (state < 0)
+                        fprintf(stderr, "Warning, pool %s.%s not found\n",
+                                fsname, poolname);
+                else
+                        fprintf(stderr, "Pool %s.%s created\n",
+                                fsname, poolname);
+                return;
+
+        case LCFG_POOL_DEL:
+                /* wait for the pool to go away */
+                for (tries_left = 10; tries_left > 0; tries_left--) {
+                        state = search_ost(fsname, poolname, NULL);
+                        if (state < 0)
+                                break;
+                        sleep(2);
+                }
+                if (state >= 0)
+                        fprintf(stderr, "Warning, pool %s.%s still found\n",
+                                fsname, poolname);
+                else
+                        fprintf(stderr, "Pool %s.%s destroyed\n",
+                                fsname, poolname);
+                return;
+
+        case LCFG_POOL_ADD:
+                /* wait for the OST to be listed in the pool */
+                for (tries_left = 10; tries_left > 0; tries_left--) {
+                        state = search_ost(fsname, poolname, ostname);
+                        if (state == 1)
+                                break;
+                        sleep(2);
+                }
+                if (state != 1)
+                        fprintf(stderr, "Warning, OST %s not found in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                else
+                        fprintf(stderr, "OST %s added to pool %s.%s\n",
+                                ostname, fsname, poolname);
+                return;
+
+        case LCFG_POOL_REM:
+                /* wait for the OST to leave the pool */
+                for (tries_left = 10; tries_left > 0; tries_left--) {
+                        state = search_ost(fsname, poolname, ostname);
+                        if (state != 1)
+                                break;
+                        sleep(2);
+                }
+                if (state == 1)
+                        fprintf(stderr, "Warning, OST %s still found in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                else
+                        fprintf(stderr, "OST %s removed from pool %s.%s\n",
+                                ostname, fsname, poolname);
+                return;
+
+        default:
+                return;
+        }
+}
+
+/*
+ * Validate an OST name and rewrite it in place to its full form
+ * "<fsname>-OSTXXXX_UUID".
+ *
+ * Accepted inputs are "OSTXXXX" and "<fsname>-OSTXXXX", each optionally
+ * followed by "_UUID", where XXXX is four hexadecimal digits.
+ *
+ * ostname must be able to hold MAX_OBD_NAME + 1 bytes; a completed name
+ * that would not fit is rejected with -ENAMETOOLONG.
+ *
+ * Returns 0 on success, -EINVAL on a malformed name (a message is printed
+ * on stderr).
+ */
+static int check_and_complete_ostname(char *fsname, char *ostname)
+{
+        char real_ostname[MAX_OBD_NAME + 1];
+        size_t fslen = strlen(fsname);
+        char *ptr;
+        int i, n;
+
+        /* if OST name does not start with fsname, we add it */
+        /* if not check if the fsname is the right one */
+        if (strchr(ostname, '-') == NULL) {
+                n = snprintf(real_ostname, sizeof(real_ostname), "%s-%s",
+                             fsname, ostname);
+        } else if ((strncmp(ostname, fsname, fslen) != 0) ||
+                   (ostname[fslen] != '-')) {
+                /* the prefix must be exactly "<fsname>-" */
+                fprintf(stderr, "%s does not start with fsname %s\n",
+                        ostname, fsname);
+                return -EINVAL;
+        } else {
+                n = snprintf(real_ostname, sizeof(real_ostname), "%s",
+                             ostname);
+        }
+        if ((n < 0) || ((size_t)n >= sizeof(real_ostname))) {
+                fprintf(stderr, "ostname %s is too long\n", ostname);
+                return -ENAMETOOLONG;
+        }
+
+        /* real_ostname is fsname-????? */
+        ptr = real_ostname + fslen + 1;
+        if (strncmp(ptr, "OST", 3) != 0) {
+                fprintf(stderr, "%s does not start by %s-OST nor OST\n",
+                        ostname, fsname);
+                return -EINVAL;
+        }
+        /* real_ostname is fsname-OST????? */
+        ptr += 3;
+        for (i = 0; i < 4; i++, ptr++) {
+                /* <ctype.h> functions need an unsigned char value */
+                if (!isxdigit((unsigned char)*ptr)) {
+                        fprintf(stderr,
+                                "ost's index in %s is not an hexa number\n",
+                                ostname);
+                        return -EINVAL;
+                }
+        }
+        /* real_ostname is fsname-OSTXXXX????? */
+        /* if OST name does not end with _UUID, we add it */
+        if (*ptr == '\0') {
+                if (strlen(real_ostname) + strlen("_UUID") >=
+                    sizeof(real_ostname)) {
+                        fprintf(stderr, "ostname %s is too long\n", ostname);
+                        return -ENAMETOOLONG;
+                }
+                strcat(real_ostname, "_UUID");
+        } else if (strcmp(ptr, "_UUID") != 0) {
+                fprintf(stderr,
+                        "ostname %s does not end with _UUID\n", ostname);
+                return -EINVAL;
+        }
+        /* real_ostname is fsname-OSTXXXX_UUID */
+        strcpy(ostname, real_ostname);
+        return 0;
+}
+
+/*
+ * Send one pool command to the MGS.
+ *
+ * The request is first validated with check_pool_cmd(); a failure there is
+ * returned as is.  A lustre_cfg record is then built with cmdname in
+ * buffer 0, fullpoolname in buffer 1 and, when ostname is not NULL, ostname
+ * in buffer 2, and sent with an OBD_IOC_POOL ioctl on the MGS device.
+ *
+ * returns 0 or -errno
+ */
+static int pool_cmd(enum lcfg_command_type cmd,
+ char *cmdname, char *fullpoolname,
+ char *fsname, char *poolname, char *ostname)
+{
+ int rc = 0;
+ struct obd_ioctl_data data;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+
+ rc = check_pool_cmd(cmd, fsname, poolname, ostname);
+ if (rc)
+ return rc;
+
+ lustre_cfg_bufs_reset(&bufs, NULL);
+ lustre_cfg_bufs_set_string(&bufs, 0, cmdname);
+ lustre_cfg_bufs_set_string(&bufs, 1, fullpoolname);
+ if (ostname != NULL)
+ lustre_cfg_bufs_set_string(&bufs, 2, ostname);
+
+ lcfg = lustre_cfg_new(cmd, &bufs);
+ if (IS_ERR(lcfg)) {
+ rc = PTR_ERR(lcfg);
+ return rc;
+ }
+
+ IOC_INIT(data);
+ /* only a negative device number is an error; a valid one left in rc
+  * is overwritten by the l_ioctl() result below */
+ rc = data.ioc_dev = get_mgs_device();
+ if (rc < 0)
+ goto out;
+
+ data.ioc_type = LUSTRE_CFG_TYPE;
+ data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
+ lcfg->lcfg_buflens);
+ data.ioc_pbuf1 = (void *)lcfg;
+ IOC_PACK(cmdname, data);
+
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_POOL, buf);
+out:
+ /* any failure is reported to the caller as -errno */
+ if (rc)
+ rc = -errno;
+ lustre_cfg_free(lcfg);
+ return rc;
+}
+
+/*
+ * this function transforms a rule [start-end/step] into an array
+ * of matching numbers
+ * supported forms are:
+ * [start] : just this number
+ * [start-end] : all numbers from start to end
+ * [start-end/step] : numbers from start to end with increment of step
+ * several of these forms may be given between the brackets, separated
+ * by ','
+ * on return, format contains a printf format string which can be used
+ * to generate all the strings
+ *
+ * rule is modified in place ('[' and ']' are overwritten).  A rule without
+ * a bracket pair is copied to format unchanged and yields a one-element
+ * array holding 0.  Because the rule becomes a printf format, a rule that
+ * contains '%' is rejected.
+ *
+ * format must be large enough for the rule plus the "%.4d" conversion;
+ * the caller has to check the rule length beforehand.
+ *
+ * Returns the number of entries stored in *array (to be freed by the
+ * caller), or 0 on error; *array is NULL whenever 0 is returned.
+ */
+static int get_array_idx(char *rule, char *format, int **array)
+{
+        /* largest total that still fits the int return value */
+        const unsigned int max_entries = (unsigned int)-1 >> 1;
+        char *start, *end, *ptr;
+        unsigned int lo, hi, step, count, k;
+        int *list = NULL, *tmp;
+        int array_sz = 0;
+        int rc;
+
+        *array = NULL;
+
+        /* the rule is turned into a printf format: no conversion allowed */
+        if (strchr(rule, '%') != NULL)
+                return 0;
+
+        start = strchr(rule, '[');
+        end = strchr(rule, ']');
+        if ((start == NULL) || (end == NULL)) {
+                list = malloc(sizeof(int));
+                if (list == NULL)
+                        return 0;
+                /* format has no conversion, the value is never printed */
+                list[0] = 0;
+                strcpy(format, rule);
+                *array = list;
+                return 1;
+        }
+        /* a ']' ahead of the '[' is not a range */
+        if (end < start)
+                return 0;
+
+        *start = '\0';
+        *end = '\0';
+        end++;
+        start++;
+        /* put in format the printf format (the rule without the range) */
+        sprintf(format, "%s%%.4d%s", rule, end);
+
+        /* loop on , separator */
+        do {
+                /* extract the 3 fields */
+                rc = sscanf(start, "%u-%u/%u", &lo, &hi, &step);
+                if (rc == 0)
+                        goto fail;
+                /* rc < 0 (empty field, e.g. a trailing ',') adds nothing */
+                if (rc >= 1) {
+                        if (rc == 1) {
+                                hi = lo;
+                                step = 1;
+                        } else if (rc == 2) {
+                                step = 1;
+                        }
+                        if ((hi < lo) || (step == 0))
+                                goto fail;
+
+                        count = (hi - lo) / step + 1;
+                        /* count == 0 means the computation wrapped */
+                        if ((count == 0) ||
+                            (count > max_entries - (unsigned int)array_sz))
+                                goto fail;
+                        if ((size_t)(array_sz + count) >
+                            (size_t)-1 / sizeof(int))
+                                goto fail;
+
+                        /* keep the old block reachable if realloc fails */
+                        tmp = realloc(list, sizeof(int) * (array_sz + count));
+                        if (tmp == NULL)
+                                goto fail;
+                        list = tmp;
+
+                        /* lo + k * step never exceeds hi: no wrap-around */
+                        for (k = 0; k < count; k++)
+                                list[array_sz + k] = lo + k * step;
+                        array_sz += count;
+                }
+                ptr = strchr(start, ',');
+                if (ptr != NULL)
+                        start = ptr + 1;
+
+        } while (ptr != NULL);
+
+        *array = list;
+        return array_sz;
+
+fail:
+        free(list);
+        return 0;
+}
+
+/*
+ * Split "<fsname>.<poolname>" into its two parts.
+ *
+ * arg is copied to fsname and cut at the first '.'; what follows the '.'
+ * is copied to poolname (at most MAXPOOLNAME characters plus the NUL).
+ * On error a message is printed on stderr and fsname keeps the whole copy
+ * of arg.
+ *
+ * Returns 0 on success, -EINVAL when the '.' or one of the two names is
+ * missing, -ENAMETOOLONG when the pool name exceeds MAXPOOLNAME.
+ */
+static int extract_fsname_poolname(char *arg, char *fsname, char *poolname)
+{
+        char *dot;
+        int pool_len;
+        int rc = -EINVAL;
+
+        strcpy(fsname, arg);
+        dot = strchr(fsname, '.');
+
+        if (dot == NULL) {
+                fprintf(stderr, ". is missing in %s\n", fsname);
+        } else if (dot == fsname) {
+                fprintf(stderr, "fsname is empty\n");
+        } else {
+                pool_len = strlen(dot + 1);
+                if (pool_len == 0) {
+                        fprintf(stderr, "poolname is empty\n");
+                } else if (pool_len > MAXPOOLNAME) {
+                        fprintf(stderr,
+                                "poolname %s is too long (length is %d max is %d)\n",
+                                dot + 1, pool_len, MAXPOOLNAME);
+                        rc = -ENAMETOOLONG;
+                } else {
+                        /* both parts are valid: copy the pool, cut fsname */
+                        strncpy(poolname, dot + 1, MAXPOOLNAME);
+                        poolname[MAXPOOLNAME] = '\0';
+                        *dot = '\0';
+                        return 0;
+                }
+        }
+
+        fprintf(stderr, "argument %s must be <fsname>.<poolname>\n", arg);
+        return rc;
+}
+
+/*
+ * Entry point of the pool_new, pool_destroy, pool_list, pool_add and
+ * pool_remove commands; argv[0] selects the command.
+ *
+ *   pool_new | pool_destroy <fsname>.<poolname>
+ *   pool_list <fsname>[.<poolname>] | <pathname>
+ *   pool_add | pool_remove <fsname>.<poolname> <ost rule> ...
+ *
+ * Each OST rule is expanded by get_array_idx(); every resulting name is
+ * completed by check_and_complete_ostname(), sent with pool_cmd() and, when
+ * that succeeded, checked with check_pool_cmd_result().
+ *
+ * Returns CMD_HELP on a usage error, 0 on success or a negative errno
+ * value.  For pool_add/pool_remove the per-OST result of pool_cmd() is not
+ * propagated: 0 is returned once all rules have been processed.
+ */
+int jt_pool_cmd(int argc, char **argv)
+{
+        enum lcfg_command_type cmd;
+        char fsname[PATH_MAX + 1];
+        char poolname[MAXPOOLNAME + 1];
+        char *ostnames_buf = NULL;
+        int i, rc;
+        int *array = NULL, array_sz;
+        struct {
+                int rc;
+                char *ostname;
+        } *cmds = NULL;
+
+        switch (argc) {
+        case 0:
+        case 1: return CMD_HELP;
+        case 2: {
+                if (strcmp("pool_new", argv[0]) == 0)
+                        cmd = LCFG_POOL_NEW;
+                else if (strcmp("pool_destroy", argv[0]) == 0)
+                        cmd = LCFG_POOL_DEL;
+                else if (strcmp("pool_list", argv[0]) == 0)
+                        return llapi_poollist(argv[1]);
+                else return CMD_HELP;
+
+                /* extract_fsname_poolname() copies argv[1] into fsname */
+                if (strlen(argv[1]) >= sizeof(fsname)) {
+                        rc = -ENAMETOOLONG;
+                        break;
+                }
+                rc = extract_fsname_poolname(argv[1], fsname, poolname);
+                if (rc)
+                        break;
+
+                rc = pool_cmd(cmd, argv[0], argv[1],
+                              fsname, poolname, NULL);
+                if (rc)
+                        break;
+
+                check_pool_cmd_result(cmd, fsname, poolname, NULL);
+                break;
+        }
+        default: {
+                char format[2*MAX_OBD_NAME];
+
+                if (strcmp("pool_remove", argv[0]) == 0) {
+                        cmd = LCFG_POOL_REM;
+                } else if (strcmp("pool_add", argv[0]) == 0) {
+                        cmd = LCFG_POOL_ADD;
+                } else {
+                        return CMD_HELP;
+                }
+
+                /* extract_fsname_poolname() copies argv[1] into fsname */
+                if (strlen(argv[1]) >= sizeof(fsname)) {
+                        rc = -ENAMETOOLONG;
+                        break;
+                }
+                rc = extract_fsname_poolname(argv[1], fsname, poolname);
+                if (rc)
+                        break;
+
+                for (i = 2; i < argc; i++) {
+                        int j;
+
+                        /* get_array_idx() writes the rule, with its range
+                         * replaced by "%.4d", into format: at most two
+                         * characters more than the rule itself */
+                        if (strlen(argv[i]) + 2 >= sizeof(format)) {
+                                rc = -ENAMETOOLONG;
+                                goto out;
+                        }
+
+                        array_sz = get_array_idx(argv[i], format, &array);
+                        if (array_sz == 0)
+                                return CMD_HELP;
+
+                        cmds = malloc(array_sz * sizeof(cmds[0]));
+                        if (cmds != NULL) {
+                                /* optional: names are rebuilt below when
+                                 * this allocation fails */
+                                ostnames_buf = malloc(array_sz *
+                                                      (MAX_OBD_NAME + 1));
+                        } else {
+                                free(array);
+                                rc = -ENOMEM;
+                                goto out;
+                        }
+
+                        for (j = 0; j < array_sz; j++) {
+                                char ostname[MAX_OBD_NAME + 1];
+
+                                snprintf(ostname, MAX_OBD_NAME, format,
+                                         array[j]);
+                                ostname[MAX_OBD_NAME] = '\0';
+
+                                rc = check_and_complete_ostname(fsname,ostname);
+                                if (rc) {
+                                        free(array);
+                                        free(cmds);
+                                        free(ostnames_buf);
+                                        goto out;
+                                }
+                                if (ostnames_buf != NULL) {
+                                        cmds[j].ostname =
+                                          &ostnames_buf[(MAX_OBD_NAME + 1) * j];
+                                        strcpy(cmds[j].ostname, ostname);
+                                } else {
+                                        cmds[j].ostname = NULL;
+                                }
+                                cmds[j].rc = pool_cmd(cmd, argv[0], argv[1],
+                                                      fsname, poolname,
+                                                      ostname);
+                        }
+                        /* all requests are sent first, then each successful
+                         * one is waited for */
+                        for (j = 0; j < array_sz; j++) {
+                                if (!cmds[j].rc) {
+                                        char ostname[MAX_OBD_NAME + 1];
+
+                                        if (!cmds[j].ostname) {
+                                                snprintf(ostname, MAX_OBD_NAME,
+                                                         format, array[j]);
+                                                ostname[MAX_OBD_NAME] = '\0';
+                                                check_and_complete_ostname(
+                                                        fsname, ostname);
+                                        } else {
+                                                strcpy(ostname,
+                                                       cmds[j].ostname);
+                                        }
+                                        check_pool_cmd_result(cmd, fsname,
+                                                              poolname,ostname);
+                                }
+                        }
+                        /* per-rule buffers; free(NULL) is a no-op */
+                        free(array);
+                        array = NULL;
+                        free(cmds);
+                        cmds = NULL;
+                        free(ostnames_buf);
+                        ostnames_buf = NULL;
+                }
+                return 0;
+        }
+        }
+
+
+out:
+        if ((rc == -EINVAL) || (rc == -ENOENT))
+                fprintf(stderr, "Does the fs, pool or ost exist?\n");
+        if (rc != 0) {
+                errno = -rc;
+                perror(argv[0]);
+        }
+
+        return rc;
+}
int jt_blockdev_detach(int argc, char **argv);
int jt_blockdev_info(int argc, char **argv);
+int jt_pool_cmd(int argc, char **argv);
+
#endif
#define lustre_swab_ldlm_request NULL
#define lustre_swab_ldlm_reply NULL
#define lustre_swab_ldlm_intent NULL
-#define lustre_swab_lov_mds_md NULL
+/* #define lustre_swab_lov_mds_md NULL */
#define lustre_swab_mdt_rec_reint NULL
#define lustre_swab_lustre_capa NULL
#define lustre_swab_lustre_capa_key NULL
}
+/*
+ * Run the CHECK_* macros over struct lov_mds_md_v3 (including the new
+ * lmm_pool_name member), over struct lov_ost_data_v1, and over the
+ * LOV_MAGIC_V3 and LOV_PATTERN_* constants.
+ * NOTE(review): the CHECK_* macros are defined elsewhere; presumably they
+ * emit the wire-format checks for each listed item -- confirm.
+ */
static void
+check_lov_mds_md_v3(void)
+{
+ BLANK_LINE();
+ CHECK_STRUCT(lov_mds_md_v3);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_magic);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_pattern);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_object_id);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_object_gr);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_stripe_size);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_stripe_count);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_pool_name);
+ CHECK_MEMBER(lov_mds_md_v3, lmm_objects);
+
+ BLANK_LINE();
+ CHECK_STRUCT(lov_ost_data_v1);
+ CHECK_MEMBER(lov_ost_data_v1, l_object_id);
+ CHECK_MEMBER(lov_ost_data_v1, l_object_gr);
+ CHECK_MEMBER(lov_ost_data_v1, l_ost_gen);
+ CHECK_MEMBER(lov_ost_data_v1, l_ost_idx);
+
+ CHECK_CDEFINE(LOV_MAGIC_V3);
+
+ CHECK_VALUE(LOV_PATTERN_RAID0);
+ CHECK_VALUE(LOV_PATTERN_RAID1);
+}
+
+static void
check_obd_statfs(void)
{
BLANK_LINE();
check_obd_connect_data();
check_obdo();
check_lov_mds_md_v1();
+ check_lov_mds_md_v3();
check_lov_mds_md_join();
check_obd_statfs();
check_obd_ioobj();