Whamcloud - gitweb
b=14836
authornathan <nathan>
Mon, 15 Sep 2008 23:25:34 +0000 (23:25 +0000)
committernathan <nathan>
Mon, 15 Sep 2008 23:25:34 +0000 (23:25 +0000)
i=nathan
i=adilger
OST pools on HEAD, comprehensive patch including 17054:19007;
16935:18918,19012,19089,19128; 16978:18872

53 files changed:
lustre/ChangeLog
lustre/doc/lfs.1
lustre/include/lprocfs_status.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/lustre_cfg.h
lustre/include/lustre_lib.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_lov.h
lustre/ldlm/ldlm_lib.c
lustre/liblustre/super.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/lov/Makefile.in
lustre/lov/autoMakefile.am
lustre/lov/lov_ea.c
lustre/lov/lov_internal.h
lustre/lov/lov_obd.c
lustre/lov/lov_pack.c
lustre/lov/lov_pool.c [new file with mode: 0644]
lustre/lov/lov_qos.c
lustre/mdd/mdd_lov.c
lustre/mdd/mdd_trans.c
lustre/mds/handler.c
lustre/mds/mds_internal.h
lustre/mds/mds_lov.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_internal.h
lustre/mgs/mgs_llog.c
lustre/obdclass/debug.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_config.c
lustre/obdfilter/filter.c
lustre/osc/osc_request.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/tests/ll_dirstripe_verify.c
lustre/tests/ll_getstripe_info.c
lustre/tests/llmount.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh
lustre/utils/Makefile.am
lustre/utils/lctl.c
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/llog_reader.c
lustre/utils/obd.c
lustre/utils/obdctl.h
lustre/utils/req-layout.c
lustre/utils/wirecheck.c

index 4141ff9..4ecc54e 100644 (file)
@@ -95,6 +95,10 @@ Details    : When connection is reused this not moved from CONN_UNUSED_HASH
             again in unused hash.
 
 Severity   : enhancement
             again in unused hash.
 
 Severity   : enhancement
+Bugzilla   : 15899
+Description: File striping can now be set to use an arbitrary pool of OSTs.
+       
+Severity   : enhancement
 Bugzilla   : 16573
 Description: Export bytes_read/bytes_write count on OSC/OST.
 
 Bugzilla   : 16573
 Description: Export bytes_read/bytes_write count on OSC/OST.
 
index 0fca528..cea475f 100644 (file)
@@ -17,18 +17,29 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the
         \fB[[!] --uid|-u N] [[!] --user|-U <name>]
         \fB<dirname|filename>\fR
 .br
         \fB[[!] --uid|-u N] [[!] --user|-U <name>]
         \fB<dirname|filename>\fR
 .br
+.B lfs osts
+.br
 .B lfs getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v] 
 .B lfs getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v] 
-              \fB[--recursive|-r] <dirname|filename>\fR
+        \fB[--recursive|-r] <dirname|filename>\fR
 .br
 .B lfs setstripe [--size|-s stripe-size] [--count|-c stripe-cnt]
 .br
 .B lfs setstripe [--size|-s stripe-size] [--count|-c stripe-cnt]
-              \fB[--index|-i start-ost] <filename|dirname>\fR
+        \fB[--offset|-o start-ost] [--pool|-p pool-name]
+        \fB<dir|filename>\fR
 .br
 .br
-.B lfs setstripe -d <dirname>
+.B lfs setstripe -d <dir>
 .br
 .br
-.B lfs quotachown [-i] <filesystem>
+.B lfs poollist <filesystem>[.<pool>] | <pathname>
+.br
+.B lfs quota [-v] [-o obd_uuid] [-u|-g] <username|groupname> <filesystem>
+.br
+.B lfs quota <filesystem>
+.br
+.B lfs quota -t [-u|-g] <filesystem>
 .br
 .B lfs quotacheck [-ug] <filesystem>
 .br
 .br
 .B lfs quotacheck [-ug] <filesystem>
 .br
+.B lfs quotachown [-i] <filesystem>
+.br
 .B lfs quotaon [-ugf] <filesystem>
 .br
 .B lfs quotaoff [-ug] <filesystem>
 .B lfs quotaon [-ugf] <filesystem>
 .br
 .B lfs quotaoff [-ug] <filesystem>
@@ -59,13 +70,15 @@ Report filesystem disk space usage or inodes usage of each MDT/OST.
 .B find 
 To search the directory tree rooted at the given dir/file name for the files that match the given parameters: \fB--atime\fR (file was last accessed N*24 hours ago), \fB--ctime\fR (file's status was last changed N*24 hours ago), \fB--mtime\fR (file's data was last modified N*24 hours ago), \fB--obd\fR (file has an object on a specific OST or OSTs), \fB--size\fR (file has size in bytes, or \fBk\fRilo-, \fBM\fRega-, \fBG\fRiga-, \fBT\fRera-, \fBP\fReta-, or \fBE\fRxabytes if a suffix is given), \fB--type\fR (file has the type: \fBb\fRlock, \fBc\fRharacter, \fBd\fRirectory, \fBp\fRipe, \fBf\fRile, sym\fBl\fRink, \fBs\fRocket, or \fBD\fRoor (Solaris)), \fB--uid\fR (file has specific numeric user ID), \fB--user\fR (file owned by specific user, numeric user ID allowed), \fB--gid\fR (file has specific group ID), \fB--group\fR (file belongs to specific group, numeric group ID allowed). The option \fB--maxdepth\fR allows find to decend at most N levels of directory tree. The options \fB--print\fR and \fB--print0\fR print full file name, followed by a newline or NUL character correspondingly.  Using \fB!\fR before an option negates its meaning (\fIfiles NOT matching the parameter\fR).  Using \fB+\fR before a numeric value means \fIfiles with the parameter OR MORE\fR, while \fB-\fR before a numeric value means \fIfiles with the parameter OR LESS\fR.
 .TP
 .B find 
 To search the directory tree rooted at the given dir/file name for the files that match the given parameters: \fB--atime\fR (file was last accessed N*24 hours ago), \fB--ctime\fR (file's status was last changed N*24 hours ago), \fB--mtime\fR (file's data was last modified N*24 hours ago), \fB--obd\fR (file has an object on a specific OST or OSTs), \fB--size\fR (file has size in bytes, or \fBk\fRilo-, \fBM\fRega-, \fBG\fRiga-, \fBT\fRera-, \fBP\fReta-, or \fBE\fRxabytes if a suffix is given), \fB--type\fR (file has the type: \fBb\fRlock, \fBc\fRharacter, \fBd\fRirectory, \fBp\fRipe, \fBf\fRile, sym\fBl\fRink, \fBs\fRocket, or \fBD\fRoor (Solaris)), \fB--uid\fR (file has specific numeric user ID), \fB--user\fR (file owned by specific user, numeric user ID allowed), \fB--gid\fR (file has specific group ID), \fB--group\fR (file belongs to specific group, numeric group ID allowed). The option \fB--maxdepth\fR allows find to decend at most N levels of directory tree. The options \fB--print\fR and \fB--print0\fR print full file name, followed by a newline or NUL character correspondingly.  Using \fB!\fR before an option negates its meaning (\fIfiles NOT matching the parameter\fR).  Using \fB+\fR before a numeric value means \fIfiles with the parameter OR MORE\fR, while \fB-\fR before a numeric value means \fIfiles with the parameter OR LESS\fR.
 .TP
-.B getstripe
-To list the striping info for a given filename or files in a directory, optionally recursively, for all files in a directory tree: \fB--quiet\fR (don't print object IDs), \fB--verbose\fR (print striping parameters), \fB--recursive\fR (recurse into subdirectories).
-.TP
 .B osts 
 List all the OSTs for the filesystem
 .TP
 .B osts 
 List all the OSTs for the filesystem
 .TP
-.B setstripe [--size stripe-size] [--count stripe-cnt] [--index start-ost]
+.B getstripe
+To list the striping info for a given filename or files in a directory, optionally recursively, for all files in a directory tree: \fB--quiet\fR (don't print object IDs), \fB--verbose\fR (print striping parameters), \fB--recursive\fR (recurse into subdirectories).
+.TP
+.B setstripe [--size stripe-size] [--count stripe-cnt] 
+       \fB[--offset start-ost] [--pool pool-name]\fR
+.br
 To create a new file, or set the directory default, with the specified striping parameters.  The
 .I stripe-count
 is the number of OSTs to stripe a file over. A
 To create a new file, or set the directory default, with the specified striping parameters.  The
 .I stripe-count
 is the number of OSTs to stripe a file over. A
@@ -78,15 +91,24 @@ is the number of bytes to store on each OST before moving to the next OST.  A
 .I stripe-size
 of 0 means to use the filesystem-wide default stripe size (default 1MB).  The
 .I start-ost
 .I stripe-size
 of 0 means to use the filesystem-wide default stripe size (default 1MB).  The
 .I start-ost
-is the OST index (starting at 0) on which to start striping for this file.  A
+is the OST index (base 10, starting at 0) on which to start striping for this file.  A
 .I start-ost
 .I start-ost
-of -1 allows the MDS to specify the starting index and it is strongly
-recommended that the starting OST not be given, as this allows space and
-load balancing to be done by the MDS as needed.
+of -1 allows the MDS to choose the starting index and it is strongly recommended, as this allows space and load balancing to be done by the MDS as needed.  The
+.I pool-name
+is the name of a predefined pool of OSTs (see 
+.I lctl
+) that will be used for striping. The 
+.I stripe-count, stripe-size, start-ost
+will be used as well; the 
+.I start-ost
+must be part of the pool or an error will be returned. 
 .TP
 .TP
-.B lfs setstripe -d
+.B setstripe -d
 Delete the default striping on the specified directory.
 .TP
 Delete the default striping on the specified directory.
 .TP
+.B poollist <filesystem>[.<pool>] | <pathname>
+List the pools in \fBfilesystem\fR or \fBpathname\fR, or the OSTs in \fBfilesystem.pool\fR
+.TP
 .B quotachown
 To change files' owner and group on OSTs of the specified filesystem
 .TP
 .B quotachown
 To change files' owner and group on OSTs of the specified filesystem
 .TP
index c3e69c7..9696a7b 100644 (file)
@@ -403,9 +403,12 @@ extern int lprocfs_add_clear_entry(struct obd_device * obd,
 extern int lprocfs_exp_setup(struct obd_export *exp,
                              lnet_nid_t *peer_nid, int *newnid);
 extern int lprocfs_exp_cleanup(struct obd_export *exp);
 extern int lprocfs_exp_setup(struct obd_export *exp,
                              lnet_nid_t *peer_nid, int *newnid);
 extern int lprocfs_exp_cleanup(struct obd_export *exp);
-extern int lprocfs_add_simple(struct proc_dir_entry *root,
-                              char *name, cfs_read_proc_t *read_proc,
-                              cfs_write_proc_t *write_proc, void *data);
+extern cfs_proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+                                                char *name,
+                                                cfs_read_proc_t *read_proc,
+                                                cfs_write_proc_t *write_proc,
+                                                void *data,
+                                                struct file_operations *fops);
 extern struct proc_dir_entry *lprocfs_add_symlink(const char *name,
                         struct proc_dir_entry *parent, const char *dest);
 extern void lprocfs_free_per_client_stats(struct obd_device *obd);
 extern struct proc_dir_entry *lprocfs_add_symlink(const char *name,
                         struct proc_dir_entry *parent, const char *dest);
 extern void lprocfs_free_per_client_stats(struct obd_device *obd);
@@ -436,10 +439,6 @@ extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
 
 extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
 extern int lprocfs_obd_cleanup(struct obd_device *obd);
 
 extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
 extern int lprocfs_obd_cleanup(struct obd_device *obd);
-extern int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-                              cfs_read_proc_t *read_proc,
-                              cfs_write_proc_t *write_proc,
-                              void *data);
 extern void lprocfs_free_per_client_stats(struct obd_device *obd);
 extern struct file_operations lprocfs_evict_client_fops;
 
 extern void lprocfs_free_per_client_stats(struct obd_device *obd);
 extern struct file_operations lprocfs_evict_client_fops;
 
@@ -658,11 +657,12 @@ static inline int lprocfs_exp_setup(struct obd_export *exp,
 { return 0; }
 static inline int lprocfs_exp_cleanup(struct obd_export *exp)
 { return 0; }
 { return 0; }
 static inline int lprocfs_exp_cleanup(struct obd_export *exp)
 { return 0; }
-static inline int lprocfs_add_simple(struct proc_dir_entry *root,
+static inline cfs_proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
                                      char *name,
                                      cfs_read_proc_t *read_proc,
                                      cfs_write_proc_t *write_proc,
                                      char *name,
                                      cfs_read_proc_t *read_proc,
                                      cfs_write_proc_t *write_proc,
-                                     void *data)
+                                     void *data,
+                                     struct file_operations *fops)
 {return 0; }
 static inline struct proc_dir_entry *lprocfs_add_symlink(const char *name,
                         struct proc_dir_entry *parent, const char *dest)
 {return 0; }
 static inline struct proc_dir_entry *lprocfs_add_symlink(const char *name,
                         struct proc_dir_entry *parent, const char *dest)
index 24857e4..e6c1e43 100644 (file)
@@ -73,6 +73,14 @@ extern int llapi_file_create(const char *name, unsigned long stripe_size,
 extern int llapi_file_open(const char *name, int flags, int mode,
                            unsigned long stripe_size, int stripe_offset,
                            int stripe_count, int stripe_pattern);
 extern int llapi_file_open(const char *name, int flags, int mode,
                            unsigned long stripe_size, int stripe_offset,
                            int stripe_count, int stripe_pattern);
+extern int llapi_file_create_pool(const char *name, unsigned long stripe_size,
+                                  int stripe_offset, int stripe_count,
+                                  int stripe_pattern, char *pool_name);
+extern int llapi_file_open_pool(const char *name, int flags, int mode,
+                                unsigned long stripe_size, int stripe_offset,
+                                int stripe_count, int stripe_pattern,
+                                char *pool_name);
+extern int llapi_poollist(char *name);
 extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
 #define HAVE_LLAPI_FILE_LOOKUP
 extern int llapi_file_lookup(int dirfd, const char *name);
 extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
 #define HAVE_LLAPI_FILE_LOOKUP
 extern int llapi_file_lookup(int dirfd, const char *name);
@@ -102,7 +110,9 @@ struct find_param {
                         exclude_gid:1,
                         exclude_uid:1,
                         check_gid:1,
                         exclude_gid:1,
                         exclude_uid:1,
                         check_gid:1,
-                        check_uid:1;
+                        check_uid:1,
+                        check_pool:1,
+                        exclude_pool:1;
 
         int     verbose;
         int     quiet;
 
         int     verbose;
         int     quiet;
@@ -124,6 +134,8 @@ struct find_param {
         /* In-precess parameters. */
         unsigned int depth;
         dev_t   st_dev;
         /* In-precess parameters. */
         unsigned int depth;
         dev_t   st_dev;
+
+        char poolname[MAXPOOLNAME+1];
 };
 
 extern int llapi_getstripe(char *path, struct find_param *param);
 };
 
 extern int llapi_getstripe(char *path, struct find_param *param);
@@ -136,7 +148,7 @@ extern int llapi_ping(char *obd_type, char *obd_name);
 extern int llapi_target_check(int num_types, char **obd_types, char *dir);
 extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
 extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid);
 extern int llapi_target_check(int num_types, char **obd_types, char *dir);
 extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
 extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid);
-extern int llapi_file_get_lov_fuuid(int fd, struct obd_uuid *lov_uuid);
+extern int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_uuid);
 extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
 extern int llapi_is_lustre_mnttype(const char *type);
 extern int parse_size(char *optarg, unsigned long long *size,
 extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
 extern int llapi_is_lustre_mnttype(const char *type);
 extern int parse_size(char *optarg, unsigned long long *size,
index 4f09b84..942ec44 100644 (file)
@@ -607,6 +607,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                               *b=10600 */
 #define OBD_CONNECT_CKSUM      0x20000000ULL /* support several cksum algos */
 #define OBD_CONNECT_FID        0x40000000ULL /* FID is supported by server */
                                               *b=10600 */
 #define OBD_CONNECT_CKSUM      0x20000000ULL /* support several cksum algos */
 #define OBD_CONNECT_FID        0x40000000ULL /* FID is supported by server */
+#define OBD_CONNECT_LOV_V3    0x100000000ULL /* client supports lov v3 ea */
 
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags()
  * and lustre/utils/wirecheck.c */
 
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags()
  * and lustre/utils/wirecheck.c */
@@ -626,7 +627,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \
                                 OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \
                                 OBD_CONNECT_FID | \
                                 OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \
                                 OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \
                                 OBD_CONNECT_FID | \
-                                LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_AT)
+                                LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_AT | \
+                                OBD_CONNECT_LOV_V3)
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
@@ -748,6 +750,7 @@ typedef __u32 obd_count;
 #define LOV_MAGIC_V1      0x0BD10BD0
 #define LOV_MAGIC         LOV_MAGIC_V1
 #define LOV_MAGIC_JOIN    0x0BD20BD0
 #define LOV_MAGIC_V1      0x0BD10BD0
 #define LOV_MAGIC         LOV_MAGIC_V1
 #define LOV_MAGIC_JOIN    0x0BD20BD0
+#define LOV_MAGIC_V3      0x0BD30BD0
 
 #define LOV_PATTERN_RAID0 0x001   /* stripes are used round-robin */
 #define LOV_PATTERN_RAID1 0x002   /* stripes are mirrors of each other */
 
 #define LOV_PATTERN_RAID0 0x001   /* stripes are used round-robin */
 #define LOV_PATTERN_RAID1 0x002   /* stripes are mirrors of each other */
@@ -757,6 +760,9 @@ typedef __u32 obd_count;
 #define LOV_OBJECT_GROUP_DEFAULT ~0ULL
 #define LOV_OBJECT_GROUP_CLEAR 0ULL
 
 #define LOV_OBJECT_GROUP_DEFAULT ~0ULL
 #define LOV_OBJECT_GROUP_CLEAR 0ULL
 
+#define MAXPOOLNAME 16
+#define POOLNAMEF "%.16s"
+
 #define lov_ost_data lov_ost_data_v1
 struct lov_ost_data_v1 {          /* per-stripe data structure (little-endian)*/
         __u64 l_object_id;        /* OST object ID */
 #define lov_ost_data lov_ost_data_v1
 struct lov_ost_data_v1 {          /* per-stripe data structure (little-endian)*/
         __u64 l_object_id;        /* OST object ID */
@@ -776,7 +782,7 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
         struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 };
 
         struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 };
 
-extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm);
+/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
 
 #define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
 #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
 
 #define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
 #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
@@ -785,6 +791,18 @@ extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm);
 #define XATTR_NAME_ACL_DEFAULT  "system.posix_acl_default"
 #define XATTR_NAME_LOV          "trusted.lov"
 
 #define XATTR_NAME_ACL_DEFAULT  "system.posix_acl_default"
 #define XATTR_NAME_LOV          "trusted.lov"
 
+struct lov_mds_md_v3 {            /* LOV EA mds/wire data (little-endian) */
+        __u32 lmm_magic;          /* magic number = LOV_MAGIC_V3 */
+        __u32 lmm_pattern;        /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+        __u64 lmm_object_id;      /* LOV object ID */
+        __u64 lmm_object_gr;      /* LOV object group */
+        __u32 lmm_stripe_size;    /* size of stripe in bytes */
+        __u32 lmm_stripe_count;   /* num stripes in use for this object */
+        char  lmm_pool_name[MAXPOOLNAME]; /* must be 32bit aligned */
+        struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+};
+
+
 #define OBD_MD_FLID        (0x00000001ULL) /* object ID */
 #define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
 #define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
 #define OBD_MD_FLID        (0x00000001ULL) /* object ID */
 #define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
 #define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
@@ -2151,8 +2169,10 @@ extern void lustre_swab_ost_body (struct ost_body *b);
 extern void lustre_swab_ost_last_id(obd_id *id);
 extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
 
 extern void lustre_swab_ost_last_id(obd_id *id);
 extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
 
-extern void lustre_swab_lov_user_md(struct lov_user_md *lum);
-extern void lustre_swab_lov_user_md_objects(struct lov_user_md *lum);
+extern void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+                                            int stripe_count);
 extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj);
 
 /* llog_swab.c */
 extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj);
 
 /* llog_swab.c */
index 2fd8bc5..ed40b0e 100644 (file)
@@ -129,13 +129,15 @@ struct obd_statfs;
 
 #define LOV_USER_MAGIC_V1 0x0BD10BD0
 #define LOV_USER_MAGIC    LOV_USER_MAGIC_V1
 
 #define LOV_USER_MAGIC_V1 0x0BD10BD0
 #define LOV_USER_MAGIC    LOV_USER_MAGIC_V1
-
 #define LOV_USER_MAGIC_JOIN 0x0BD20BD0
 #define LOV_USER_MAGIC_JOIN 0x0BD20BD0
+#define LOV_USER_MAGIC_V3 0x0BD30BD0
 
 #define LOV_PATTERN_RAID0 0x001
 #define LOV_PATTERN_RAID1 0x002
 #define LOV_PATTERN_FIRST 0x100
 
 
 #define LOV_PATTERN_RAID0 0x001
 #define LOV_PATTERN_RAID1 0x002
 #define LOV_PATTERN_FIRST 0x100
 
+#define MAXPOOLNAME 16
+
 #define lov_user_ost_data lov_user_ost_data_v1
 struct lov_user_ost_data_v1 {     /* per-stripe data structure */
         __u64 l_object_id;        /* OST object ID */
 #define lov_user_ost_data lov_user_ost_data_v1
 struct lov_user_ost_data_v1 {     /* per-stripe data structure */
         __u64 l_object_id;        /* OST object ID */
@@ -156,6 +158,18 @@ struct lov_user_md_v1 {           /* LOV EA user data (host-endian) */
         struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 } __attribute__((packed));
 
         struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 } __attribute__((packed));
 
+struct lov_user_md_v3 {           /* LOV EA user data (host-endian) */
+        __u32 lmm_magic;          /* magic number = LOV_USER_MAGIC_V3 */
+        __u32 lmm_pattern;        /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+        __u64 lmm_object_id;      /* LOV object ID */
+        __u64 lmm_object_gr;      /* LOV object group */
+        __u32 lmm_stripe_size;    /* size of stripe in bytes */
+        __u16 lmm_stripe_count;   /* num stripes in use for this object */
+        __u16 lmm_stripe_offset;  /* starting stripe offset in lmm_objects */
+        char  lmm_pool_name[MAXPOOLNAME]; /* pool name */
+        struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed));
+
 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
  * use this.  It is unsafe to #define those values in this header as it
  * is possible the application has already #included <sys/stat.h>. */
 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
  * use this.  It is unsafe to #define those values in this header as it
  * is possible the application has already #included <sys/stat.h>. */
@@ -163,7 +177,12 @@ struct lov_user_md_v1 {           /* LOV EA user data (host-endian) */
 #define lov_user_mds_data lov_user_mds_data_v1
 struct lov_user_mds_data_v1 {
         lstat_t lmd_st;                 /* MDS stat struct */
 #define lov_user_mds_data lov_user_mds_data_v1
 struct lov_user_mds_data_v1 {
         lstat_t lmd_st;                 /* MDS stat struct */
-        struct lov_user_md_v1 lmd_lmm;  /* LOV EA user data */
+        struct lov_user_md_v1 lmd_lmm;  /* LOV EA V1 user data */
+} __attribute__((packed));
+
+struct lov_user_mds_data_v3 {
+        lstat_t lmd_st;                 /* MDS stat struct */
+        struct lov_user_md_v3 lmd_lmm;  /* LOV EA V3 user data */
 } __attribute__((packed));
 #endif
 
 } __attribute__((packed));
 #endif
 
index 266d6f5..e52a9f3 100644 (file)
@@ -73,6 +73,10 @@ enum lcfg_command_type {
         LCFG_ADD_MDC        = 0x00cf014,
         LCFG_DEL_MDC        = 0x00cf015,
         LCFG_SPTLRPC_CONF   = 0x00ce016,
         LCFG_ADD_MDC        = 0x00cf014,
         LCFG_DEL_MDC        = 0x00cf015,
         LCFG_SPTLRPC_CONF   = 0x00ce016,
+        LCFG_POOL_NEW       = 0x00ce020,
+        LCFG_POOL_ADD       = 0x00ce021,
+        LCFG_POOL_REM       = 0x00ce022,
+        LCFG_POOL_DEL       = 0x00ce023,
 };
 
 struct lustre_cfg_bufs {
 };
 
 struct lustre_cfg_bufs {
@@ -222,7 +226,7 @@ static inline struct lustre_cfg *lustre_cfg_new(int cmd,
         OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount,
                                        bufs->lcfg_buflen));
         if (!lcfg)
         OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount,
                                        bufs->lcfg_buflen));
         if (!lcfg)
-                RETURN(lcfg);
+                RETURN(ERR_PTR(-ENOMEM));
 
         lcfg->lcfg_version = LUSTRE_CFG_VERSION;
         lcfg->lcfg_command = cmd;
 
         lcfg->lcfg_version = LUSTRE_CFG_VERSION;
         lcfg->lcfg_command = cmd;
index 0a8eb9a..83697fe 100644 (file)
@@ -498,6 +498,7 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_DUMP_LOG               _IOWR('f', 185, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_CLEAR_LOG              _IOWR('f', 186, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PARAM                  _IOW ('f', 187, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_DUMP_LOG               _IOWR('f', 185, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_CLEAR_LOG              _IOWR('f', 186, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PARAM                  _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_POOL                   _IOWR('f', 188, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_CATLOGLIST             _IOWR('f', 190, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_LLOG_INFO              _IOWR('f', 191, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_CATLOGLIST             _IOWR('f', 190, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_LLOG_INFO              _IOWR('f', 191, OBD_IOC_DATA_TYPE)
index 109c9af..81cd3d8 100644 (file)
@@ -150,6 +150,7 @@ struct lov_stripe_md {
                 __u32 lw_stripe_size;      /* size of the stripe */
                 __u32 lw_pattern;          /* striping pattern (RAID0, RAID1) */
                 unsigned lw_stripe_count;  /* number of objects being striped over */
                 __u32 lw_stripe_size;      /* size of the stripe */
                 __u32 lw_pattern;          /* striping pattern (RAID0, RAID1) */
                 unsigned lw_stripe_count;  /* number of objects being striped over */
+                char  lw_pool_name[MAXPOOLNAME]; /* pool name */
         } lsm_wire;
 
         struct lov_array_info *lsm_array; /*Only for joined file array info*/
         } lsm_wire;
 
         struct lov_array_info *lsm_array; /*Only for joined file array info*/
@@ -163,6 +164,7 @@ struct lov_stripe_md {
 #define lsm_stripe_size  lsm_wire.lw_stripe_size
 #define lsm_pattern      lsm_wire.lw_pattern
 #define lsm_stripe_count lsm_wire.lw_stripe_count
 #define lsm_stripe_size  lsm_wire.lw_stripe_size
 #define lsm_pattern      lsm_wire.lw_pattern
 #define lsm_stripe_count lsm_wire.lw_stripe_count
+#define lsm_pool_name    lsm_wire.lw_pool_name
 
 struct obd_info;
 
 
 struct obd_info;
 
@@ -649,15 +651,32 @@ struct ltd_qos {
         unsigned int        ltq_usable:1;    /* usable for striping */
 };
 
         unsigned int        ltq_usable:1;    /* usable for striping */
 };
 
+/* Generic subset of OSTs */
+struct ost_pool {
+        __u32              *op_array;        /* array of index of
+                                                lov_obd->lov_tgts */
+        unsigned int        op_count;        /* number of OSTs in the array */
+        unsigned int        op_size;         /* allocated size of lp_array */
+        rwlock_t            op_rwlock;       /* to protect lov_pool use */
+};
+
+/* Round-robin allocator data */
+struct lov_qos_rr {
+        __u32               lqr_start_idx;   /* start index of new inode */
+        __u32               lqr_offset_idx;  /* aliasing for start_idx  */
+        int                 lqr_start_count; /* reseed counter */
+        struct ost_pool     lqr_pool;        /* round-robin optimized list */
+        unsigned long       lqr_dirty:1;     /* recalc round-robin list */
+};
+
+/* Stripe placement optimization */
 struct lov_qos {
         struct list_head    lq_oss_list;    /* list of OSSs that targets use */
         struct rw_semaphore lq_rw_sem;
         __u32               lq_active_oss_count;
 struct lov_qos {
         struct list_head    lq_oss_list;    /* list of OSSs that targets use */
         struct rw_semaphore lq_rw_sem;
         __u32               lq_active_oss_count;
-        __u32              *lq_rr_array;    /* round-robin optimized list */
-        unsigned int        lq_rr_size;     /* rr array size */
         unsigned int        lq_prio_free;   /* priority for free space */
         unsigned int        lq_prio_free;   /* priority for free space */
+        struct lov_qos_rr   lq_rr;          /* round robin qos data */
         unsigned long       lq_dirty:1,     /* recalc qos data */
         unsigned long       lq_dirty:1,     /* recalc qos data */
-                            lq_dirty_rr:1,  /* recalc round-robin list */
                             lq_same_space:1,/* the ost's all have approx.
                                                the same space avail */
                             lq_reset:1;     /* zero current penalties */
                             lq_same_space:1,/* the ost's all have approx.
                                                the same space avail */
                             lq_reset:1;     /* zero current penalties */
@@ -674,9 +693,29 @@ struct lov_tgt_desc {
                             ltd_reap:1;  /* should this target be deleted */
 };
 
                             ltd_reap:1;  /* should this target be deleted */
 };
 
+/* Pool metadata */
+#define pool_tgt_size(_p)   _p->pool_obds.op_size
+#define pool_tgt_count(_p)  _p->pool_obds.op_count
+#define pool_tgt_array(_p)  _p->pool_obds.op_array
+#define pool_tgt_rwlock(_p) _p->pool_obds.op_rwlock
+#define pool_tgt(_p, _i)    _p->pool_lov->lov_tgts[_p->pool_obds.op_array[_i]]
+
+struct pool_desc {
+        char                    pool_name[MAXPOOLNAME + 1]; /* name of pool */
+        struct ost_pool         pool_obds;              /* pool members */
+        struct lov_qos_rr       pool_rr;                /* round robin qos */
+        struct hlist_node       pool_hash;              /* access by poolname */
+        struct list_head        pool_list;              /* serial access */
+        cfs_proc_dir_entry_t   *pool_proc_entry;        /* file in /proc */
+        struct lov_obd         *pool_lov;               /* lov obd to which this
+                                                           pool belong */
+};
+
 struct lov_obd {
         struct lov_desc         desc;
 struct lov_obd {
         struct lov_desc         desc;
-        struct lov_tgt_desc   **lov_tgts;
+        struct lov_tgt_desc   **lov_tgts;              /* sparse array */
+        struct ost_pool         lov_packed;            /* all OSTs in a packed
+                                                          array */
         struct semaphore        lov_lock;
         struct obd_connect_data lov_ocd;
         struct lov_qos          lov_qos;               /* qos info per lov */
         struct semaphore        lov_lock;
         struct obd_connect_data lov_ocd;
         struct lov_qos          lov_qos;               /* qos info per lov */
@@ -685,13 +724,14 @@ struct lov_obd {
         __u32                   lov_active_tgt_count;  /* how many active */
         __u32                   lov_death_row;/* tgts scheduled to be deleted */
         __u32                   lov_tgt_size;   /* size of tgts array */
         __u32                   lov_active_tgt_count;  /* how many active */
         __u32                   lov_death_row;/* tgts scheduled to be deleted */
         __u32                   lov_tgt_size;   /* size of tgts array */
-        __u32                   lov_start_idx;  /* start index of new inode */
-        __u32                   lov_offset_idx; /* aliasing for start_idx  */
-        int                     lov_start_count;/* reseed counter */
         int                     lov_connects;
         obd_page_removal_cb_t   lov_page_removal_cb;
         obd_pin_extent_cb       lov_page_pin_cb;
         obd_lock_cancel_cb      lov_lock_cancel_cb;
         int                     lov_connects;
         obd_page_removal_cb_t   lov_page_removal_cb;
         obd_pin_extent_cb       lov_page_pin_cb;
         obd_lock_cancel_cb      lov_lock_cancel_cb;
+        int                     lov_pool_count;
+        lustre_hash_t          *lov_pools_hash_body; /* used for key access */
+        struct list_head        lov_pool_list; /* used for sequential access */
+        cfs_proc_dir_entry_t   *lov_pool_proc_entry;
 };
 
 struct lmv_tgt_desc {
 };
 
 struct lmv_tgt_desc {
@@ -1340,7 +1380,13 @@ struct obd_ops {
                                        obd_lock_cancel_cb cb);
         int (*o_unregister_lock_cancel_cb)(struct obd_export *exp,
                                          obd_lock_cancel_cb cb);
                                        obd_lock_cancel_cb cb);
         int (*o_unregister_lock_cancel_cb)(struct obd_export *exp,
                                          obd_lock_cancel_cb cb);
-
+        /* pools methods */
+        int (*o_pool_new)(struct obd_device *obd, char *poolname);
+        int (*o_pool_del)(struct obd_device *obd, char *poolname);
+        int (*o_pool_add)(struct obd_device *obd, char *poolname,
+                          char *ostname);
+        int (*o_pool_rem)(struct obd_device *obd, char *poolname,
+                          char *ostname);
         /*
          * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
          * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
         /*
          * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
          * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
@@ -1511,15 +1557,18 @@ struct lsm_operations {
                              struct lov_mds_md *lmm);
 };
 
                              struct lov_mds_md *lmm);
 };
 
-extern struct lsm_operations lsm_plain_ops;
+extern struct lsm_operations lsm_v1_ops;
 extern struct lsm_operations lsm_join_ops;
 extern struct lsm_operations lsm_join_ops;
+extern struct lsm_operations lsm_v3_ops;
 static inline struct lsm_operations *lsm_op_find(int magic)
 {
         switch(magic) {
 static inline struct lsm_operations *lsm_op_find(int magic)
 {
         switch(magic) {
-        case LOV_MAGIC:
-               return &lsm_plain_ops;
+        case LOV_MAGIC_V1:
+               return &lsm_v1_ops;
         case LOV_MAGIC_JOIN:
                return &lsm_join_ops;
         case LOV_MAGIC_JOIN:
                return &lsm_join_ops;
+        case LOV_MAGIC_V3:
+               return &lsm_v3_ops;
         default:
                CERROR("Cannot recognize lsm_magic %d\n", magic);
                return NULL;
         default:
                CERROR("Cannot recognize lsm_magic %d\n", magic);
                return NULL;
index 8a73f27..fa44819 100644 (file)
@@ -946,6 +946,54 @@ static inline int obd_ping(struct obd_export *exp)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
+static inline int obd_pool_new(struct obd_device *obd, char *poolname)
+{
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, pool_new);
+
+        rc = OBP(obd, pool_new)(obd, poolname);
+        RETURN(rc);
+}
+
+static inline int obd_pool_del(struct obd_device *obd, char *poolname)
+{
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, pool_del);
+
+        rc = OBP(obd, pool_del)(obd, poolname);
+        RETURN(rc);
+}
+
+static inline int obd_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, pool_add);
+
+        rc = OBP(obd, pool_add)(obd, poolname, ostname);
+        RETURN(rc);
+}
+
+static inline int obd_pool_rem(struct obd_device *obd, char *poolname, char *ostname)
+{
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, pool_rem);
+
+        rc = OBP(obd, pool_rem)(obd, poolname, ostname);
+        RETURN(rc);
+}
+
 static inline int obd_init_export(struct obd_export *exp)
 {
         int rc = 0;
 static inline int obd_init_export(struct obd_export *exp)
 {
         int rc = 0;
index 64798ab..da3ca51 100644 (file)
@@ -42,13 +42,17 @@ static inline int lov_stripe_md_size(int stripes)
         return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo*);
 }
 
         return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo*);
 }
 
-#define lov_mds_md_size(stripes) lov_mds_md_v1_size(stripes)
-static inline int lov_mds_md_v1_size(int stripes)
+static inline int lov_mds_md_size(int stripes, int lmm_magic)
 {
 {
-        return sizeof(struct lov_mds_md_v1) +
-                stripes * sizeof(struct lov_ost_data_v1);
+        if (lmm_magic == LOV_MAGIC_V3)
+                return sizeof(struct lov_mds_md_v3) +
+                        stripes * sizeof(struct lov_ost_data_v1);
+        else
+                return sizeof(struct lov_mds_md_v1) +
+                        stripes * sizeof(struct lov_ost_data_v1);
 }
 
 }
 
+
 #define IOC_LOV_TYPE                   'g'
 #define IOC_LOV_MIN_NR                 50
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
 #define IOC_LOV_TYPE                   'g'
 #define IOC_LOV_MIN_NR                 50
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
index 3978b71..27cd186 100644 (file)
@@ -345,7 +345,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 
         cli->cl_import = imp;
         /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
 
         cli->cl_import = imp;
         /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
-        cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+        cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
         cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
         cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
index 552a586..d7d37cf 100644 (file)
@@ -1730,11 +1730,25 @@ static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
         if (rc)
                 return(-EFAULT);
 
         if (rc)
                 return(-EFAULT);
 
-        if (lum.lmm_magic != LOV_USER_MAGIC)
+        switch (lum.lmm_magic) {
+        case LOV_USER_MAGIC_V1: {
+                if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
+                        lustre_swab_lov_user_md_v1(&lum);
+                break;
+                }
+        case LOV_USER_MAGIC_V3: {
+                if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
+                        lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)&lum);
+                break;
+                }
+        default: {
+                CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
+                                " %#08x != %#08x nor %#08x\n",
+                                lum.lmm_magic, LOV_USER_MAGIC_V1,
+                                LOV_USER_MAGIC_V3);
                 RETURN(-EINVAL);
                 RETURN(-EINVAL);
-
-        if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
-                lustre_swab_lov_user_md(&lum);
+        }
+        }
 
         /* swabbing is done in lov_setstripe() on server side */
         rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
 
         /* swabbing is done in lov_setstripe() on server side */
         rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
@@ -1968,7 +1982,9 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
 static int
 llu_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
 {
 static int
 llu_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
 {
-        struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
+        /* even if default lov is LOV_MAGIC_V1 we use LOV_MAGIC_V3
+         * to be sure buffer are large enough */
+        struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
         __u32 valsize = sizeof(struct lov_desc);
         int rc, easize, def_easize, cookiesize;
         struct lov_desc desc;
         __u32 valsize = sizeof(struct lov_desc);
         int rc, easize, def_easize, cookiesize;
         struct lov_desc desc;
index 55fa6a8..930bd26 100644 (file)
@@ -555,17 +555,34 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
         struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
         struct obd_device *mgc = lsi->lsi_mgc;
         char *fsname = NULL, *param = NULL;
         struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
         struct obd_device *mgc = lsi->lsi_mgc;
         char *fsname = NULL, *param = NULL;
+        int lum_size;
 
         /*
          * This is coming from userspace, so should be in
          * local endian.  But the MDS would like it in little
          * endian, so we swab it before we send it.
          */
 
         /*
          * This is coming from userspace, so should be in
          * local endian.  But the MDS would like it in little
          * endian, so we swab it before we send it.
          */
-        if (lump->lmm_magic != LOV_USER_MAGIC)
+        switch (lump->lmm_magic) {
+        case LOV_USER_MAGIC_V1: {
+                if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
+                        lustre_swab_lov_user_md_v1(lump);
+                lum_size = sizeof(struct lov_user_md_v1);
+                break;
+                }
+        case LOV_USER_MAGIC_V3: {
+                if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
+                        lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lump);
+                lum_size = sizeof(struct lov_user_md_v3);
+                break;
+                }
+        default: {
+                CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
+                                " %#08x != %#08x nor %#08x\n",
+                                lump->lmm_magic, LOV_USER_MAGIC_V1,
+                                LOV_USER_MAGIC_V3);
                 RETURN(-EINVAL);
                 RETURN(-EINVAL);
-
-        if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
-                lustre_swab_lov_user_md(lump);
+                }
+        }
 
         op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                      LUSTRE_OPC_ANY, NULL);
 
         op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                      LUSTRE_OPC_ANY, NULL);
@@ -573,7 +590,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
                 RETURN(PTR_ERR(op_data));
 
         /* swabbing is done in lov_setstripe() on server side */
                 RETURN(PTR_ERR(op_data));
 
         /* swabbing is done in lov_setstripe() on server side */
-        rc = md_setattr(sbi->ll_md_exp, op_data, lump, sizeof(*lump),
+        rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
                         NULL, 0, &req, NULL);
         ll_finish_md_op_data(op_data);
         ptlrpc_req_finished(req);
                         NULL, 0, &req, NULL);
         ll_finish_md_op_data(op_data);
         ptlrpc_req_finished(req);
@@ -582,6 +599,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
                         CERROR("mdc_setattr fails: rc = %d\n", rc);
         }
 
                         CERROR("mdc_setattr fails: rc = %d\n", rc);
         }
 
+        /* In the following we use the fact that LOV_USER_MAGIC_V1 and
+         LOV_USER_MAGIC_V3 have the same initial fields so we do not
+         need the make the distiction between the 2 versions */
         if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
                 OBD_ALLOC(param, MGS_PARAM_MAXLEN);
 
         if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
                 OBD_ALLOC(param, MGS_PARAM_MAXLEN);
 
@@ -661,8 +681,19 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
          * little endian.  We convert it to host endian before
          * passing it to userspace.
          */
          * little endian.  We convert it to host endian before
          * passing it to userspace.
          */
-        if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
-                lustre_swab_lov_user_md((struct lov_user_md *)lmm);
+        /* We don't swab objects for directories */
+        switch (le32_to_cpu(lmm->lmm_magic)) {
+        case LOV_MAGIC_V1:
+                if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
+                        lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+                break;
+        case LOV_MAGIC_V3:
+                if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
+                        lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+                break;
+        default:
+                CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
+                rc = -EPROTO;
         }
 out:
         *lmmp = lmm;
         }
 out:
         *lmmp = lmm;
@@ -737,21 +768,33 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 return rc;
         }
         case LL_IOC_LOV_SETSTRIPE: {
                 return rc;
         }
         case LL_IOC_LOV_SETSTRIPE: {
-                struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
+                struct lov_user_md_v3 lumv3;
+                struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+                struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
+                struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
+
                 int rc = 0;
                 int set_default = 0;
 
                 int rc = 0;
                 int set_default = 0;
 
-                LASSERT(sizeof(lum) == sizeof(*lump));
-                LASSERT(sizeof(lum.lmm_objects[0]) ==
-                        sizeof(lump->lmm_objects[0]));
-                rc = copy_from_user(&lum, lump, sizeof(lum));
+                LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
+                LASSERT(sizeof(lumv3.lmm_objects[0]) ==
+                        sizeof(lumv3p->lmm_objects[0]));
+                /* first try with v1 which is smaller than v3 */
+                rc = copy_from_user(lumv1, lumv1p, sizeof(*lumv1));
                 if (rc)
                         RETURN(-EFAULT);
 
                 if (rc)
                         RETURN(-EFAULT);
 
+                if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
+                        rc = copy_from_user(&lumv3, lumv3p, sizeof(lumv3));
+                        if (rc)
+                                RETURN(-EFAULT);
+                }
+
                 if (inode->i_sb->s_root == file->f_dentry)
                         set_default = 1;
 
                 if (inode->i_sb->s_root == file->f_dentry)
                         set_default = 1;
 
-                rc = ll_dir_setstripe(inode, &lum, set_default);
+                /* in v1 and v3 cases lumv1 points to data */
+                rc = ll_dir_setstripe(inode, lumv1, set_default);
 
                 RETURN(rc);
         }
 
                 RETURN(rc);
         }
@@ -863,6 +906,29 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 if (rc)
                         GOTO(free_lmm, rc = -EFAULT);
 
                 if (rc)
                         GOTO(free_lmm, rc = -EFAULT);
 
+                switch (lmm->lmm_magic) {
+                case LOV_USER_MAGIC_V1:
+                        if (LOV_USER_MAGIC_V1 == cpu_to_le32(LOV_USER_MAGIC_V1))
+                                break;
+                        /* swab objects first so that stripes num will be sane */
+                        lustre_swab_lov_user_md_objects(
+                                ((struct lov_user_md_v1 *)lmm)->lmm_objects,
+                                ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
+                        lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+                        break;
+                case LOV_USER_MAGIC_V3:
+                        if (LOV_USER_MAGIC_V3 == cpu_to_le32(LOV_USER_MAGIC_V3))
+                                break;
+                        /* swab objects first so that stripes num will be sane */
+                        lustre_swab_lov_user_md_objects(
+                                ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+                                ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
+                        lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+                        break;
+                default:
+                        GOTO(free_lmm, rc = -EINVAL);
+                }
+
                 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
                 if (rc < 0)
                         GOTO(free_lmm, rc = -ENOMEM);
                 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
                 if (rc < 0)
                         GOTO(free_lmm, rc = -ENOMEM);
index 438593b..96961fc 100644 (file)
@@ -2091,16 +2091,35 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
         lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
         LASSERT(lmm != NULL);
 
         lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
         LASSERT(lmm != NULL);
 
+        if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
+            (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
+            (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
+                GOTO(out, rc = -EPROTO);
+        }
+
         /*
          * This is coming from the MDS, so is probably in
          * little endian.  We convert it to host endian before
          * passing it to userspace.
          */
         /*
          * This is coming from the MDS, so is probably in
          * little endian.  We convert it to host endian before
          * passing it to userspace.
          */
-        if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
-                lustre_swab_lov_user_md((struct lov_user_md *)lmm);
-                lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
-        } else if (lmm->lmm_magic == __swab32(LOV_MAGIC_JOIN)) {
-                lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
+        if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
+                /* if function called for directory - we should
+                 * avoid swab not existent lsm objects */
+                if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
+                        lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+                        if (S_ISREG(body->mode))
+                                lustre_swab_lov_user_md_objects(
+                                 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
+                                 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
+                } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
+                        lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+                        if (S_ISREG(body->mode))
+                                lustre_swab_lov_user_md_objects(
+                                 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+                                 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
+                } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
+                        lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
+                }
         }
 
         if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
         }
 
         if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
@@ -2193,23 +2212,34 @@ static int ll_lov_setea(struct inode *inode, struct file *file,
 static int ll_lov_setstripe(struct inode *inode, struct file *file,
                             unsigned long arg)
 {
 static int ll_lov_setstripe(struct inode *inode, struct file *file,
                             unsigned long arg)
 {
-        struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
+        struct lov_user_md_v3 lumv3;
+        struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+        struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
+        struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
+        int lum_size;
         int rc;
         int flags = FMODE_WRITE;
         ENTRY;
 
         int rc;
         int flags = FMODE_WRITE;
         ENTRY;
 
-        /* Bug 1152: copy properly when this is no longer true */
-        LASSERT(sizeof(lum) == sizeof(*lump));
-        LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
-        rc = copy_from_user(&lum, lump, sizeof(lum));
+        /* first try with v1 which is smaller than v3 */
+        lum_size = sizeof(struct lov_user_md_v1);
+        rc = copy_from_user(lumv1, lumv1p, lum_size);
         if (rc)
                 RETURN(-EFAULT);
 
         if (rc)
                 RETURN(-EFAULT);
 
-        rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum));
+        if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
+                lum_size = sizeof(struct lov_user_md_v3);
+                rc = copy_from_user(&lumv3, lumv3p, lum_size);
+                if (rc)
+                        RETURN(-EFAULT);
+        }
+
+        rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
         if (rc == 0) {
         if (rc == 0) {
-                 put_user(0, &lump->lmm_stripe_count);
+                 put_user(0, &lumv1p->lmm_stripe_count);
                  rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
                  rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
-                                    0, ll_i2info(inode)->lli_smd, lump);
+                                    0, ll_i2info(inode)->lli_smd,
+                                    (void *)arg);
         }
         RETURN(rc);
 }
         }
         RETURN(rc);
 }
index b42fb5a..257516f 100644 (file)
@@ -248,7 +248,7 @@ static struct dentry_operations ll_d_root_ops = {
  * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
 static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
 {
  * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
 static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
 {
-        struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
+        struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
         __u32 valsize = sizeof(struct lov_desc);
         int rc, easize, def_easize, cookiesize;
         struct lov_desc desc;
         __u32 valsize = sizeof(struct lov_desc);
         int rc, easize, def_easize, cookiesize;
         struct lov_desc desc;
@@ -316,7 +316,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT_JOIN     | OBD_CONNECT_ATTRFID  |
                                   OBD_CONNECT_VERSION  | OBD_CONNECT_MDS_CAPA |
                                   OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET|
                                   OBD_CONNECT_JOIN     | OBD_CONNECT_ATTRFID  |
                                   OBD_CONNECT_VERSION  | OBD_CONNECT_MDS_CAPA |
                                   OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET|
-                                  OBD_CONNECT_FID      | OBD_CONNECT_AT;
+                                  OBD_CONNECT_FID      | OBD_CONNECT_AT |
+                                  OBD_CONNECT_LOV_V3;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
@@ -1802,7 +1803,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
         if (lsm != NULL) {
                 if (lli->lli_smd == NULL) {
         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
         if (lsm != NULL) {
                 if (lli->lli_smd == NULL) {
-                        if (lsm->lsm_magic != LOV_MAGIC &&
+                        if (lsm->lsm_magic != LOV_MAGIC_V1 &&
+                            lsm->lsm_magic != LOV_MAGIC_V3 &&
                             lsm->lsm_magic != LOV_MAGIC_JOIN) {
                                 dump_lsm(D_ERROR, lsm);
                                 LBUG();
                             lsm->lsm_magic != LOV_MAGIC_JOIN) {
                                 dump_lsm(D_ERROR, lsm);
                                 LBUG();
index f714192..0f223f8 100644 (file)
@@ -1,4 +1,4 @@
 MODULES := lov
 MODULES := lov
-lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o lov_ea.o
+lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o lov_ea.o lov_pool.o
 
 @INCLUDE_RULES@
 
 @INCLUDE_RULES@
index 8c3af02..c65e095 100644 (file)
@@ -36,7 +36,7 @@
 
 if LIBLUSTRE
 noinst_LIBRARIES = liblov.a
 
 if LIBLUSTRE
 noinst_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_log.c lov_obd.c lov_pack.c lov_request.c lov_offset.c lov_qos.c lov_merge.c lov_ea.c lov_internal.h
+liblov_a_SOURCES = lov_log.c lov_pool.c lov_obd.c lov_pack.c lov_request.c lov_offset.c lov_qos.c lov_merge.c lov_ea.c lov_internal.h
 liblov_a_CPPFLAGS = $(LLCPPFLAGS)
 liblov_a_CFLAGS = $(LLCFLAGS)
 endif
 liblov_a_CPPFLAGS = $(LLCPPFLAGS)
 liblov_a_CFLAGS = $(LLCFLAGS)
 endif
@@ -51,6 +51,7 @@ macos_PROGRAMS := lov
 
 lov_SOURCES :=          \
         lov_log.c       \
 
 lov_SOURCES :=          \
         lov_log.c       \
+        lov_pool.c     \
         lov_obd.c       \
         lov_pack.c      \
         lov_request.c   \
         lov_obd.c       \
         lov_pack.c      \
         lov_request.c   \
index 8167469..1ea9d70 100755 (executable)
@@ -68,19 +68,19 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
 
         if (stripe_count == 0 || stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
                 CERROR("bad stripe count %d\n", stripe_count);
 
         if (stripe_count == 0 || stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
                 CERROR("bad stripe count %d\n", stripe_count);
-                lov_dump_lmm_v1(D_WARNING, lmm);
+                lov_dump_lmm(D_WARNING, lmm);
                 return -EINVAL;
         }
                 return -EINVAL;
         }
-        
+
         if (lmm->lmm_object_id == 0) {
                 CERROR("zero object id\n");
         if (lmm->lmm_object_id == 0) {
                 CERROR("zero object id\n");
-                lov_dump_lmm_v1(D_WARNING, lmm);
+                lov_dump_lmm(D_WARNING, lmm);
                 return -EINVAL;
         }
                 return -EINVAL;
         }
-        
+
         if (lmm->lmm_pattern != cpu_to_le32(LOV_PATTERN_RAID0)) {
                 CERROR("bad striping pattern\n");
         if (lmm->lmm_pattern != cpu_to_le32(LOV_PATTERN_RAID0)) {
                 CERROR("bad striping pattern\n");
-                lov_dump_lmm_v1(D_WARNING, lmm);
+                lov_dump_lmm(D_WARNING, lmm);
                 return -EINVAL;
         }
 
                 return -EINVAL;
         }
 
@@ -90,7 +90,7 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
              0xffffffff)) {
                 CERROR("bad stripe size %u\n",
                        le32_to_cpu(lmm->lmm_stripe_size));
              0xffffffff)) {
                 CERROR("bad stripe size %u\n",
                        le32_to_cpu(lmm->lmm_stripe_size));
-                lov_dump_lmm_v1(D_WARNING, lmm);
+                lov_dump_lmm(D_WARNING, lmm);
                 return -EINVAL;
         }
         return 0;
                 return -EINVAL;
         }
         return 0;
@@ -118,6 +118,7 @@ struct lov_stripe_md *lsm_alloc_plain(int stripe_count, int *size)
                 lsm->lsm_oinfo[i] = loi;
         }
         lsm->lsm_stripe_count = stripe_count;
                 lsm->lsm_oinfo[i] = loi;
         }
         lsm->lsm_stripe_count = stripe_count;
+        lsm->lsm_pool_name[0] = '\0';
         return lsm;
 
 err:
         return lsm;
 
 err:
@@ -142,10 +143,15 @@ void lsm_free_plain(struct lov_stripe_md *lsm)
 static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
                                 struct lov_mds_md *lmm)
 {
 static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
                                 struct lov_mds_md *lmm)
 {
+        /*
+         * This supposes lov_mds_md_v1/v3 first fields are
+         * are the same
+         */
         lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
         lsm->lsm_object_gr = le64_to_cpu(lmm->lmm_object_gr);
         lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
         lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
         lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
         lsm->lsm_object_gr = le64_to_cpu(lmm->lmm_object_gr);
         lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
         lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
+        lsm->lsm_pool_name[0] = '\0';
 }
 
 static void
 }
 
 static void
@@ -197,20 +203,20 @@ static int lsm_destroy_plain(struct lov_stripe_md *lsm, struct obdo *oa,
         return 0;
 }
 
         return 0;
 }
 
-static int lsm_lmm_verify_plain(struct lov_mds_md *lmm, int lmm_bytes,
+static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
                              int *stripe_count)
 {
         if (lmm_bytes < sizeof(*lmm)) {
                              int *stripe_count)
 {
         if (lmm_bytes < sizeof(*lmm)) {
-                CERROR("lov_mds_md too small: %d, need at least %d\n",
+                CERROR("lov_mds_md_v1 too small: %d, need at least %d\n",
                        lmm_bytes, (int)sizeof(*lmm));
                 return -EINVAL;
         }
 
         *stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
 
                        lmm_bytes, (int)sizeof(*lmm));
                 return -EINVAL;
         }
 
         *stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
 
-        if (lmm_bytes < lov_mds_md_v1_size(*stripe_count)) {
-                CERROR("LOV EA too small: %d, need %d\n",
-                       lmm_bytes, lov_mds_md_v1_size(*stripe_count));
+        if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)) {
+                CERROR("LOV EA V1 too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V1));
                 lov_dump_lmm_v1(D_WARNING, lmm);
                 return -EINVAL;
         }
                 lov_dump_lmm_v1(D_WARNING, lmm);
                 return -EINVAL;
         }
@@ -218,7 +224,7 @@ static int lsm_lmm_verify_plain(struct lov_mds_md *lmm, int lmm_bytes,
         return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count);
 }
 
         return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count);
 }
 
-int lsm_unpackmd_plain(struct lov_obd *lov, struct lov_stripe_md *lsm,
+int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
                     struct lov_mds_md_v1 *lmm)
 {
         struct lov_oinfo *loi;
                     struct lov_mds_md_v1 *lmm)
 {
         struct lov_oinfo *loi;
@@ -249,7 +255,7 @@ int lsm_unpackmd_plain(struct lov_obd *lov, struct lov_stripe_md *lsm,
         return 0;
 }
 
         return 0;
 }
 
-struct lsm_operations lsm_plain_ops = {
+struct lsm_operations lsm_v1_ops = {
         .lsm_free            = lsm_free_plain,
         .lsm_destroy         = lsm_destroy_plain,
         .lsm_stripe_by_index    = lsm_stripe_by_index_plain,
         .lsm_free            = lsm_free_plain,
         .lsm_destroy         = lsm_destroy_plain,
         .lsm_stripe_by_index    = lsm_stripe_by_index_plain,
@@ -258,8 +264,8 @@ struct lsm_operations lsm_plain_ops = {
         .lsm_stripe_offset_by_index  = lsm_stripe_offset_by_index_plain,
         .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
         .lsm_stripe_index_by_offset  = lsm_stripe_index_by_offset_plain,
         .lsm_stripe_offset_by_index  = lsm_stripe_offset_by_index_plain,
         .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
         .lsm_stripe_index_by_offset  = lsm_stripe_index_by_offset_plain,
-        .lsm_lmm_verify         = lsm_lmm_verify_plain,
-        .lsm_unpackmd           = lsm_unpackmd_plain,
+        .lsm_lmm_verify         = lsm_lmm_verify_v1,
+        .lsm_unpackmd           = lsm_unpackmd_v1,
 };
 
 struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, obd_off lov_off)
 };
 
 struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, obd_off lov_off)
@@ -625,3 +631,79 @@ struct lsm_operations lsm_join_ops = {
         .lsm_lmm_verify         = lsm_lmm_verify_join,
         .lsm_unpackmd           = lsm_unpackmd_join,
 };
         .lsm_lmm_verify         = lsm_lmm_verify_join,
         .lsm_unpackmd           = lsm_unpackmd_join,
 };
+
+
+static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes,
+                             int *stripe_count)
+{
+        struct lov_mds_md_v3 *lmm;
+
+        lmm = (struct lov_mds_md_v3 *)lmmv1;
+
+        if (lmm_bytes < sizeof(*lmm)) {
+                CERROR("lov_mds_md_v3 too small: %d, need at least %d\n",
+                       lmm_bytes, (int)sizeof(*lmm));
+                return -EINVAL;
+        }
+
+        *stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
+
+        if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)) {
+                CERROR("LOV EA V3 too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V3));
+                lov_dump_lmm_v3(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        return lsm_lmm_verify_common((struct lov_mds_md_v1 *)lmm, lmm_bytes,
+                                     *stripe_count);
+}
+
+int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
+                    struct lov_mds_md *lmmv1)
+{
+        struct lov_mds_md_v3 *lmm;
+        struct lov_oinfo *loi;
+        int i;
+
+        lmm = (struct lov_mds_md_v3 *)lmmv1;
+
+        lsm_unpackmd_common(lsm, (struct lov_mds_md_v1 *)lmm);
+        strncpy(lsm->lsm_pool_name, lmm->lmm_pool_name, MAXPOOLNAME);
+
+        for (i = 0; i < lsm->lsm_stripe_count; i++) {
+                /* XXX LOV STACKING call down to osc_unpackmd() */
+                loi = lsm->lsm_oinfo[i];
+                loi->loi_id = le64_to_cpu(lmm->lmm_objects[i].l_object_id);
+                loi->loi_gr = le64_to_cpu(lmm->lmm_objects[i].l_object_gr);
+                loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
+                loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
+                if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
+                        CERROR("OST index %d more than OST count %d\n",
+                               loi->loi_ost_idx, lov->desc.ld_tgt_count);
+                        lov_dump_lmm_v3(D_WARNING, lmm);
+                        return -EINVAL;
+                }
+                if (!lov->lov_tgts[loi->loi_ost_idx]) {
+                        CERROR("OST index %d missing\n", loi->loi_ost_idx);
+                        lov_dump_lmm_v3(D_WARNING, lmm);
+                        return -EINVAL;
+                }
+        }
+
+        return 0;
+}
+
+struct lsm_operations lsm_v3_ops = {
+        .lsm_free            = lsm_free_plain,
+        .lsm_destroy         = lsm_destroy_plain,
+        .lsm_stripe_by_index    = lsm_stripe_by_index_plain,
+        .lsm_stripe_by_offset   = lsm_stripe_by_offset_plain,
+        .lsm_revalidate         = lsm_revalidate_plain,
+        .lsm_stripe_offset_by_index  = lsm_stripe_offset_by_index_plain,
+        .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
+        .lsm_stripe_index_by_offset  = lsm_stripe_index_by_offset_plain,
+        .lsm_lmm_verify         = lsm_lmm_verify_v3,
+        .lsm_unpackmd           = lsm_unpackmd_v3,
+};
+
index 77154c7..9a1d66d 100644 (file)
@@ -285,6 +285,9 @@ void lov_free_memmd(struct lov_stripe_md **lsmp);
 
 void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm);
 void lov_dump_lmm_join(int level, struct lov_mds_md_join *lmmj);
 
 void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm);
 void lov_dump_lmm_join(int level, struct lov_mds_md_join *lmmj);
+void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm);
+void lov_dump_lmm(int level, void *lmm);
+
 /* lov_ea.c */
 int lov_unpackmd_join(struct lov_obd *lov, struct lov_stripe_md *lsm,
                       struct lov_mds_md *lmm);
 /* lov_ea.c */
 int lov_unpackmd_join(struct lov_obd *lov, struct lov_stripe_md *lsm,
                       struct lov_mds_md *lmm);
@@ -306,4 +309,23 @@ static inline void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars)
 }
 #endif
 
 }
 #endif
 
+/* pools */
+extern lustre_hash_ops_t pool_hash_operations;
+/* ost_pool methods */
+int lov_ost_pool_init(struct ost_pool *op, unsigned int count);
+int lov_ost_pool_extend(struct ost_pool *op, unsigned int max_count);
+int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int max_count);
+int lov_ost_pool_remove(struct ost_pool *op, __u32 idx);
+int lov_ost_pool_free(struct ost_pool *op);
+
+/* high level pool methods */
+int lov_pool_new(struct obd_device *obd, char *poolname);
+int lov_pool_del(struct obd_device *obd, char *poolname);
+int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
+int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
+void lov_dump_pool(int level, struct pool_desc *pool);
+struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
+int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
+
+
 #endif
 #endif
index 739fee3..2456372 100644 (file)
@@ -667,7 +667,6 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
                        lov->lov_tgts, lov->lov_tgt_size);
         }
 
                        lov->lov_tgts, lov->lov_tgt_size);
         }
 
-
         OBD_ALLOC_PTR(tgt);
         if (!tgt) {
                 mutex_up(&lov->lov_lock);
         OBD_ALLOC_PTR(tgt);
         if (!tgt) {
                 mutex_up(&lov->lov_lock);
@@ -683,6 +682,11 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
         lov->lov_tgts[index] = tgt;
         if (index >= lov->desc.ld_tgt_count)
                 lov->desc.ld_tgt_count = index + 1;
         lov->lov_tgts[index] = tgt;
         if (index >= lov->desc.ld_tgt_count)
                 lov->desc.ld_tgt_count = index + 1;
+
+        rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size);
+        if (rc)
+                RETURN(rc);
+
         mutex_up(&lov->lov_lock);
 
         CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n",
         mutex_up(&lov->lov_lock);
 
         CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n",
@@ -781,6 +785,7 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index)
          * maximum tgt index for computing the mds_max_easize. So we can't
          * shrink it. */
 
          * maximum tgt index for computing the mds_max_easize. So we can't
          * shrink it. */
 
+        lov_ost_pool_remove(&lov->lov_packed, index);
         lov->lov_tgts[index] = NULL;
         OBD_FREE_PTR(tgt);
 
         lov->lov_tgts[index] = NULL;
         OBD_FREE_PTR(tgt);
 
@@ -841,6 +846,7 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         struct lov_desc *desc;
         struct lov_obd *lov = &obd->u.lov;
         int count;
         struct lov_desc *desc;
         struct lov_obd *lov = &obd->u.lov;
         int count;
+        int rc;
         ENTRY;
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
         ENTRY;
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
@@ -884,16 +890,27 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         desc->ld_active_tgt_count = 0;
         lov->desc = *desc;
         lov->lov_tgt_size = 0;
         desc->ld_active_tgt_count = 0;
         lov->desc = *desc;
         lov->lov_tgt_size = 0;
+        rc = lov_ost_pool_init(&lov->lov_packed, 0);
+        if (rc)
+                RETURN(rc);
+
         sema_init(&lov->lov_lock, 1);
         atomic_set(&lov->lov_refcount, 0);
         CFS_INIT_LIST_HEAD(&lov->lov_qos.lq_oss_list);
         init_rwsem(&lov->lov_qos.lq_rw_sem);
         lov->lov_qos.lq_dirty = 1;
         sema_init(&lov->lov_lock, 1);
         atomic_set(&lov->lov_refcount, 0);
         CFS_INIT_LIST_HEAD(&lov->lov_qos.lq_oss_list);
         init_rwsem(&lov->lov_qos.lq_rw_sem);
         lov->lov_qos.lq_dirty = 1;
-        lov->lov_qos.lq_dirty_rr = 1;
+        lov->lov_qos.lq_rr.lqr_dirty = 1;
         lov->lov_qos.lq_reset = 1;
         /* Default priority is toward free space balance */
         lov->lov_qos.lq_prio_free = 232;
 
         lov->lov_qos.lq_reset = 1;
         /* Default priority is toward free space balance */
         lov->lov_qos.lq_prio_free = 232;
 
+        lov->lov_pools_hash_body = lustre_hash_init("POOLS", 128, 128,
+                                                    &pool_hash_operations,
+                                                    0);
+
+        CFS_INIT_LIST_HEAD(&lov->lov_pool_list);
+        lov->lov_pool_count = 0;
+
         lprocfs_lov_init_vars(&lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 #ifdef LPROCFS
         lprocfs_lov_init_vars(&lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 #ifdef LPROCFS
@@ -906,6 +923,9 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                         CWARN("Error adding the target_obd file\n");
         }
 #endif
                         CWARN("Error adding the target_obd file\n");
         }
 #endif
+        lov->lov_pool_proc_entry = lprocfs_register("pools",
+                                                    obd->obd_proc_entry,
+                                                    NULL, NULL);
 
         RETURN(0);
 }
 
         RETURN(0);
 }
@@ -939,8 +959,23 @@ static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 static int lov_cleanup(struct obd_device *obd)
 {
         struct lov_obd *lov = &obd->u.lov;
 static int lov_cleanup(struct obd_device *obd)
 {
         struct lov_obd *lov = &obd->u.lov;
+        struct list_head *pos, *tmp;
+        struct pool_desc *pool;
+
+        list_for_each_safe(pos, tmp, &lov->lov_pool_list) {
+                pool = list_entry(pos, struct pool_desc, pool_list);
+                list_del(&pool->pool_list);
+                lustre_hash_del_key(lov->lov_pools_hash_body, pool->pool_name);
+                lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
+                lov_ost_pool_free(&(pool->pool_obds));
+                OBD_FREE(pool, sizeof(*pool));
+        }
+        lustre_hash_exit(lov->lov_pools_hash_body);
 
         lprocfs_obd_cleanup(obd);
 
         lprocfs_obd_cleanup(obd);
+
+        lov_ost_pool_free(&lov->lov_packed);
+
         if (lov->lov_tgts) {
                 int i;
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
         if (lov->lov_tgts) {
                 int i;
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
@@ -964,8 +999,7 @@ static int lov_cleanup(struct obd_device *obd)
                 lov->lov_tgt_size = 0;
         }
 
                 lov->lov_tgt_size = 0;
         }
 
-        if (lov->lov_qos.lq_rr_size)
-                OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size);
+        lov_ost_pool_free(&(lov->lov_qos.lq_rr.lqr_pool));
 
         RETURN(0);
 }
 
         RETURN(0);
 }
@@ -1015,6 +1049,12 @@ static int lov_process_config(struct obd_device *obd, obd_count len, void *buf)
                                               lcfg, obd);
                 GOTO(out, rc);
         }
                                               lcfg, obd);
                 GOTO(out, rc);
         }
+        case LCFG_POOL_NEW:
+        case LCFG_POOL_ADD:
+        case LCFG_POOL_DEL:
+        case LCFG_POOL_REM:
+                GOTO(out, rc);
+
         default: {
                 CERROR("Unknown command: %d\n", lcfg->lcfg_command);
                 GOTO(out, rc = -EINVAL);
         default: {
                 CERROR("Unknown command: %d\n", lcfg->lcfg_command);
                 GOTO(out, rc = -EINVAL);
@@ -1193,7 +1233,8 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
 #define ASSERT_LSM_MAGIC(lsmp)                                                  \
 do {                                                                            \
         LASSERT((lsmp) != NULL);                                                \
 #define ASSERT_LSM_MAGIC(lsmp)                                                  \
 do {                                                                            \
         LASSERT((lsmp) != NULL);                                                \
-        LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC ||                             \
+        LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 ||                          \
+                 (lsmp)->lsm_magic == LOV_MAGIC_V3 ||                           \
                  (lsmp)->lsm_magic == LOV_MAGIC_JOIN), "%p->lsm_magic=%x\n",    \
                  (lsmp), (lsmp)->lsm_magic);                                    \
 } while (0)
                  (lsmp)->lsm_magic == LOV_MAGIC_JOIN), "%p->lsm_magic=%x\n",    \
                  (lsmp), (lsmp)->lsm_magic);                                    \
 } while (0)
@@ -3332,6 +3373,10 @@ struct obd_ops lov_obd_ops = {
         .o_unregister_page_removal_cb = lov_unregister_page_removal_cb,
         .o_register_lock_cancel_cb = lov_register_lock_cancel_cb,
         .o_unregister_lock_cancel_cb = lov_unregister_lock_cancel_cb,
         .o_unregister_page_removal_cb = lov_unregister_page_removal_cb,
         .o_register_lock_cancel_cb = lov_register_lock_cancel_cb,
         .o_unregister_lock_cancel_cb = lov_unregister_lock_cancel_cb,
+        .o_pool_new            = lov_pool_new,
+        .o_pool_rem            = lov_pool_remove,
+        .o_pool_add            = lov_pool_add,
+        .o_pool_del            = lov_pool_del,
 };
 
 static quota_interface_t *quota_interface;
 };
 
 static quota_interface_t *quota_interface;
index 8b2003d..b02c980 100644 (file)
@@ -94,6 +94,52 @@ void lov_dump_lmm_join(int level, struct lov_mds_md_join *lmmj)
                le32_to_cpu(lmmj->lmmj_extent_count));
 }
 
                le32_to_cpu(lmmj->lmmj_extent_count));
 }
 
+void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
+{
+        struct lov_ost_data_v1 *lod;
+        int i;
+
+        CDEBUG(level, "objid "LPX64", magic 0x%08x, pattern %#x\n",
+               le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic),
+               le32_to_cpu(lmm->lmm_pattern));
+        CDEBUG(level,"stripe_size %u, stripe_count %u\n",
+               le32_to_cpu(lmm->lmm_stripe_size),
+               le32_to_cpu(lmm->lmm_stripe_count));
+        CDEBUG(level,"pool_name "POOLNAMEF"\n", lmm->lmm_pool_name);
+
+        if (le32_to_cpu(lmm->lmm_stripe_count) <= LOV_V1_INSANE_STRIPE_COUNT) {
+                for (i = 0, lod = lmm->lmm_objects;
+                     i < (int)le32_to_cpu(lmm->lmm_stripe_count); i++, lod++)
+                         CDEBUG(level,
+                                "stripe %u idx %u subobj "LPX64"/"LPX64"\n",
+                                i, le32_to_cpu(lod->l_ost_idx),
+                                le64_to_cpu(lod->l_object_gr),
+                                le64_to_cpu(lod->l_object_id));
+        } else {
+                CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n",
+                       le32_to_cpu(lmm->lmm_stripe_count),
+                       LOV_V1_INSANE_STRIPE_COUNT);
+        }
+}
+
+void lov_dump_lmm(int level, void *lmm)
+{
+        int magic;
+
+        magic = ((struct lov_mds_md_v1 *)(lmm))->lmm_magic;
+        switch (magic) {
+        case LOV_MAGIC_V1:
+                return lov_dump_lmm_v1(level, (struct lov_mds_md_v1 *)(lmm));
+        case LOV_MAGIC_JOIN:
+                return lov_dump_lmm_join(level, (struct lov_mds_md_join *)(lmm));
+        case LOV_MAGIC_V3:
+                return lov_dump_lmm_v3(level, (struct lov_mds_md_v3 *)(lmm));
+        default:
+                CERROR("Cannot recognize lmm_magic %x", magic);
+        }
+        return;
+}
+
 #define LMM_ASSERT(test)                                                \
 do {                                                                    \
         if (!(test)) lov_dump_lmm(D_ERROR, lmm);                        \
 #define LMM_ASSERT(test)                                                \
 do {                                                                    \
         if (!(test)) lov_dump_lmm(D_ERROR, lmm);                        \
@@ -113,37 +159,51 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
-        struct lov_mds_md *lmm;
+        struct lov_mds_md_v1 *lmmv1;
+        struct lov_mds_md_v3 *lmmv3;
         int stripe_count = lov->desc.ld_tgt_count;
         int stripe_count = lov->desc.ld_tgt_count;
-        int lmm_size;
+        struct lov_ost_data_v1 *lmm_objects;
+        int lmm_size, lmm_magic;
         int i;
         ENTRY;
 
         if (lsm) {
         int i;
         ENTRY;
 
         if (lsm) {
-                if (lsm->lsm_magic != LOV_MAGIC) {
-                        CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X\n",
-                               lsm->lsm_magic, LOV_MAGIC);
-                        RETURN(-EINVAL);
-                }
+                lmm_magic = lsm->lsm_magic;
+
                 /* If we are just sizing the EA, limit the stripe count
                  * to the actual number of OSTs in this filesystem. */
                 if (!lmmp) {
                 /* If we are just sizing the EA, limit the stripe count
                  * to the actual number of OSTs in this filesystem. */
                 if (!lmmp) {
-                        stripe_count = lov_get_stripecnt(lov, lsm->lsm_stripe_count);
+                        stripe_count = lov_get_stripecnt(lov,
+                                                         lsm->lsm_stripe_count);
                         lsm->lsm_stripe_count = stripe_count;
                 } else {
                         stripe_count = lsm->lsm_stripe_count;
                 }
                         lsm->lsm_stripe_count = stripe_count;
                 } else {
                         stripe_count = lsm->lsm_stripe_count;
                 }
+        } else if (lmmp && *lmmp) {
+                lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
+        } else {
+                /* lsm == NULL and lmmp == NULL */
+                lmm_magic = LOV_MAGIC;
+        }
+
+        if ((lmm_magic != LOV_MAGIC_V1) &&
+            (lmm_magic != LOV_MAGIC_V3)) {
+                CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
+                        lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
+                RETURN(-EINVAL);
+
         }
 
         /* XXX LOV STACKING call into osc for sizes */
         }
 
         /* XXX LOV STACKING call into osc for sizes */
-        lmm_size = lov_mds_md_size(stripe_count);
+        lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
 
         if (!lmmp)
                 RETURN(lmm_size);
 
         if (*lmmp && !lsm) {
                 stripe_count = le32_to_cpu((*lmmp)->lmm_stripe_count);
 
         if (!lmmp)
                 RETURN(lmm_size);
 
         if (*lmmp && !lsm) {
                 stripe_count = le32_to_cpu((*lmmp)->lmm_stripe_count);
-                OBD_FREE(*lmmp, lov_mds_md_size(stripe_count));
+                lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
+                OBD_FREE(*lmmp, lmm_size);
                 *lmmp = NULL;
                 RETURN(0);
         }
                 *lmmp = NULL;
                 RETURN(0);
         }
@@ -154,28 +214,44 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
                         RETURN(-ENOMEM);
         }
 
                         RETURN(-ENOMEM);
         }
 
-        lmm = *lmmp;
-        lmm->lmm_magic = cpu_to_le32(LOV_MAGIC); /* only write new format */
+        CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
+               lmm_magic, lmm_size);
+
+        lmmv1 = *lmmp;
+        lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
+        if (lmm_magic == LOV_MAGIC_V3)
+                lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
+        else
+                lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);
 
         if (!lsm)
                 RETURN(lmm_size);
 
 
         if (!lsm)
                 RETURN(lmm_size);
 
-        lmm->lmm_object_id = cpu_to_le64(lsm->lsm_object_id);
-        lmm->lmm_object_gr = cpu_to_le64(lsm->lsm_object_gr);
-        lmm->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
-        lmm->lmm_stripe_count = cpu_to_le32(stripe_count);
-        lmm->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
+        /* lmmv1 and lmmv3 point to the same struct and have the
+         * same first fields
+         */
+        lmmv1->lmm_object_id = cpu_to_le64(lsm->lsm_object_id);
+        lmmv1->lmm_object_gr = cpu_to_le64(lsm->lsm_object_gr);
+        lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
+        lmmv1->lmm_stripe_count = cpu_to_le32(stripe_count);
+        lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
+        if (lsm->lsm_magic == LOV_MAGIC_V3) {
+                strncpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name, MAXPOOLNAME);
+                lmm_objects = lmmv3->lmm_objects;
+        } else {
+                lmm_objects = lmmv1->lmm_objects;
+        }
 
         for (i = 0; i < stripe_count; i++) {
                 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
 
                 /* XXX LOV STACKING call down to osc_packmd() to do packing */
                 LASSERTF(loi->loi_id, "lmm_oid "LPU64" stripe %u/%u idx %u\n",
 
         for (i = 0; i < stripe_count; i++) {
                 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
 
                 /* XXX LOV STACKING call down to osc_packmd() to do packing */
                 LASSERTF(loi->loi_id, "lmm_oid "LPU64" stripe %u/%u idx %u\n",
-                         lmm->lmm_object_id, i, stripe_count, loi->loi_ost_idx);
-                lmm->lmm_objects[i].l_object_id = cpu_to_le64(loi->loi_id);
-                lmm->lmm_objects[i].l_object_gr = cpu_to_le64(loi->loi_gr);
-                lmm->lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
-                lmm->lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
+                         lmmv1->lmm_object_id, i, stripe_count, loi->loi_ost_idx);
+                lmm_objects[i].l_object_id = cpu_to_le64(loi->loi_id);
+                lmm_objects[i].l_object_gr = cpu_to_le64(loi->loi_gr);
+                lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
+                lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
         }
 
         RETURN(lmm_size);
         }
 
         RETURN(lmm_size);
@@ -205,9 +281,22 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, int *stripe_count)
         int rc;
 
         if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
         int rc;
 
         if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
-                CERROR("bad disk LOV MAGIC: 0x%08X; dumping V1 LMM:\n",
-                       le32_to_cpu(*(__u32 *)lmm));
-                lov_dump_lmm_v1(D_WARNING, lmm);
+                char *buffer;
+                int sz;
+
+                CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
+                       le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
+                sz = lmm_bytes * 2 + 1;
+                OBD_ALLOC(buffer, sz);
+                if (buffer != NULL) {
+                        int i;
+
+                        for (i = 0; i < lmm_bytes; i++)
+                                sprintf(buffer+2*i, "%.2X", ((char *)lmm)[i]);
+                        buffer[sz] = '\0';
+                        CERROR("%s\n", buffer);
+                        OBD_FREE(buffer, sz);
+                }
                 return -EINVAL;
         }
         rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
                 return -EINVAL;
         }
         rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
@@ -234,6 +323,7 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count,
         (*lsmp)->lsm_stripe_count = stripe_count;
         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
         (*lsmp)->lsm_pattern = pattern;
         (*lsmp)->lsm_stripe_count = stripe_count;
         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
         (*lsmp)->lsm_pattern = pattern;
+        (*lsmp)->lsm_pool_name[0] = '\0';
         (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
 
         for (i = 0; i < stripe_count; i++)
         (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
 
         for (i = 0; i < stripe_count; i++)
@@ -312,68 +402,108 @@ static int __lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp,
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
-        struct lov_user_md lum;
+        struct lov_user_md_v3 lumv3;
+        struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+        int lmm_magic;
         int stripe_count;
         int rc;
         ENTRY;
 
         int stripe_count;
         int rc;
         ENTRY;
 
-        rc = copy_from_user(&lum, lump, sizeof(lum));
+        rc = copy_from_user(&lumv3, lump, sizeof(struct lov_user_md_v1));
         if (rc)
                 RETURN(-EFAULT);
 
         if (rc)
                 RETURN(-EFAULT);
 
-        if (lum.lmm_magic != LOV_USER_MAGIC) {
-                if (lum.lmm_magic == __swab32(LOV_USER_MAGIC)) {
-                        lustre_swab_lov_user_md(&lum);
-                } else {
-                        CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
-                               " %#08x != %#08x\n",
-                               lum.lmm_magic, LOV_USER_MAGIC);
-                        RETURN(-EINVAL);
-                }
+        lmm_magic = lumv1->lmm_magic;
+
+        if (lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
+                lustre_swab_lov_user_md_v1(lumv1);
+                lmm_magic = LOV_USER_MAGIC_V1;
+        } else if (lmm_magic == LOV_USER_MAGIC_V3) {
+                rc = copy_from_user(&lumv3, lump, sizeof(lumv3));
+                if (rc)
+                        RETURN(-EFAULT);
+        } else if (lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
+                rc = copy_from_user(&lumv3, lump, sizeof(lumv3));
+                if (rc)
+                        RETURN(-EFAULT);
+                lustre_swab_lov_user_md_v3(&lumv3);
+                lmm_magic = LOV_USER_MAGIC_V3;
+        } else if (lmm_magic != LOV_USER_MAGIC_V1) {
+                CDEBUG(D_IOCTL,
+                       "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
+                       lmm_magic, LOV_USER_MAGIC_V1, LOV_USER_MAGIC_V3);
+                       RETURN(-EINVAL);
         }
 
         }
 
-        if (lum.lmm_pattern == 0) {
-                lum.lmm_pattern = lov->desc.ld_pattern ?
+        /* in the rest of the tests, as *lumv1 and lumv3 have the same
+         * fields, we use lumv1 to avoid code duplication */
+
+        if (lumv1->lmm_pattern == 0) {
+                lumv1->lmm_pattern = lov->desc.ld_pattern ?
                         lov->desc.ld_pattern : LOV_PATTERN_RAID0;
         }
 
                         lov->desc.ld_pattern : LOV_PATTERN_RAID0;
         }
 
-        if (lum.lmm_pattern != LOV_PATTERN_RAID0) {
+        if (lumv1->lmm_pattern != LOV_PATTERN_RAID0) {
                 CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
                 CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
-                       lum.lmm_pattern);
+                       lumv1->lmm_pattern);
                 RETURN(-EINVAL);
         }
 
         /* 64kB is the largest common page size we see (ia64), and matches the
          * check in lfs */
                 RETURN(-EINVAL);
         }
 
         /* 64kB is the largest common page size we see (ia64), and matches the
          * check in lfs */
-        if (lum.lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
+        if (lumv1->lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
                 CDEBUG(D_IOCTL, "stripe size %u not multiple of %u, fixing\n",
                 CDEBUG(D_IOCTL, "stripe size %u not multiple of %u, fixing\n",
-                       lum.lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
-                lum.lmm_stripe_size = LOV_MIN_STRIPE_SIZE;
+                       lumv1->lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
+                lumv1->lmm_stripe_size = LOV_MIN_STRIPE_SIZE;
         }
 
         }
 
-        if ((lum.lmm_stripe_offset >= lov->desc.ld_tgt_count) &&
-            (lum.lmm_stripe_offset != (typeof(lum.lmm_stripe_offset))(-1))) {
+        if ((lumv1->lmm_stripe_offset >= lov->desc.ld_tgt_count) &&
+            (lumv1->lmm_stripe_offset !=
+             (typeof(lumv1->lmm_stripe_offset))(-1))) {
                 CDEBUG(D_IOCTL, "stripe offset %u > number of OSTs %u\n",
                 CDEBUG(D_IOCTL, "stripe offset %u > number of OSTs %u\n",
-                       lum.lmm_stripe_offset, lov->desc.ld_tgt_count);
+                       lumv1->lmm_stripe_offset, lov->desc.ld_tgt_count);
                 RETURN(-EINVAL);
         }
                 RETURN(-EINVAL);
         }
-        stripe_count = lov_get_stripecnt(lov, lum.lmm_stripe_count);
+        stripe_count = lov_get_stripecnt(lov, lumv1->lmm_stripe_count);
+
+        if (lmm_magic == LOV_USER_MAGIC_V3) {
+                struct pool_desc *pool;
+
+                pool = lov_find_pool(lov, lumv3.lmm_pool_name);
+                if (pool == NULL)
+                        RETURN(-EINVAL);
+
+                if (lumv3.lmm_stripe_offset !=
+                    (typeof(lumv3.lmm_stripe_offset))(-1)) {
+                        rc = lov_check_index_in_pool(lumv3.lmm_stripe_offset,
+                                                     pool);
+                        if (rc < 0)
+                                RETURN(-EINVAL);
+                }
+
+                if (stripe_count > pool_tgt_count(pool))
+                        stripe_count = pool_tgt_count(pool);
+        }
 
 
-        if ((__u64)lum.lmm_stripe_size * stripe_count > ~0UL) {
+        if ((__u64)lumv1->lmm_stripe_size * stripe_count > ~0UL) {
                 CDEBUG(D_IOCTL, "stripe width %ux%i exeeds %lu bytes\n",
                 CDEBUG(D_IOCTL, "stripe width %ux%i exeeds %lu bytes\n",
-                       lum.lmm_stripe_size, (int)lum.lmm_stripe_count, ~0UL);
+                       lumv1->lmm_stripe_size, (int)lumv1->lmm_stripe_count,
+                       ~0UL);
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
-        rc = lov_alloc_memmd(lsmp, stripe_count, lum.lmm_pattern, LOV_MAGIC);
+        rc = lov_alloc_memmd(lsmp, stripe_count, lumv1->lmm_pattern, lmm_magic);
 
         if (rc >= 0) {
 
         if (rc >= 0) {
-                (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lum.lmm_stripe_offset;
-                (*lsmp)->lsm_stripe_size = lum.lmm_stripe_size;
+                (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lumv1->lmm_stripe_offset;
+                (*lsmp)->lsm_stripe_size = lumv1->lmm_stripe_size;
+                if (lmm_magic == LOV_USER_MAGIC_V3)
+                        strncpy((*lsmp)->lsm_pool_name, lumv3.lmm_pool_name,
+                                MAXPOOLNAME);
                 rc = 0;
         }
 
                 rc = 0;
         }
 
-        RETURN(0);
+        RETURN(rc);
 }
 
 /* Configure object striping information on a new file.
 }
 
 /* Configure object striping information on a new file.
@@ -405,20 +535,27 @@ int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp,
         struct obd_export *oexp;
         struct lov_obd *lov = &exp->exp_obd->u.lov;
         obd_id last_id = 0;
         struct obd_export *oexp;
         struct lov_obd *lov = &exp->exp_obd->u.lov;
         obd_id last_id = 0;
+        struct lov_user_ost_data_v1 *lmm_objects;
 
         ENTRY;
 
         ENTRY;
+
+        if (lump->lmm_magic == LOV_USER_MAGIC_V3)
+                lmm_objects = ((struct lov_user_md_v3 *)lump)->lmm_objects;
+        else
+                lmm_objects = lump->lmm_objects;
+
         for (i = 0; i < lump->lmm_stripe_count; i++) {
                 __u32 len = sizeof(last_id);
         for (i = 0; i < lump->lmm_stripe_count; i++) {
                 __u32 len = sizeof(last_id);
-                oexp = lov->lov_tgts[lump->lmm_objects[i].l_ost_idx]->ltd_exp;
+                oexp = lov->lov_tgts[lmm_objects[i].l_ost_idx]->ltd_exp;
                 rc = obd_get_info(oexp, sizeof(KEY_LAST_ID), KEY_LAST_ID,
                                   &len, &last_id, NULL);
                 if (rc)
                         RETURN(rc);
                 rc = obd_get_info(oexp, sizeof(KEY_LAST_ID), KEY_LAST_ID,
                                   &len, &last_id, NULL);
                 if (rc)
                         RETURN(rc);
-                if (lump->lmm_objects[i].l_object_id > last_id) {
+                if (lmm_objects[i].l_object_id > last_id) {
                         CERROR("Setting EA for object > than last id on "
                                "ost idx %d "LPD64" > "LPD64" \n",
                         CERROR("Setting EA for object > than last id on "
                                "ost idx %d "LPD64" > "LPD64" \n",
-                               lump->lmm_objects[i].l_ost_idx,
-                               lump->lmm_objects[i].l_object_id, last_id);
+                               lmm_objects[i].l_ost_idx,
+                               lmm_objects[i].l_object_id, last_id);
                         RETURN(-EINVAL);
                 }
         }
                         RETURN(-EINVAL);
                 }
         }
@@ -429,9 +566,9 @@ int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp,
 
         for (i = 0; i < lump->lmm_stripe_count; i++) {
                 (*lsmp)->lsm_oinfo[i]->loi_ost_idx =
 
         for (i = 0; i < lump->lmm_stripe_count; i++) {
                 (*lsmp)->lsm_oinfo[i]->loi_ost_idx =
-                        lump->lmm_objects[i].l_ost_idx;
-                (*lsmp)->lsm_oinfo[i]->loi_id = lump->lmm_objects[i].l_object_id;
-                (*lsmp)->lsm_oinfo[i]->loi_gr = lump->lmm_objects[i].l_object_gr;
+                        lmm_objects[i].l_ost_idx;
+                (*lsmp)->lsm_oinfo[i]->loi_id = lmm_objects[i].l_object_id;
+                (*lsmp)->lsm_oinfo[i]->loi_gr = lmm_objects[i].l_object_gr;
         }
         RETURN(0);
 }
         }
         RETURN(0);
 }
@@ -449,9 +586,11 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
         /*
          * XXX huge struct allocated on stack.
          */
         /*
          * XXX huge struct allocated on stack.
          */
-        struct lov_user_md lum;
+        /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+        struct lov_user_md_v3 lum;
         struct lov_mds_md *lmmk = NULL;
         int rc, lmm_size;
         struct lov_mds_md *lmmk = NULL;
         int rc, lmm_size;
+        int lum_size;
         mm_segment_t seg;
         ENTRY;
 
         mm_segment_t seg;
         ENTRY;
 
@@ -464,12 +603,22 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
          */
         seg = get_fs();
         set_fs(KERNEL_DS);
          */
         seg = get_fs();
         set_fs(KERNEL_DS);
-        rc = copy_from_user(&lum, lump, sizeof(lum));
+
+        /* we only need the header part from user space to get lmm_magic and
+         * lmm_stripe_count, (the header part is common to v1 and v3) */
+        lum_size = sizeof(struct lov_user_md_v1);
+        rc = copy_from_user(&lum, lump, lum_size);
+
         if (rc)
                 rc = -EFAULT;
         if (rc)
                 rc = -EFAULT;
-        else if (lum.lmm_magic != LOV_USER_MAGIC)
+        else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
+                 (lum.lmm_magic != LOV_USER_MAGIC_V3))
                 rc = -EINVAL;
         else {
                 rc = -EINVAL;
         else {
+                /* if v3 we just have to update the lum_size */
+                if (lum.lmm_magic == LOV_USER_MAGIC_V3)
+                        lum_size = sizeof(struct lov_user_md_v3);
+
                 rc = lov_packmd(exp, &lmmk, lsm);
                 if (rc < 0)
                         RETURN(rc);
                 rc = lov_packmd(exp, &lmmk, lsm);
                 if (rc < 0)
                         RETURN(rc);
@@ -477,17 +626,18 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
                 rc = 0;
 
                 /* FIXME: Bug 1185 - copy fields properly when structs change */
                 rc = 0;
 
                 /* FIXME: Bug 1185 - copy fields properly when structs change */
-                CLASSERT(sizeof lum == sizeof *lmmk);
+                /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
+                CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3));
                 CLASSERT(sizeof lum.lmm_objects[0] ==
                          sizeof lmmk->lmm_objects[0]);
 
                 /* User wasn't expecting this many OST entries */
                 if (lum.lmm_stripe_count == 0) {
                 CLASSERT(sizeof lum.lmm_objects[0] ==
                          sizeof lmmk->lmm_objects[0]);
 
                 /* User wasn't expecting this many OST entries */
                 if (lum.lmm_stripe_count == 0) {
-                        if (copy_to_user(lump, lmmk, sizeof lum))
+                        if (copy_to_user(lump, lmmk, lum_size))
                                 rc = -EFAULT;
                 } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
                         rc = -EOVERFLOW;
                                 rc = -EFAULT;
                 } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
                         rc = -EOVERFLOW;
-                } else if (copy_to_user(lump, lmmk, sizeof lum))
+                } else if (copy_to_user(lump, lmmk, lmm_size))
                         rc = -EFAULT;
 
                 obd_free_diskmd(exp, &lmmk);
                         rc = -EFAULT;
 
                 obd_free_diskmd(exp, &lmmk);
diff --git a/lustre/lov/lov_pool.c b/lustre/lov/lov_pool.c
new file mode 100644 (file)
index 0000000..05fde47
--- /dev/null
@@ -0,0 +1,619 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_pool.c
+ *
+ * OST pool methods
+ *
+ * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#ifdef __KERNEL__
+#include <libcfs/libcfs.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <obd.h>
+#include "lov_internal.h"
+
+/*
+ * hash function using a Rotating Hash algorithm
+ * Knuth, D. The Art of Computer Programming,
+ * Volume 3: Sorting and Searching,
+ * Chapter 6.4.
+ * Addison Wesley, 1973
+ */
+static __u32 pool_hashfn(lustre_hash_t *hash_body, void *key, unsigned mask)
+{
+        int i;
+        __u32 result;
+        char *poolname;
+
+        result = 0;
+        poolname = (char *)key;
+        for (i = 0; i < MAXPOOLNAME; i++) {
+                if (poolname[i] == '\0')
+                        break;
+                result = (result << 4)^(result >> 28) ^  poolname[i];
+        }
+        return (result % mask);
+}
+
+static void *pool_key(struct hlist_node *hnode)
+{
+        struct pool_desc *pool;
+
+        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+        return (pool->pool_name);
+}
+
+static int pool_hashkey_compare(void *key, struct hlist_node *compared_hnode)
+{
+        char *pool_name;
+        struct pool_desc *pool;
+        int rc;
+
+        pool_name = (char *)key;
+        pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
+        rc = strncmp(pool_name, pool->pool_name, MAXPOOLNAME);
+        return (!rc);
+}
+
+static void *pool_hashrefcount_get(struct hlist_node *hnode)
+{
+        struct pool_desc *pool;
+
+        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+        return (pool);
+}
+
+static void *pool_hashrefcount_put(struct hlist_node *hnode)
+{
+        struct pool_desc *pool;
+
+        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+        return (pool);
+}
+
+lustre_hash_ops_t pool_hash_operations = {
+        .lh_hash        = pool_hashfn,
+        .lh_key         = pool_key,
+        .lh_compare     = pool_hashkey_compare,
+        .lh_get         = pool_hashrefcount_get,
+        .lh_put         = pool_hashrefcount_put,
+};
+
+#ifdef LPROCFS
+/* ifdef needed for liblustre support */
+/*
+ * pool /proc seq_file methods
+ */
+/*
+ * iterator is used to go through the target pool entries
+ * index is the current entry index in the lp_array[] array
+ * index >= pos returned to the seq_file interface
+ * pos is from 0 to (pool->pool_obds.op_count - 1)
+ */
+#define POOL_IT_MAGIC 0xB001CEA0
+struct pool_iterator {
+        int magic;
+        struct pool_desc *pool;
+        int idx;        /* from 0 to pool_tgt_size - 1 */
+};
+
+static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
+{
+        struct pool_iterator *iter = (struct pool_iterator *)s->private;
+        int prev_idx;
+
+        LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
+
+        /* test if end of file */
+        if (*pos >= pool_tgt_count(iter->pool))
+                return NULL;
+
+        /* iterate to find a non empty entry */
+        prev_idx = iter->idx;
+        read_lock(&pool_tgt_rwlock(iter->pool));
+        iter->idx++;
+        if (iter->idx == pool_tgt_count(iter->pool)) {
+                iter->idx = prev_idx; /* we stay on the last entry */
+                read_unlock(&pool_tgt_rwlock(iter->pool));
+                return NULL;
+        }
+        read_unlock(&pool_tgt_rwlock(iter->pool));
+        (*pos)++;
+        /* return != NULL to continue */
+        return iter;
+}
+
+static void *pool_proc_start(struct seq_file *s, loff_t *pos)
+{
+        struct pool_desc *pool = (struct pool_desc *)s->private;
+        struct pool_iterator *iter;
+
+        if ((pool_tgt_count(pool) == 0) ||
+            (*pos >= pool_tgt_count(pool)))
+                return NULL;
+
+        OBD_ALLOC(iter, sizeof(struct pool_iterator));
+        if (!iter)
+                return ERR_PTR(-ENOMEM);
+        iter->magic = POOL_IT_MAGIC;
+        iter->pool = pool;
+        iter->idx = 0;
+
+        /* we use seq_file private field to memorized iterator so
+         * we can free it at stop() */
+        /* /!\ do not forget to restore it to pool before freeing it */
+        s->private = iter;
+        if (*pos > 0) {
+                loff_t i;
+                void *ptr;
+
+                i = 0;
+                do {
+                     ptr = pool_proc_next(s, &iter, &i);
+                } while ((i < *pos) && (ptr != NULL));
+                return ptr;
+        }
+        return iter;
+}
+
+static void pool_proc_stop(struct seq_file *s, void *v)
+{
+        struct pool_iterator *iter = (struct pool_iterator *)s->private;
+
+        /* in some cases stop() method is called 2 times, without
+         * calling start() method (see seq_read() from fs/seq_file.c)
+         * we have to free only if s->private is an iterator */
+        if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
+                /* we restore s->private so next call to pool_proc_start()
+                 * will work */
+                s->private = iter->pool;
+                OBD_FREE(iter, sizeof(struct pool_iterator));
+        }
+        return;
+}
+
+static int pool_proc_show(struct seq_file *s, void *v)
+{
+        struct pool_iterator *iter = (struct pool_iterator *)v;
+        struct lov_tgt_desc *tgt;
+
+        LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
+        LASSERT(iter->pool != NULL);
+        LASSERT(iter->idx <= pool_tgt_count(iter->pool));
+
+        read_lock(&pool_tgt_rwlock(iter->pool));
+        tgt = pool_tgt(iter->pool, iter->idx);
+        read_unlock(&pool_tgt_rwlock(iter->pool));
+        if (tgt)
+                seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
+
+        return 0;
+}
+
+static struct seq_operations pool_proc_ops = {
+        .start          = pool_proc_start,
+        .next           = pool_proc_next,
+        .stop           = pool_proc_stop,
+        .show           = pool_proc_show,
+};
+
+static int pool_proc_open(struct inode *inode, struct file *file)
+{
+        int rc;
+
+        rc = seq_open(file, &pool_proc_ops);
+        if (!rc) {
+                struct seq_file *s = file->private_data;
+                s->private = PROC_I(inode)->pde->data;
+        }
+        return rc;
+}
+
+static struct file_operations pool_proc_operations = {
+        .open           = pool_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
+#endif /* LPROCFS */
+
+void lov_dump_pool(int level, struct pool_desc *pool)
+{
+        int i;
+
+        CDEBUG(level, "pool "POOLNAMEF" has %d members\n",
+               pool->pool_name, pool->pool_obds.op_count);
+        read_lock(&pool_tgt_rwlock(pool));
+        for (i = 0; i < pool_tgt_count(pool) ; i++) {
+                if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
+                        continue;
+                CDEBUG(level, "pool "POOLNAMEF"[%d] = %s\n", pool->pool_name,
+                       i, obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
+        }
+        read_unlock(&pool_tgt_rwlock(pool));
+}
+
+#define LOV_POOL_INIT_COUNT 2
+int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
+{
+        if (count == 0)
+                count = LOV_POOL_INIT_COUNT;
+        op->op_array = NULL;
+        op->op_count = 0;
+        op->op_rwlock = RW_LOCK_UNLOCKED;
+        op->op_size = count;
+        OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
+        if (op->op_array == NULL) {
+                op->op_size = 0;
+                return -ENOMEM;
+        }
+        return 0;
+}
+
+int lov_ost_pool_extend(struct ost_pool *op, unsigned int max_count)
+{
+        __u32 *new;
+        int new_size;
+
+        LASSERT(max_count != 0);
+
+        if (op->op_count < op->op_size)
+                return 0;
+
+        new_size = min(max_count, 2 * op->op_size);
+        OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
+        if (new == NULL)
+                return -ENOMEM;
+
+        /* copy old array to new one */
+        memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
+        write_lock(&op->op_rwlock);
+        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+        op->op_array = new;
+        op->op_size = new_size;
+        write_unlock(&op->op_rwlock);
+        return 0;
+}
+
+int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int max_count)
+{
+        int rc, i;
+
+        rc = lov_ost_pool_extend(op, max_count);
+        if (rc)
+                return rc;
+
+        /* search ost in pool array */
+        read_lock(&op->op_rwlock);
+        for (i = 0; i < op->op_count; i++) {
+                if (op->op_array[i] == idx) {
+                        read_unlock(&op->op_rwlock);
+                        return -EEXIST;
+                }
+        }
+        /* ost not found we add it */
+        op->op_array[op->op_count] = idx;
+        op->op_count++;
+        read_unlock(&op->op_rwlock);
+        return 0;
+}
+
+int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
+{
+        int i;
+
+        read_lock(&op->op_rwlock);
+        for (i = 0; i < op->op_count; i++) {
+                if (op->op_array[i] == idx) {
+                        memmove(&op->op_array[i], &op->op_array[i + 1],
+                                (op->op_count - i - 1) * sizeof(op->op_array[0]));
+                        op->op_count--;
+                        read_unlock(&op->op_rwlock);
+                        return 0;
+                }
+        }
+        read_unlock(&op->op_rwlock);
+        return -EINVAL;
+}
+
+int lov_ost_pool_free(struct ost_pool *op)
+{
+        if (op->op_size == 0)
+                return 0;
+
+        write_lock(&op->op_rwlock);
+        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+        op->op_array = NULL;
+        op->op_count = 0;
+        op->op_size = 0;
+        write_unlock(&op->op_rwlock);
+        return 0;
+}
+
+
+int lov_pool_new(struct obd_device *obd, char *poolname)
+{
+        struct lov_obd *lov;
+        struct pool_desc *new_pool;
+        int rc;
+
+        lov = &(obd->u.lov);
+
+        OBD_ALLOC(new_pool, sizeof(*new_pool));
+
+        if (new_pool == NULL)
+                return -ENOMEM;
+
+        if (strlen(poolname) > MAXPOOLNAME)
+                return -ENAMETOOLONG;
+
+        strncpy(new_pool->pool_name, poolname, MAXPOOLNAME);
+        new_pool->pool_name[MAXPOOLNAME] = '\0';
+        new_pool->pool_lov = lov;
+        rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
+        if (rc)
+                return rc;
+
+        memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
+        rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+        if (rc)
+                return rc;
+
+        spin_lock(&obd->obd_dev_lock);
+        /* check if pool alreaddy exists */
+        if (lustre_hash_lookup(lov->lov_pools_hash_body,
+                                poolname) != NULL) {
+                spin_unlock(&obd->obd_dev_lock);
+                lov_ost_pool_free(&new_pool->pool_obds);
+                OBD_FREE(new_pool, sizeof(*new_pool));
+                return  -EEXIST;
+        }
+
+        INIT_HLIST_NODE(&new_pool->pool_hash);
+        lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
+                               &new_pool->pool_hash);
+        list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
+        lov->lov_pool_count++;
+        spin_unlock(&obd->obd_dev_lock);
+
+        CDEBUG(D_CONFIG, POOLNAMEF" is pool #%d\n",
+               poolname, lov->lov_pool_count);
+
+#ifdef LPROCFS
+        /* ifdef needed for liblustre */
+        new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
+                                                       poolname,
+                                                       NULL, NULL,
+                                                       new_pool,
+                                                       &pool_proc_operations);
+#endif
+
+        if (IS_ERR(new_pool->pool_proc_entry)) {
+                CWARN("Cannot add proc pool entry "POOLNAMEF"\n", poolname);
+                new_pool->pool_proc_entry = NULL;
+        }
+
+        return 0;
+}
+
+int lov_pool_del(struct obd_device *obd, char *poolname)
+{
+        struct lov_obd *lov;
+        struct pool_desc *pool;
+
+        lov = &(obd->u.lov);
+
+        spin_lock(&obd->obd_dev_lock);
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body,
+                                             poolname);
+        if (pool == NULL) {
+                spin_unlock(&obd->obd_dev_lock);
+                return -ENOENT;
+        }
+
+#ifdef LPROCFS
+        if (pool->pool_proc_entry != NULL)
+                remove_proc_entry(pool->pool_proc_entry->name,
+                                  pool->pool_proc_entry->parent);
+#endif
+
+        /* pool is kept in the list to be freed by lov_cleanup()
+         * list_del(&pool->pool_list);
+         */
+        lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
+
+        lov->lov_pool_count--;
+
+        spin_unlock(&obd->obd_dev_lock);
+
+        /* pool struct is not freed because it may be used by
+         * some open in /proc
+         * the struct is freed at lov_cleanup()
+         */
+        /*
+        if (pool->pool_rr.lqr_size != 0)
+                OBD_FREE(pool->pool_rr.lqr_array, pool->pool_rr.lqr_size);
+        lov_ost_pool_free(&pool->pool_obds);
+        OBD_FREE(pool, sizeof(*pool));
+        */
+        return 0;
+}
+
+
+int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+        struct obd_uuid ost_uuid;
+        struct lov_obd *lov;
+        struct pool_desc *pool;
+        unsigned int i, lov_idx;
+        int rc;
+
+        lov = &(obd->u.lov);
+
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL) {
+                return -ENOENT;
+        }
+
+        /* allocate pool tgt array if needed */
+        mutex_down(&lov->lov_lock);
+        rc = lov_ost_pool_extend(&pool->pool_obds, lov->lov_tgt_size);
+        if (rc) {
+                mutex_up(&lov->lov_lock);
+                return rc;
+        }
+        mutex_up(&lov->lov_lock);
+
+        obd_str2uuid(&ost_uuid, ostname);
+
+        spin_lock(&obd->obd_dev_lock);
+
+        /* search ost in lov array */
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                if (!lov->lov_tgts[i])
+                        continue;
+
+                if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
+                        break;
+        }
+
+        /* test if ost found in lov */
+        if (i == lov->desc.ld_tgt_count) {
+                spin_unlock(&obd->obd_dev_lock);
+                return -EINVAL;
+        }
+
+        spin_unlock(&obd->obd_dev_lock);
+
+        lov_idx = i;
+
+        rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
+        if (rc)
+                return rc;
+
+        pool->pool_rr.lqr_dirty = 1;
+
+        CDEBUG(D_CONFIG, "Added %s to "POOLNAMEF" as member %d\n",
+               ostname, poolname,  pool_tgt_count(pool));
+        return 0;
+}
+
+int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
+{
+        struct obd_uuid ost_uuid;
+        struct lov_obd *lov;
+        struct pool_desc *pool;
+        unsigned int i, lov_idx;
+
+        lov = &(obd->u.lov);
+
+        spin_lock(&obd->obd_dev_lock);
+        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL) {
+                spin_unlock(&obd->obd_dev_lock);
+                return -ENOENT;
+        }
+
+        obd_str2uuid(&ost_uuid, ostname);
+
+        /* search ost in lov array, to get index */
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                if (!lov->lov_tgts[i])
+                        continue;
+
+                if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
+                        break;
+        }
+
+        /* test if ost found in lov */
+        if (i == lov->desc.ld_tgt_count) {
+                spin_unlock(&obd->obd_dev_lock);
+                return -EINVAL;
+        }
+
+        spin_unlock(&obd->obd_dev_lock);
+
+        lov_idx = i;
+
+        lov_ost_pool_remove(&pool->pool_obds, lov_idx);
+
+        pool->pool_rr.lqr_dirty = 1;
+
+        CDEBUG(D_CONFIG, "%s removed from "POOLNAMEF"\n", ostname, poolname);
+
+        return 0;
+}
+
+int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
+{
+        int i;
+
+        read_lock(&pool_tgt_rwlock(pool));
+        for (i = 0; i < pool_tgt_count(pool); i++) {
+                if (pool_tgt_array(pool)[i] == idx) {
+                        read_unlock(&pool_tgt_rwlock(pool));
+                        return 0;
+                }
+        }
+        read_unlock(&pool_tgt_rwlock(pool));
+        return -ENOENT;
+}
+
+struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
+{
+        struct pool_desc *pool;
+
+        pool = NULL;
+        if (poolname[0] != '\0') {
+                pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+                if (pool == NULL)
+                        CWARN("Request for an unknown pool ("POOLNAMEF")\n",
+                              poolname);
+                if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
+                        CWARN("Request for an empty pool ("POOLNAMEF")\n",
+                               poolname);
+                        pool = NULL;
+                }
+        }
+        return pool;
+}
+
index 053ef6c..4a97573 100644 (file)
@@ -108,7 +108,7 @@ int qos_add_tgt(struct obd_device *obd, __u32 index)
         list_add_tail(&oss->lqo_oss_list, &temposs->lqo_oss_list);
 
         lov->lov_qos.lq_dirty = 1;
         list_add_tail(&oss->lqo_oss_list, &temposs->lqo_oss_list);
 
         lov->lov_qos.lq_dirty = 1;
-        lov->lov_qos.lq_dirty_rr = 1;
+        lov->lov_qos.lq_rr.lqr_dirty = 1;
 
         CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n",
                obd_uuid2str(&lov->lov_tgts[index]->ltd_uuid),
 
         CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n",
                obd_uuid2str(&lov->lov_tgts[index]->ltd_uuid),
@@ -146,7 +146,7 @@ int qos_del_tgt(struct obd_device *obd, __u32 index)
         }
 
         lov->lov_qos.lq_dirty = 1;
         }
 
         lov->lov_qos.lq_dirty = 1;
-        lov->lov_qos.lq_dirty_rr = 1;
+        lov->lov_qos.lq_rr.lqr_dirty = 1;
 out:
         up_write(&lov->lov_qos.lq_rw_sem);
         RETURN(rc);
 out:
         up_write(&lov->lov_qos.lq_rw_sem);
         RETURN(rc);
@@ -268,10 +268,11 @@ static int qos_calc_weight(struct lov_obd *lov, int i)
 }
 
 /* We just used this index for a stripe; adjust everyone's weights */
 }
 
 /* We just used this index for a stripe; adjust everyone's weights */
-static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
+static int qos_used(struct lov_obd *lov, struct ost_pool *osts,
+                    __u32 index, __u64 *total_wt)
 {
         struct lov_qos_oss *oss;
 {
         struct lov_qos_oss *oss;
-        int i;
+        int j;
         ENTRY;
 
         /* Don't allocate from this stripe anymore, until the next alloc_qos */
         ENTRY;
 
         /* Don't allocate from this stripe anymore, until the next alloc_qos */
@@ -301,7 +302,10 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
 
         *total_wt = 0;
         /* Decrease all OST penalties */
 
         *total_wt = 0;
         /* Decrease all OST penalties */
-        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+        for (j = 0; j < osts->op_count; j++) {
+                int i;
+
+                i = osts->op_array[j];
                 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
                         continue;
                 if (lov->lov_tgts[i]->ltd_qos.ltq_penalty <
                 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
                         continue;
                 if (lov->lov_tgts[i]->ltd_qos.ltq_penalty <
@@ -318,10 +322,11 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
                         *total_wt += lov->lov_tgts[i]->ltd_qos.ltq_weight;
 
 #ifdef QOS_DEBUG
                         *total_wt += lov->lov_tgts[i]->ltd_qos.ltq_weight;
 
 #ifdef QOS_DEBUG
-                CDEBUG(D_QOS, "recalc tgt %d avail="LPU64
+                CDEBUG(D_QOS, "recalc tgt %d usable=%d avail="LPU64
                        " ostppo="LPU64" ostp="LPU64" ossppo="LPU64
                        " ossp="LPU64" wt="LPU64"\n",
                        " ostppo="LPU64" ostp="LPU64" ossppo="LPU64
                        " ossp="LPU64" wt="LPU64"\n",
-                       i, TGT_BAVAIL(i) >> 10,
+                       i, lov->lov_tgts[i]->ltd_qos.ltq_usable,
+                       TGT_BAVAIL(i) >> 10,
                        lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj >> 10,
                        lov->lov_tgts[i]->ltd_qos.ltq_penalty >> 10,
                        lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty_per_obj>>10,
                        lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj >> 10,
                        lov->lov_tgts[i]->ltd_qos.ltq_penalty >> 10,
                        lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty_per_obj>>10,
@@ -335,15 +340,16 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
 
 #define LOV_QOS_EMPTY ((__u32)-1)
 /* compute optimal round-robin order, based on OSTs per OSS */
 
 #define LOV_QOS_EMPTY ((__u32)-1)
 /* compute optimal round-robin order, based on OSTs per OSS */
-static int qos_calc_rr(struct lov_obd *lov)
+static int qos_calc_rr(struct lov_obd *lov, struct ost_pool *src_pool,
+                       struct lov_qos_rr *lqr)
 {
         struct lov_qos_oss *oss;
 {
         struct lov_qos_oss *oss;
-        unsigned ost_count, placed, real_count;
-        int i;
+        unsigned placed, real_count;
+        int i, rc;
         ENTRY;
 
         ENTRY;
 
-        if (!lov->lov_qos.lq_dirty_rr) {
-                LASSERT(lov->lov_qos.lq_rr_size);
+        if (!lqr->lqr_dirty) {
+                LASSERT(lqr->lqr_pool.op_size);
                 RETURN(0);
         }
 
                 RETURN(0);
         }
 
@@ -354,54 +360,45 @@ static int qos_calc_rr(struct lov_obd *lov)
          * Check again. While we were sleeping on @lq_rw_sem something could
          * change.
          */
          * Check again. While we were sleeping on @lq_rw_sem something could
          * change.
          */
-        if (!lov->lov_qos.lq_dirty_rr) {
-                LASSERT(lov->lov_qos.lq_rr_size);
+        if (!lqr->lqr_dirty) {
+                LASSERT(lqr->lqr_pool.op_size);
                 up_write(&lov->lov_qos.lq_rw_sem);
                 RETURN(0);
         }
 
                 up_write(&lov->lov_qos.lq_rw_sem);
                 RETURN(0);
         }
 
-        ost_count = lov->desc.ld_tgt_count;
-
-        if (lov->lov_qos.lq_rr_size)
-                OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size);
-        lov->lov_qos.lq_rr_size = ost_count *
-                sizeof(lov->lov_qos.lq_rr_array[0]);
-        OBD_ALLOC(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size);
-        if (!lov->lov_qos.lq_rr_array) {
-                lov->lov_qos.lq_rr_size = 0;
+        if (lqr->lqr_pool.op_size)
+                lov_ost_pool_free(&lqr->lqr_pool);
+        rc = lov_ost_pool_init(&lqr->lqr_pool, src_pool->op_count);
+        if (rc) {
                 up_write(&lov->lov_qos.lq_rw_sem);
                 up_write(&lov->lov_qos.lq_rw_sem);
-                RETURN(-ENOMEM);
+                RETURN(rc);
         }
 
         }
 
-        real_count = 0;
-        for (i = 0; i < ost_count; i++) {
-                lov->lov_qos.lq_rr_array[i] = LOV_QOS_EMPTY;
-                if (lov->lov_tgts[i])
-                        real_count++;
-        }
+        for (i = 0; i < src_pool->op_count; i++)
+                lqr->lqr_pool.op_array[i] = LOV_QOS_EMPTY;
+        lqr->lqr_pool.op_count = src_pool->op_count;
 
         /* Place all the OSTs from 1 OSS at the same time. */
 
         /* Place all the OSTs from 1 OSS at the same time. */
+        real_count = lqr->lqr_pool.op_count;
         placed = 0;
         list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) {
                 int j = 0;
         placed = 0;
         list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) {
                 int j = 0;
-                for (i = 0; i < ost_count; i++) {
-                        if (lov->lov_tgts[i] &&
-                            lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) {
+                for (i = 0; i < lqr->lqr_pool.op_count; i++) {
+                        if (lov->lov_tgts[src_pool->op_array[i]] &&
+                            (lov->lov_tgts[src_pool->op_array[i]]->ltd_qos.ltq_oss == oss)) {
                               /* Evenly space these OSTs across arrayspace */
                               /* Evenly space these OSTs across arrayspace */
-                              int next = j * ost_count / oss->lqo_ost_count;
-                              LASSERT(next < ost_count);
-                              while (lov->lov_qos.lq_rr_array[next] !=
+                              int next = j * lqr->lqr_pool.op_count / oss->lqo_ost_count;
+                              while (lqr->lqr_pool.op_array[next] !=
                                      LOV_QOS_EMPTY)
                                      LOV_QOS_EMPTY)
-                                      next = (next + 1) % ost_count;
-                              lov->lov_qos.lq_rr_array[next] = i;
+                                        next = (next + 1) % lqr->lqr_pool.op_count;
+                              lqr->lqr_pool.op_array[next] = src_pool->op_array[i];
                               j++;
                               placed++;
                         }
                 }
                               j++;
                               placed++;
                         }
                 }
-                LASSERT(j == oss->lqo_ost_count);
         }
 
         }
 
-        lov->lov_qos.lq_dirty_rr = 0;
+        lqr->lqr_dirty = 0;
         up_write(&lov->lov_qos.lq_rw_sem);
 
         if (placed != real_count) {
         up_write(&lov->lov_qos.lq_rw_sem);
 
         if (placed != real_count) {
@@ -409,18 +406,18 @@ static int qos_calc_rr(struct lov_obd *lov)
                 LCONSOLE_ERROR_MSG(0x14e, "Failed to place all OSTs in the "
                                    "round-robin list (%d of %d).\n",
                                    placed, real_count);
                 LCONSOLE_ERROR_MSG(0x14e, "Failed to place all OSTs in the "
                                    "round-robin list (%d of %d).\n",
                                    placed, real_count);
-                for (i = 0; i < ost_count; i++) {
+                for (i = 0; i < lqr->lqr_pool.op_count; i++) {
                         LCONSOLE(D_WARNING, "rr #%d ost idx=%d\n", i,
                         LCONSOLE(D_WARNING, "rr #%d ost idx=%d\n", i,
-                                 lov->lov_qos.lq_rr_array[i]);
+                                 lqr->lqr_pool.op_array[i]);
                 }
                 }
-                lov->lov_qos.lq_dirty_rr = 1;
+                lqr->lqr_dirty = 1;
                 RETURN(-EAGAIN);
         }
 
 #ifdef QOS_DEBUG
                 RETURN(-EAGAIN);
         }
 
 #ifdef QOS_DEBUG
-        for (i = 0; i < ost_count; i++) {
+        for (i = 0; i < lqr->lqr_pool.op_count; i++) {
                 LCONSOLE(D_QOS, "rr #%d ost idx=%d\n", i,
                 LCONSOLE(D_QOS, "rr #%d ost idx=%d\n", i,
-                         lov->lov_qos.lq_rr_array[i]);
+                         lqr->lqr_pool.op_array[i]);
         }
 #endif
 
         }
 #endif
 
@@ -519,54 +516,70 @@ static int min_stripe_count(int stripe_cnt, int flags)
 #define LOV_CREATE_RESEED_MIN  1000
 /* Allocate objects on osts with round-robin algorithm */
 static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt,
 #define LOV_CREATE_RESEED_MIN  1000
 /* Allocate objects on osts with round-robin algorithm */
 static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt,
-                    int flags)
+                    char *poolname, int flags)
 {
 {
-        unsigned array_idx, ost_count = lov->desc.ld_tgt_count;
-        unsigned ost_active_count = lov->desc.ld_active_tgt_count;
+        unsigned array_idx;
         int i, *idx_pos;
         __u32 ost_idx;
         int ost_start_idx_temp;
         int speed = 0;
         int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
         int i, *idx_pos;
         __u32 ost_idx;
         int ost_start_idx_temp;
         int speed = 0;
         int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
+        struct pool_desc *pool;
+        struct ost_pool *osts;
+        struct lov_qos_rr *lqr;
         ENTRY;
 
         ENTRY;
 
-        i = qos_calc_rr(lov);
-        if (i)
+        pool = lov_find_pool(lov, poolname);
+        if (pool == NULL) {
+                osts = &(lov->lov_packed);
+                lqr = &(lov->lov_qos.lq_rr);
+        } else {
+                read_lock(&pool_tgt_rwlock(pool));
+                osts = &(pool->pool_obds);
+                lqr = &(pool->pool_rr);
+        }
+
+        i = qos_calc_rr(lov, osts, lqr);
+        if (i) {
+                if (pool != NULL)
+                        read_unlock(&pool_tgt_rwlock(pool));
                 RETURN(i);
                 RETURN(i);
+        }
 
 
-        if (--lov->lov_start_count <= 0) {
-                lov->lov_start_idx = ll_rand() % ost_count;
-                lov->lov_start_count =
-                        (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
-                         LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
-        } else if (stripe_cnt_min >= ost_active_count ||
-                   lov->lov_start_idx > ost_count) {
+        if (--lqr->lqr_start_count <= 0) {
+                lqr->lqr_start_idx = ll_rand() % osts->op_count;
+                lqr->lqr_start_count =
+                        (LOV_CREATE_RESEED_MIN / max(osts->op_count, 1U) +
+                         LOV_CREATE_RESEED_MULT) * max(osts->op_count, 1U);
+        } else if (stripe_cnt_min >= osts->op_count ||
+                   lqr->lqr_start_idx > osts->op_count) {
                 /* If we have allocated from all of the OSTs, slowly
                  * precess the next start if the OST/stripe count isn't
                  * already doing this for us. */
                 /* If we have allocated from all of the OSTs, slowly
                  * precess the next start if the OST/stripe count isn't
                  * already doing this for us. */
-                lov->lov_start_idx %= ost_count;
-                if (*stripe_cnt > 1 && (ost_active_count % (*stripe_cnt)) != 1)
-                        ++lov->lov_offset_idx;
+                lqr->lqr_start_idx %= osts->op_count;
+                if (*stripe_cnt > 1 && (osts->op_count % (*stripe_cnt)) != 1)
+                        ++lqr->lqr_offset_idx;
         }
         down_read(&lov->lov_qos.lq_rw_sem);
         }
         down_read(&lov->lov_qos.lq_rw_sem);
-        ost_start_idx_temp = lov->lov_start_idx;
+        ost_start_idx_temp = lqr->lqr_start_idx;
 
 repeat_find:
 
 repeat_find:
-        array_idx = (lov->lov_start_idx + lov->lov_offset_idx) % ost_count;
+        array_idx = (lqr->lqr_start_idx + lqr->lqr_offset_idx) % osts->op_count;
         idx_pos = idx_arr;
 #ifdef QOS_DEBUG
         idx_pos = idx_arr;
 #ifdef QOS_DEBUG
-        CDEBUG(D_QOS, "want %d startidx %d startcnt %d offset %d active %d "
-               "count %d arrayidx %d\n",
-               stripe_cnt, lov->lov_start_idx, lov->lov_start_count,
-               lov->lov_offset_idx, ost_active_count, ost_count, array_idx);
+        CDEBUG(D_QOS, "pool '%s' want %d startidx %d startcnt %d offset %d "
+               "active %d count %d arrayidx %d\n", poolname,
+               *stripe_cnt, lqr->lqr_start_idx, lqr->lqr_start_count,
+               lqr->lqr_offset_idx, osts->op_count, osts->op_count, array_idx);
 #endif
 
 #endif
 
-        for (i = 0; i < ost_count; i++, array_idx=(array_idx + 1) % ost_count) {
-                ++lov->lov_start_idx;
-                ost_idx = lov->lov_qos.lq_rr_array[array_idx];
+        for (i = 0; i < osts->op_count;
+                    i++, array_idx=(array_idx + 1) % osts->op_count) {
+                ++lqr->lqr_start_idx;
+                ost_idx = lqr->lqr_pool.op_array[array_idx];
 #ifdef QOS_DEBUG
                 CDEBUG(D_QOS, "#%d strt %d act %d strp %d ary %d idx %d\n",
 #ifdef QOS_DEBUG
                 CDEBUG(D_QOS, "#%d strt %d act %d strp %d ary %d idx %d\n",
-                       i, lov->lov_start_idx,
+                       i, lqr->lqr_start_idx,
                        ((ost_idx != LOV_QOS_EMPTY) && lov->lov_tgts[ost_idx]) ?
                        lov->lov_tgts[ost_idx]->ltd_active : 0,
                        idx_pos - idx_arr, array_idx, ost_idx);
                        ((ost_idx != LOV_QOS_EMPTY) && lov->lov_tgts[ost_idx]) ?
                        lov->lov_tgts[ost_idx]->ltd_active : 0,
                        idx_pos - idx_arr, array_idx, ost_idx);
@@ -593,10 +606,13 @@ repeat_find:
         if ((speed < 2) && (idx_pos - idx_arr < stripe_cnt_min)) {
                 /* Try again, allowing slower OSCs */
                 speed++;
         if ((speed < 2) && (idx_pos - idx_arr < stripe_cnt_min)) {
                 /* Try again, allowing slower OSCs */
                 speed++;
-                lov->lov_start_idx = ost_start_idx_temp;
+                lqr->lqr_start_idx = ost_start_idx_temp;
                 goto repeat_find;
         }
 
                 goto repeat_find;
         }
 
+        if (pool != NULL)
+                read_unlock(&pool_tgt_rwlock(pool));
+
         up_read(&lov->lov_qos.lq_rw_sem);
 
         *stripe_cnt = idx_pos - idx_arr;
         up_read(&lov->lov_qos.lq_rw_sem);
 
         *stripe_cnt = idx_pos - idx_arr;
@@ -607,15 +623,45 @@ repeat_find:
 static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm,
                           int *idx_arr)
 {
 static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm,
                           int *idx_arr)
 {
-        unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
+        unsigned ost_idx, array_idx, ost_count;
         int i, *idx_pos;
         int speed = 0;
         int i, *idx_pos;
         int speed = 0;
+        struct pool_desc *pool = NULL;
+        struct ost_pool *osts;
         ENTRY;
 
         ENTRY;
 
+        pool = lov_find_pool(lov, lsm->lsm_pool_name);
+        if (pool == NULL) {
+                osts = &(lov->lov_packed);
+        } else {
+                read_lock(&pool_tgt_rwlock(pool));
+                osts = &(pool->pool_obds);
+        }
+
+        ost_count = osts->op_count;
+
 repeat_find:
 repeat_find:
-        ost_idx = lsm->lsm_oinfo[0]->loi_ost_idx;
+        /* search loi_ost_idx in ost array */
+        array_idx = 0;
+        for (i = 0; i < ost_count; i++) {
+                if (osts->op_array[i] == lsm->lsm_oinfo[0]->loi_ost_idx) {
+                        array_idx = i;
+                        break;
+                }
+        }
+        if (i == ost_count) {
+                if (pool != NULL)
+                        read_unlock(&pool_tgt_rwlock(pool));
+                CERROR("Start index %d not found in pool '%s'\n",
+                       lsm->lsm_oinfo[0]->loi_ost_idx, lsm->lsm_pool_name);
+                RETURN(-EINVAL);
+        }
+
         idx_pos = idx_arr;
         idx_pos = idx_arr;
-        for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
+        for (i = 0; i < ost_count;
+             i++, array_idx = (array_idx + 1) % ost_count) {
+                ost_idx = osts->op_array[array_idx];
+
                 if (!lov->lov_tgts[ost_idx] ||
                     !lov->lov_tgts[ost_idx]->ltd_active) {
                         continue;
                 if (!lov->lov_tgts[ost_idx] ||
                     !lov->lov_tgts[ost_idx]->ltd_active) {
                         continue;
@@ -634,8 +680,11 @@ repeat_find:
                 *idx_pos = ost_idx;
                 idx_pos++;
                 /* We have enough stripes */
                 *idx_pos = ost_idx;
                 idx_pos++;
                 /* We have enough stripes */
-                if (idx_pos - idx_arr == lsm->lsm_stripe_count)
+                if (idx_pos - idx_arr == lsm->lsm_stripe_count) {
+                        if (pool != NULL)
+                                read_unlock(&pool_tgt_rwlock(pool));
                         RETURN(0);
                         RETURN(0);
+                }
         }
         if (speed < 2) {
                 /* Try again, allowing slower OSCs */
         }
         if (speed < 2) {
                 /* Try again, allowing slower OSCs */
@@ -652,6 +701,10 @@ repeat_find:
         CERROR("can't lstripe objid "LPX64": have %d want %u\n",
                lsm->lsm_object_id, (int)(idx_pos - idx_arr),
                lsm->lsm_stripe_count);
         CERROR("can't lstripe objid "LPX64": have %d want %u\n",
                lsm->lsm_object_id, (int)(idx_pos - idx_arr),
                lsm->lsm_stripe_count);
+
+        if (pool != NULL)
+                read_unlock(&pool_tgt_rwlock(pool));
+
         RETURN(-EFBIG);
 }
 
         RETURN(-EFBIG);
 }
 
@@ -660,20 +713,32 @@ repeat_find:
    - network resources (shared OSS's)
 */
 static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
    - network resources (shared OSS's)
 */
 static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
-                     int flags)
+                     char *poolname, int flags)
 {
         struct lov_obd *lov = &exp->exp_obd->u.lov;
         static time_t last_warn = 0;
         time_t now = cfs_time_current_sec();
         __u64 total_bavail, total_weight = 0;
 {
         struct lov_obd *lov = &exp->exp_obd->u.lov;
         static time_t last_warn = 0;
         time_t now = cfs_time_current_sec();
         __u64 total_bavail, total_weight = 0;
-        __u32 ost_count;
         int nfound, good_osts, i, warn = 0, rc = 0;
         int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
         int nfound, good_osts, i, warn = 0, rc = 0;
         int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
+        struct pool_desc *pool;
+        struct ost_pool *osts;
+        struct lov_qos_rr *lqr;
         ENTRY;
 
         if (stripe_cnt_min < 1)
                 GOTO(out_nolock, rc = -EINVAL);
 
         ENTRY;
 
         if (stripe_cnt_min < 1)
                 GOTO(out_nolock, rc = -EINVAL);
 
+        pool = lov_find_pool(lov, poolname);
+        if (pool == NULL) {
+                osts = &(lov->lov_packed);
+                lqr = &(lov->lov_qos.lq_rr);
+        } else {
+                read_lock(&pool_tgt_rwlock(pool));
+                osts = &(pool->pool_obds);
+                lqr = &(pool->pool_rr);
+        }
+
         lov_getref(exp->exp_obd);
 
         /* Detect -EAGAIN early, before expensive lock is taken. */
         lov_getref(exp->exp_obd);
 
         /* Detect -EAGAIN early, before expensive lock is taken. */
@@ -690,8 +755,6 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
         if (!lov->lov_qos.lq_dirty && lov->lov_qos.lq_same_space)
                 GOTO(out, rc = -EAGAIN);
 
         if (!lov->lov_qos.lq_dirty && lov->lov_qos.lq_same_space)
                 GOTO(out, rc = -EAGAIN);
 
-        ost_count = lov->desc.ld_tgt_count;
-
         if (lov->desc.ld_active_tgt_count < 2)
                 GOTO(out, rc = -EAGAIN);
 
         if (lov->desc.ld_active_tgt_count < 2)
                 GOTO(out, rc = -EAGAIN);
 
@@ -705,24 +768,25 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
         if (cfs_time_sub(now, last_warn) > 60 * 30)
                 warn = 1;
         /* Find all the OSTs that are valid stripe candidates */
         if (cfs_time_sub(now, last_warn) > 60 * 30)
                 warn = 1;
         /* Find all the OSTs that are valid stripe candidates */
-        for (i = 0; i < ost_count; i++) {
+        for (i = 0; i < osts->op_count; i++) {
                 __u64 bavail;
 
                 __u64 bavail;
 
-                if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
+                if (!lov->lov_tgts[osts->op_array[i]] ||
+                    !lov->lov_tgts[osts->op_array[i]]->ltd_active)
                         continue;
                         continue;
-                bavail = TGT_BAVAIL(i);
+                bavail = TGT_BAVAIL(osts->op_array[i]);
                 if (!bavail) {
                         if (warn) {
                                 CDEBUG(D_QOS, "no free space on %s\n",
                 if (!bavail) {
                         if (warn) {
                                 CDEBUG(D_QOS, "no free space on %s\n",
-                                     obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid));
+                                     obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid));
                                 last_warn = now;
                         }
                         continue;
                 }
                                 last_warn = now;
                         }
                         continue;
                 }
-                if (!TGT_FFREE(i)) {
+                if (!TGT_FFREE(osts->op_array[i])) {
                         if (warn) {
                                 CDEBUG(D_QOS, "no free inodes on %s\n",
                         if (warn) {
                                 CDEBUG(D_QOS, "no free inodes on %s\n",
-                                     obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid));
+                                     obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid));
                                 last_warn = now;
                         }
                         continue;
                                 last_warn = now;
                         }
                         continue;
@@ -730,20 +794,24 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
 
                 /* Fail Check before osc_precreate() is called
                    so we can only 'fail' single OSC. */
 
                 /* Fail Check before osc_precreate() is called
                    so we can only 'fail' single OSC. */
-                if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && i == 0)
+                if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && osts->op_array[i] == 0)
                         continue;
 
                         continue;
 
-                if (obd_precreate(lov->lov_tgts[i]->ltd_exp) > 2)
+                if (obd_precreate(lov->lov_tgts[osts->op_array[i]]->ltd_exp) > 2)
                         continue;
 
                         continue;
 
-                lov->lov_tgts[i]->ltd_qos.ltq_usable = 1;
-                qos_calc_weight(lov, i);
+                lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_usable = 1;
+                qos_calc_weight(lov, osts->op_array[i]);
                 total_bavail += bavail;
                 total_bavail += bavail;
-                total_weight += lov->lov_tgts[i]->ltd_qos.ltq_weight;
+                total_weight += lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_weight;
 
                 good_osts++;
         }
 
 
                 good_osts++;
         }
 
+#ifdef QOS_DEBUG
+        CDEBUG(D_QOS, "found %d good osts\n", good_osts);
+#endif
+
         if (good_osts < stripe_cnt_min)
                 GOTO(out, rc = -EAGAIN);
 
         if (good_osts < stripe_cnt_min)
                 GOTO(out, rc = -EAGAIN);
 
@@ -792,19 +860,24 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
 
                 /* On average, this will hit larger-weighted osts more often.
                    0-weight osts will always get used last (only when rand=0).*/
 
                 /* On average, this will hit larger-weighted osts more often.
                    0-weight osts will always get used last (only when rand=0).*/
-                for (i = 0; i < ost_count; i++) {
-                        if (!lov->lov_tgts[i] ||
-                            !lov->lov_tgts[i]->ltd_qos.ltq_usable)
+                for (i = 0; i < osts->op_count; i++) {
+                        if (!lov->lov_tgts[osts->op_array[i]] ||
+                            !lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_usable)
                                 continue;
 
                                 continue;
 
-                        cur_weight += lov->lov_tgts[i]->ltd_qos.ltq_weight;
+                        cur_weight += lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_weight;
+#ifdef QOS_DEBUG
+                        CDEBUG(D_QOS, "stripe_cnt=%d nfound=%d cur_weight="LPU64
+                                      " rand="LPU64" total_weight="LPU64"\n",
+                               *stripe_cnt, nfound, cur_weight, rand, total_weight);
+#endif
                         if (cur_weight >= rand) {
 #ifdef QOS_DEBUG
                                 CDEBUG(D_QOS, "assigned stripe=%d to idx=%d\n",
                         if (cur_weight >= rand) {
 #ifdef QOS_DEBUG
                                 CDEBUG(D_QOS, "assigned stripe=%d to idx=%d\n",
-                                       nfound, i);
+                                       nfound, osts->op_array[i]);
 #endif
 #endif
-                                idx_arr[nfound++] = i;
-                                qos_used(lov, i, &total_weight);
+                                idx_arr[nfound++] = osts->op_array[i];
+                                qos_used(lov, osts, osts->op_array[i], &total_weight);
                                 rc = 0;
                                 break;
                         }
                                 rc = 0;
                                 break;
                         }
@@ -818,11 +891,14 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
         LASSERT(nfound == *stripe_cnt);
 
 out:
         LASSERT(nfound == *stripe_cnt);
 
 out:
+        if (pool != NULL)
+                read_unlock(&pool_tgt_rwlock(pool));
+
         up_write(&lov->lov_qos.lq_rw_sem);
 
 out_nolock:
         if (rc == -EAGAIN)
         up_write(&lov->lov_qos.lq_rw_sem);
 
 out_nolock:
         if (rc == -EAGAIN)
-                rc = alloc_rr(lov, idx_arr, stripe_cnt, flags);
+                rc = alloc_rr(lov, idx_arr, stripe_cnt, poolname, flags);
 
         lov_putref(exp->exp_obd);
         RETURN(rc);
 
         lov_putref(exp->exp_obd);
         RETURN(rc);
@@ -847,7 +923,8 @@ static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         if (newea ||
             lsm->lsm_oinfo[0]->loi_ost_idx >= lov->desc.ld_tgt_count)
 
         if (newea ||
             lsm->lsm_oinfo[0]->loi_ost_idx >= lov->desc.ld_tgt_count)
-                rc = alloc_qos(exp, tmp_arr, &stripe_cnt, flags);
+                rc = alloc_qos(exp, tmp_arr, &stripe_cnt,
+                               lsm->lsm_pool_name, flags);
         else
                 rc = alloc_specific(lov, lsm, tmp_arr);
 
         else
                 rc = alloc_specific(lov, lsm, tmp_arr);
 
index 182a8b7..d05c902 100644 (file)
@@ -265,12 +265,11 @@ static int mdd_lov_set_dir_md(const struct lu_env *env,
         LASSERT(S_ISDIR(mdd_object_type(obj)));
         lum = (struct lov_user_md*)buf->lb_buf;
 
         LASSERT(S_ISDIR(mdd_object_type(obj)));
         lum = (struct lov_user_md*)buf->lb_buf;
 
-        /* if { size, offset, count } = { 0, -1, 0 } (i.e. all default
+        /* if { size, offset, count } = { 0, -1, 0 } and no pool (i.e. all default
          * values specified) then delete default striping from dir. */
          * values specified) then delete default striping from dir. */
-        if ((lum->lmm_stripe_size == 0 && lum->lmm_stripe_count == 0 &&
-             lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1)) ||
-             /* lmm_stripe_size == -1 is deprecated in 1.4.6 */
-             lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1)){
+        if (lum->lmm_stripe_size == 0 && lum->lmm_stripe_count == 0 &&
+            lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1) &&
+            lum->lmm_magic != LOV_USER_MAGIC_V3) {
                 rc = mdd_xattr_set_txn(env, obj, &LU_BUF_NULL,
                                        MDS_LOV_MD_NAME, 0, handle);
                 if (rc == -ENODATA)
                 rc = mdd_xattr_set_txn(env, obj, &LU_BUF_NULL,
                                        MDS_LOV_MD_NAME, 0, handle);
                 if (rc == -ENODATA)
@@ -324,7 +323,7 @@ int mdd_lov_set_md(const struct lu_env *env, struct mdd_object *pobj,
                 if (lmmp == NULL && lmm_size == 0) {
                         struct mdd_device *mdd = mdd_obj2mdd_dev(child);
                         struct lov_mds_md *lmm = mdd_max_lmm_get(env, mdd);
                 if (lmmp == NULL && lmm_size == 0) {
                         struct mdd_device *mdd = mdd_obj2mdd_dev(child);
                         struct lov_mds_md *lmm = mdd_max_lmm_get(env, mdd);
-                        int size = sizeof(*lmm);
+                        int size = sizeof(struct lov_mds_md_v3);
 
                         /* Get parent dir stripe and set */
                         if (pobj != NULL)
 
                         /* Get parent dir stripe and set */
                         if (pobj != NULL)
@@ -362,15 +361,21 @@ static void mdd_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm
 {
         struct mds_obd *mds = &obd->u.mds;
         int j;
 {
         struct mds_obd *mds = &obd->u.mds;
         int j;
+        struct lov_ost_data_v1 *lmm_objects;
         ENTRY;
 
         /* if we create file without objects - lmm is NULL */
         if (lmm == NULL)
                 return;
 
         ENTRY;
 
         /* if we create file without objects - lmm is NULL */
         if (lmm == NULL)
                 return;
 
+        if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V3)
+                lmm_objects = ((struct lov_mds_md_v3 *)lmm)->lmm_objects;
+        else
+                lmm_objects = lmm->lmm_objects;
+
         for (j = 0; j < le32_to_cpu(lmm->lmm_stripe_count); j++) {
         for (j = 0; j < le32_to_cpu(lmm->lmm_stripe_count); j++) {
-                int i = le32_to_cpu(lmm->lmm_objects[j].l_ost_idx);
-                obd_id id = le64_to_cpu(lmm->lmm_objects[j].l_object_id);
+                int i = le32_to_cpu(lmm_objects[j].l_ost_idx);
+                obd_id id = le64_to_cpu(lmm_objects[j].l_object_id);
                 int page = i / OBJID_PER_PAGE();
                 int idx = i % OBJID_PER_PAGE();
                 obd_id *data = mds->mds_lov_page_array[page];
                 int page = i / OBJID_PER_PAGE();
                 int idx = i % OBJID_PER_PAGE();
                 obd_id *data = mds->mds_lov_page_array[page];
index 743762f..01ab561 100644 (file)
@@ -121,7 +121,10 @@ int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj,
         if (rc || !(ma->ma_valid & MA_LOV))
                 RETURN(rc);
 
         if (rc || !(ma->ma_valid & MA_LOV))
                 RETURN(rc);
 
-        LASSERT(le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC);
+        LASSERTF(le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC_V1 ||
+                 le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC_V3,
+                 "%08x", le32_to_cpu(ma->ma_lmm->lmm_magic));
+
         if ((int)le32_to_cpu(ma->ma_lmm->lmm_stripe_count) < 0)
                 stripe = mdd2obd_dev(mdd)->u.mds.mds_lov_desc.ld_tgt_count;
         else
         if ((int)le32_to_cpu(ma->ma_lmm->lmm_stripe_count) < 0)
                 stripe = mdd2obd_dev(mdd)->u.mds.mds_lov_desc.ld_tgt_count;
         else
index 2c37cc5..ee256b2 100644 (file)
@@ -410,7 +410,7 @@ static int mds_cmd_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         if (rc)
                 GOTO(err_objects, rc);
 
         if (rc)
                 GOTO(err_objects, rc);
 
-        mds->mds_max_mdsize = sizeof(struct lov_mds_md);
+        mds->mds_max_mdsize = sizeof(struct lov_mds_md_v3);
         mds->mds_max_cookiesize = sizeof(struct llog_cookie);
 
 err_pop:
         mds->mds_max_cookiesize = sizeof(struct llog_cookie);
 
 err_pop:
index 4d30fe3..9cf0e71 100644 (file)
@@ -64,7 +64,8 @@ int mds_post_mds_lovconf(struct obd_device *obd);
 int mds_notify(struct obd_device *obd, struct obd_device *watched,
                enum obd_notify_event ev, void *data);
 int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
 int mds_notify(struct obd_device *obd, struct obd_device *watched,
                enum obd_notify_event ev, void *data);
 int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
-                       struct lov_mds_md *lmm, int lmm_size);
+                       struct lov_mds_md *lmm, int lmm_size,
+                       __u64 connect_flags);
 int mds_init_lov_desc(struct obd_device *obd, struct obd_export *osc_exp);
 
 int mds_obd_create(struct obd_export *exp, struct obdo *oa,
 int mds_init_lov_desc(struct obd_device *obd, struct obd_export *osc_exp);
 
 int mds_obd_create(struct obd_export *exp, struct obdo *oa,
index 1f0f995..07444ac 100644 (file)
@@ -367,7 +367,7 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
         stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
                                mds->mds_lov_desc.ld_tgt_count);
 
         stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
                                mds->mds_lov_desc.ld_tgt_count);
 
-        mds->mds_max_mdsize = lov_mds_md_size(stripes);
+        mds->mds_max_mdsize = lov_mds_md_size(stripes, LOV_MAGIC_V3);
         mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
         CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize for %d stripes: "
                "%d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize,
         mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
         CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize for %d stripes: "
                "%d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize,
index 3208572..1bd26d6 100644 (file)
@@ -357,6 +357,21 @@ static int mgs_put_cfg_lock(struct lustre_handle *lockh)
         RETURN(0);
 }
 
         RETURN(0);
 }
 
+static void mgs_revoke_lock(struct obd_device *obd, char *fsname,
+                            struct lustre_handle *lockh)
+{
+        int lockrc;
+
+        if (fsname[0]) {
+                lockrc = mgs_get_cfg_lock(obd, fsname, lockh);
+                if (lockrc != ELDLM_OK)
+                        CERROR("lock error %d for fs %s\n", lockrc,
+                               fsname);
+                else
+                        mgs_put_cfg_lock(lockh);
+        }
+}
+
 /* rc=0 means ok
       1 means update
      <0 means error */
 /* rc=0 means ok
       1 means update
      <0 means error */
@@ -508,7 +523,7 @@ static int mgs_set_info_rpc(struct ptlrpc_request *req)
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mgs_send_param *msp, *rep_msp;
         struct lustre_handle lockh;
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mgs_send_param *msp, *rep_msp;
         struct lustre_handle lockh;
-        int lockrc, rc;
+        int rc;
         struct lustre_cfg_bufs bufs;
         struct lustre_cfg *lcfg;
         char fsname[MTI_NAME_MAXLEN];
         struct lustre_cfg_bufs bufs;
         struct lustre_cfg *lcfg;
         char fsname[MTI_NAME_MAXLEN];
@@ -528,19 +543,9 @@ static int mgs_set_info_rpc(struct ptlrpc_request *req)
                 RETURN(rc);
         }
 
                 RETURN(rc);
         }
 
-        /* Revoke lock so everyone updates.  Should be alright if
-         * someone was already reading while we were updating the logs,
-         * so we don't really need to hold the lock while we're
-         * writing.
-         */
-        if (fsname[0]) {
-                lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
-                if (lockrc != ELDLM_OK)
-                        CERROR("lock error %d for fs %s\n", lockrc,
-                               fsname);
-                else
-                        mgs_put_cfg_lock(&lockh);
-        }
+        /* request for update */
+        mgs_revoke_lock(obd, fsname, &lockh);
+
         lustre_cfg_free(lcfg);
 
         rc = req_capsule_server_pack(&req->rq_pill);
         lustre_cfg_free(lcfg);
 
         rc = req_capsule_server_pack(&req->rq_pill);
@@ -709,6 +714,134 @@ static inline int mgs_destroy_export(struct obd_export *exp)
         RETURN(0);
 }
 
         RETURN(0);
 }
 
+static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname)
+{
+        char *ptr;
+
+        ENTRY;
+        for (ptr = arg;  (*ptr != '\0') && (*ptr != '.'); ptr++ ) {
+                *fsname = *ptr;
+                fsname++;
+        }
+        if (*ptr == '\0')
+                return -EINVAL;
+        *fsname = '\0';
+        ptr++;
+        strcpy(poolname, ptr);
+
+        RETURN(0);
+}
+
+static int mgs_iocontrol_pool(struct obd_device *obd, 
+                              struct obd_ioctl_data *data)
+{
+        int rc;
+        struct lustre_handle lockh;
+        struct lustre_cfg *lcfg = NULL;
+        struct llog_rec_hdr rec;
+        char *fsname = NULL;
+        char *poolname = NULL;
+        ENTRY;
+
+        OBD_ALLOC(fsname, MTI_NAME_MAXLEN);
+        if (fsname == NULL)
+                RETURN(-ENOMEM);
+
+        OBD_ALLOC(poolname, MAXPOOLNAME + 1);
+        if (poolname == NULL) {
+                rc = -ENOMEM;
+                GOTO(out_pool, rc);
+        }
+        rec.lrh_len = llog_data_len(data->ioc_plen1);
+
+        if (data->ioc_type == LUSTRE_CFG_TYPE) {
+                rec.lrh_type = OBD_CFG_REC;
+        } else {
+                CERROR("unknown cfg record type:%d \n", data->ioc_type);
+                rc = -EINVAL;
+                GOTO(out_pool, rc);
+        }
+
+        if (data->ioc_plen1 > CFS_PAGE_SIZE) {
+                rc = -E2BIG;
+                GOTO(out_pool, rc);
+        }
+
+        OBD_ALLOC(lcfg, data->ioc_plen1);
+        if (lcfg == NULL) {
+                rc = -ENOMEM;
+                GOTO(out_pool, rc);
+        }
+        rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
+        if (rc)
+                GOTO(out_pool, rc);
+
+        if (lcfg->lcfg_bufcount < 2) {
+                rc = -EINVAL;
+                GOTO(out_pool, rc);
+        }
+
+        /* first arg is always <fsname>.<poolname> */
+        mgs_extract_fs_pool(lustre_cfg_string(lcfg, 1), fsname,
+                            poolname);
+
+        switch (lcfg->lcfg_command) {
+        case LCFG_POOL_NEW: {
+                if (lcfg->lcfg_bufcount != 2)
+                        RETURN(-EINVAL);
+                rc = mgs_pool_cmd(obd, LCFG_POOL_NEW, fsname,
+                                  poolname, NULL);
+                break;
+        }
+        case LCFG_POOL_ADD: {
+                if (lcfg->lcfg_bufcount != 3)
+                        RETURN(-EINVAL);
+                rc = mgs_pool_cmd(obd, LCFG_POOL_ADD, fsname, poolname,
+                                  lustre_cfg_string(lcfg, 2));
+                break;
+        }
+        case LCFG_POOL_REM: {
+                if (lcfg->lcfg_bufcount != 3)
+                        RETURN(-EINVAL);
+                rc = mgs_pool_cmd(obd, LCFG_POOL_REM, fsname, poolname,
+                                  lustre_cfg_string(lcfg, 2));
+                break;
+        }
+        case LCFG_POOL_DEL: {
+                if (lcfg->lcfg_bufcount != 2)
+                        RETURN(-EINVAL);
+                rc = mgs_pool_cmd(obd, LCFG_POOL_DEL, fsname,
+                                  poolname, NULL);
+                break;
+        }
+        default: {
+                 rc = -EINVAL;
+                 GOTO(out_pool, rc);
+        }
+        }
+
+        if (rc) {
+                CERROR("OBD_IOC_POOL err %d, cmd %X for pool %s.%s\n",
+                       rc, lcfg->lcfg_command, fsname, poolname);
+                GOTO(out_pool, rc);
+        }
+
+        /* request for update */
+        mgs_revoke_lock(obd, fsname, &lockh);
+
+out_pool:
+        if (lcfg != NULL)
+                OBD_FREE(lcfg, data->ioc_plen1);
+
+        if (fsname != NULL)
+                OBD_FREE(fsname, MTI_NAME_MAXLEN);
+
+        if (poolname != NULL)
+                OBD_FREE(poolname, MAXPOOLNAME + 1);
+
+        RETURN(rc);
+}
+
 /* from mdt_iocontrol */
 int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                   void *karg, void *uarg)
 /* from mdt_iocontrol */
 int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                   void *karg, void *uarg)
@@ -728,7 +861,6 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 struct lustre_cfg *lcfg;
                 struct llog_rec_hdr rec;
                 char fsname[MTI_NAME_MAXLEN];
                 struct lustre_cfg *lcfg;
                 struct llog_rec_hdr rec;
                 char fsname[MTI_NAME_MAXLEN];
-                int lockrc;
 
                 rec.lrh_len = llog_data_len(data->ioc_plen1);
 
 
                 rec.lrh_len = llog_data_len(data->ioc_plen1);
 
@@ -759,20 +891,17 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                    someone was already reading while we were updating the logs,
                    so we don't really need to hold the lock while we're
                    writing (above). */
                    someone was already reading while we were updating the logs,
                    so we don't really need to hold the lock while we're
                    writing (above). */
-                if (fsname[0]) {
-                        lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
-                        if (lockrc != ELDLM_OK)
-                                CERROR("lock error %d for fs %s\n", lockrc,
-                                       fsname);
-                        else
-                                mgs_put_cfg_lock(&lockh);
-                }
+                mgs_revoke_lock(obd, fsname, &lockh);
 
 out_free:
                 OBD_FREE(lcfg, data->ioc_plen1);
                 RETURN(rc);
         }
 
 
 out_free:
                 OBD_FREE(lcfg, data->ioc_plen1);
                 RETURN(rc);
         }
 
+        case OBD_IOC_POOL: {
+                RETURN(mgs_iocontrol_pool(obd, data));
+        }
+
         case OBD_IOC_DUMP_LOG: {
                 struct llog_ctxt *ctxt;
                 ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
         case OBD_IOC_DUMP_LOG: {
                 struct llog_ctxt *ctxt;
                 ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
index c0620a1..d5d18bd 100644 (file)
@@ -95,6 +95,9 @@ int mgs_erase_log(struct obd_device *obd, char *name);
 int mgs_erase_logs(struct obd_device *obd, char *fsname);
 int mgs_setparam(struct obd_device *obd, struct lustre_cfg *lcfg, char *fsname);
 
 int mgs_erase_logs(struct obd_device *obd, char *fsname);
 int mgs_setparam(struct obd_device *obd, struct lustre_cfg *lcfg, char *fsname);
 
+int mgs_pool_cmd(struct obd_device *obd, enum lcfg_command_type cmd,
+                 char *poolname, char *fsname, char *ostname);
+
 /* mgs_fs.c */
 int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt);
 int mgs_fs_cleanup(struct obd_device *obddev);
 /* mgs_fs.c */
 int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt);
 int mgs_fs_cleanup(struct obd_device *obddev);
index 8c3f77d..b621ca6 100644 (file)
@@ -3235,6 +3235,145 @@ out:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
+static int mgs_write_log_pool(struct obd_device *obd, char *logname, struct fs_db *fsdb,
+                       char *lovname,
+                       enum lcfg_command_type cmd,
+                       char *poolname, char *fsname,
+                       char *ostname, char *comment)
+{
+        struct llog_handle *llh = NULL;
+        int rc;
+
+        rc = record_start_log(obd, &llh, logname);
+        if (rc)
+                RETURN(rc);
+        rc = record_marker(obd, llh, fsdb, CM_START, lovname, comment);
+        record_base(obd, llh, lovname, 0, cmd, poolname, fsname, ostname, 0);
+        rc = record_marker(obd, llh, fsdb, CM_END, lovname, comment);
+        rc = record_end_log(obd, &llh);
+
+        return(rc);
+}
+
+int mgs_pool_cmd(struct obd_device *obd, enum lcfg_command_type cmd,
+                 char *fsname, char *poolname, char *ostname)
+{
+        struct fs_db *fsdb;
+        char mdt_index[16];
+        char *lovname;
+        char *logname;
+        char *label, *canceled_label = NULL;
+        int label_sz;
+        struct mgs_target_info *mti;
+        int rc;
+        ENTRY;
+
+        rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
+        if (rc) {
+                CERROR("Can't get db for %s\n", fsname);
+                RETURN(rc);
+        }
+        if (fsdb->fsdb_flags & FSDB_LOG_EMPTY) {
+                CERROR("%s is not defined\n", fsname);
+                mgs_free_fsdb(obd, fsdb);
+                RETURN(-EINVAL);
+        }
+
+        label_sz = 10 + strlen(fsname) + strlen(poolname);
+
+        /* check if ostname match fsname */
+        if (ostname != NULL) {
+                char *ptr;
+
+                ptr = strrchr(ostname, '-');
+                if ((ptr == NULL) ||
+                    (strncmp(fsname, ostname, ptr-ostname) != 0))
+                        RETURN(-EINVAL);
+                label_sz += strlen(ostname);
+        }
+
+        OBD_ALLOC(label, label_sz);
+        if (label == NULL)
+                RETURN(-ENOMEM);
+
+        switch(cmd) {
+        case LCFG_POOL_NEW: {
+                sprintf(label,
+                        "new %s.%s", fsname, poolname);
+                break;
+        }
+        case LCFG_POOL_ADD: {
+                sprintf(label,
+                        "add %s.%s.%s", fsname, poolname, ostname);
+                break;
+        }
+        case LCFG_POOL_REM: {
+                OBD_ALLOC(canceled_label, label_sz);
+                if (canceled_label == NULL)
+                         RETURN(-ENOMEM);
+                sprintf(label,
+                        "rem %s.%s.%s", fsname, poolname, ostname);
+                sprintf(canceled_label,
+                        "add %s.%s.%s", fsname, poolname, ostname);
+                break;
+        }
+        case LCFG_POOL_DEL: {
+                OBD_ALLOC(canceled_label, label_sz);
+                if (canceled_label == NULL)
+                         RETURN(-ENOMEM);
+                sprintf(label,
+                        "del %s.%s", fsname, poolname);
+                sprintf(canceled_label,
+                        "new %s.%s", fsname, poolname);
+                break;
+        }
+        default: {
+                break;
+        }
+        }
+
+        down(&fsdb->fsdb_sem);
+
+        sprintf(mdt_index, "-MDT%04x", 0);
+        name_create(&logname, fsname, mdt_index);
+        name_create(&lovname, logname, "-mdtlov");
+
+        mti = NULL;
+        if (canceled_label != NULL) {
+                OBD_ALLOC(mti, sizeof(*mti));
+                if (mti != NULL) {
+                        strcpy(mti->mti_svname, "lov pool");
+                        mgs_modify(obd, fsdb, mti, logname, lovname,
+                                   canceled_label, CM_SKIP);
+                }
+        }
+
+        mgs_write_log_pool(obd, logname, fsdb, lovname,
+                           cmd, fsname, poolname, ostname, label);
+        name_destroy(&logname);
+
+        name_create(&logname, fsname, "-client");
+        if (canceled_label != NULL) {
+                mgs_modify(obd, fsdb, mti, logname, lovname,
+                           canceled_label, CM_SKIP);
+        }
+        mgs_write_log_pool(obd, logname, fsdb, fsdb->fsdb_clilov,
+                           cmd, fsname, poolname, ostname, label);
+        name_destroy(&logname);
+        name_destroy(&lovname);
+
+        up(&fsdb->fsdb_sem);
+
+        OBD_FREE(label, label_sz);
+        if (canceled_label != NULL)
+                OBD_FREE(canceled_label, label_sz);
+
+        if (mti != NULL)
+                OBD_FREE(mti, sizeof(*mti));
+
+        RETURN(rc);
+}
+
 #if 0
 /******************** unused *********************/
 static int mgs_backup_llog(struct obd_device *obd, char* fsname)
 #if 0
 /******************** unused *********************/
 static int mgs_backup_llog(struct obd_device *obd, char* fsname)
index 1f4d99b..ed5faa3 100644 (file)
@@ -117,9 +117,10 @@ int dump_obdo(struct obdo *oa)
 void dump_lsm(int level, struct lov_stripe_md *lsm)
 {
         CDEBUG(level, "lsm %p, objid "LPX64", maxbytes "LPX64", magic 0x%08X, "
 void dump_lsm(int level, struct lov_stripe_md *lsm)
 {
         CDEBUG(level, "lsm %p, objid "LPX64", maxbytes "LPX64", magic 0x%08X, "
-               "stripe_size %u, stripe_count %u\n", lsm,
+               "stripe_size %u, stripe_count %u pool "POOLNAMEF"\n", lsm,
                lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
                lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
-               lsm->lsm_stripe_size, lsm->lsm_stripe_count);
+               lsm->lsm_stripe_size, lsm->lsm_stripe_count,
+               lsm->lsm_pool_name);
 }
 
 /* XXX assumes only a single page in request */
 }
 
 /* XXX assumes only a single page in request */
index a0e0912..79eb987 100644 (file)
@@ -111,28 +111,35 @@ static int lprocfs_obd_snprintf(char **page, int end, int *len,
         return n;
 }
 
         return n;
 }
 
-int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-                       read_proc_t *read_proc, write_proc_t *write_proc,
-                       void *data)
-{
-        struct proc_dir_entry *proc;
+cfs_proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+                                         char *name,
+                                         read_proc_t *read_proc,
+                                         write_proc_t *write_proc,
+                                         void *data,
+                                         struct file_operations *fops)
+{
+        cfs_proc_dir_entry_t *proc;
         mode_t mode = 0;
 
         if (root == NULL || name == NULL)
         mode_t mode = 0;
 
         if (root == NULL || name == NULL)
-                return -EINVAL;
+                return ERR_PTR(-EINVAL);
         if (read_proc)
                 mode = 0444;
         if (write_proc)
                 mode |= 0200;
         if (read_proc)
                 mode = 0444;
         if (write_proc)
                 mode |= 0200;
+        if (fops)
+                mode = 0644;
         proc = create_proc_entry(name, mode, root);
         if (!proc) {
                 CERROR("LprocFS: No memory to create /proc entry %s", name);
         proc = create_proc_entry(name, mode, root);
         if (!proc) {
                 CERROR("LprocFS: No memory to create /proc entry %s", name);
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
         }
         proc->read_proc = read_proc;
         proc->write_proc = write_proc;
         proc->data = data;
         }
         proc->read_proc = read_proc;
         proc->write_proc = write_proc;
         proc->data = data;
-        return 0;
+        if (fops)
+                proc->proc_fops = fops;
+        return proc;
 }
 
 struct proc_dir_entry *lprocfs_add_symlink(const char *name,
 }
 
 struct proc_dir_entry *lprocfs_add_symlink(const char *name,
@@ -730,6 +737,8 @@ static const char *obd_connect_names[] = {
         "change_qunit_size",
         "alt_checksum_algorithm",
         "fid_is_enabled",
         "change_qunit_size",
         "alt_checksum_algorithm",
         "fid_is_enabled",
+        "version_recovery",
+        "pools",
         NULL
 };
 
         NULL
 };
 
@@ -1207,6 +1216,10 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
         LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_lock_cancel_cb);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats,unregister_lock_cancel_cb);
         LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_lock_cancel_cb);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats,unregister_lock_cancel_cb);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_new);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_rem);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_add);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_del);
 }
 
 int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
 }
 
 int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
@@ -1488,6 +1501,7 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid)
         int rc = 0;
         struct nid_stat *tmp = NULL, *tmp1;
         struct obd_device *obd = NULL;
         int rc = 0;
         struct nid_stat *tmp = NULL, *tmp1;
         struct obd_device *obd = NULL;
+        cfs_proc_dir_entry_t *entry;
         ENTRY;
 
         *newnid = 0;
         ENTRY;
 
         *newnid = 0;
@@ -1538,15 +1552,19 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid)
                 GOTO(destroy_new, rc = -ENOMEM);
         }
 
                 GOTO(destroy_new, rc = -ENOMEM);
         }
 
-        rc = lprocfs_add_simple(tmp->nid_proc, "uuid",
-                                lprocfs_exp_rd_uuid, NULL, tmp);
-        if (rc)
+        entry = lprocfs_add_simple(tmp->nid_proc, "uuid",
+                                   lprocfs_exp_rd_uuid, NULL, tmp, NULL);
+        if (IS_ERR(entry)) {
                 CWARN("Error adding the uuid file\n");
                 CWARN("Error adding the uuid file\n");
+                rc = PTR_ERR(entry);
+        }
 
 
-        rc = lprocfs_add_simple(tmp->nid_proc, "hash",
-                                lprocfs_exp_rd_hash, NULL, tmp);
-        if (rc)
+        entry = lprocfs_add_simple(tmp->nid_proc, "hash",
+                                lprocfs_exp_rd_hash, NULL, tmp, NULL);
+        if (IS_ERR(entry)) {
                 CWARN("Error adding the hash file\n");
                 CWARN("Error adding the hash file\n");
+                rc = PTR_ERR(entry);
+        }
 
         exp->exp_nid_stats = tmp;
         *newnid = 1;
 
         exp->exp_nid_stats = tmp;
         *newnid = 1;
index fbc8a8d..d375ab7 100644 (file)
@@ -825,6 +825,28 @@ int class_process_config(struct lustre_cfg *lcfg)
                 err = class_del_conn(obd, lcfg);
                 GOTO(out, err = 0);
         }
                 err = class_del_conn(obd, lcfg);
                 GOTO(out, err = 0);
         }
+        case LCFG_POOL_NEW: {
+                err = obd_pool_new(obd, lustre_cfg_string(lcfg, 2));
+                GOTO(out, err = 0);
+                break;
+        }
+        case LCFG_POOL_ADD: {
+                err = obd_pool_add(obd, lustre_cfg_string(lcfg, 2),
+                                   lustre_cfg_string(lcfg, 3));
+                GOTO(out, err = 0);
+                break;
+        }
+        case LCFG_POOL_REM: {
+                err = obd_pool_rem(obd, lustre_cfg_string(lcfg, 2),
+                                   lustre_cfg_string(lcfg, 3));
+                GOTO(out, err = 0);
+                break;
+        }
+        case LCFG_POOL_DEL: {
+                err = obd_pool_del(obd, lustre_cfg_string(lcfg, 2));
+                GOTO(out, err = 0);
+                break;
+        }
         default: {
                 err = obd_process_config(obd, sizeof(*lcfg), lcfg);
                 GOTO(out, err);
         default: {
                 err = obd_process_config(obd, sizeof(*lcfg), lcfg);
                 GOTO(out, err);
index 49f209e..3c7c88f 100644 (file)
@@ -2151,7 +2151,7 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
         if (obd->obd_proc_exports_entry)
                 lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
                                    lprocfs_nid_stats_clear_read,
         if (obd->obd_proc_exports_entry)
                 lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
                                    lprocfs_nid_stats_clear_read,
-                                   lprocfs_nid_stats_clear_write, obd);
+                                   lprocfs_nid_stats_clear_write, obd, NULL);
 
         memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
                LUSTRE_CFG_BUFLEN(lcfg, 4));
 
         memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
                LUSTRE_CFG_BUFLEN(lcfg, 4));
index b79cfd4..8e1e2d5 100644 (file)
@@ -3466,29 +3466,45 @@ static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
  */
 static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
 {
  */
 static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
 {
-        struct lov_user_md lum, *lumk;
+        /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+        struct lov_user_md_v3 lum, *lumk;
+        struct lov_user_ost_data_v1 *lmm_objects;
         int rc = 0, lum_size;
         ENTRY;
 
         if (!lsm)
                 RETURN(-ENODATA);
 
         int rc = 0, lum_size;
         ENTRY;
 
         if (!lsm)
                 RETURN(-ENODATA);
 
-        if (copy_from_user(&lum, lump, sizeof(lum)))
+        /* we only need the header part from user space to get lmm_magic and
+         * lmm_stripe_count, (the header part is common to v1 and v3) */
+        lum_size = sizeof(struct lov_user_md_v1);
+        if (copy_from_user(&lum, lump, lum_size))
                 RETURN(-EFAULT);
 
                 RETURN(-EFAULT);
 
-        if (lum.lmm_magic != LOV_USER_MAGIC)
+        if ((lum.lmm_magic != LOV_USER_MAGIC_V1) &&
+            (lum.lmm_magic != LOV_USER_MAGIC_V3))
                 RETURN(-EINVAL);
 
                 RETURN(-EINVAL);
 
+        /* lov_user_md_vX and lov_mds_md_vX must have the same size */
+        LASSERT(sizeof(struct lov_user_md_v1) == sizeof(struct lov_mds_md_v1));
+        LASSERT(sizeof(struct lov_user_md_v3) == sizeof(struct lov_mds_md_v3));
+        LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lumk->lmm_objects[0]));
+
+        /* we can use lov_mds_md_size() to compute lum_size
+         * because lov_user_md_vX and lov_mds_md_vX have the same size */
         if (lum.lmm_stripe_count > 0) {
         if (lum.lmm_stripe_count > 0) {
-                lum_size = sizeof(lum) + sizeof(lum.lmm_objects[0]);
+                lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic);
                 OBD_ALLOC(lumk, lum_size);
                 if (!lumk)
                         RETURN(-ENOMEM);
 
                 OBD_ALLOC(lumk, lum_size);
                 if (!lumk)
                         RETURN(-ENOMEM);
 
-                lumk->lmm_objects[0].l_object_id = lsm->lsm_object_id;
-                lumk->lmm_objects[0].l_object_gr = lsm->lsm_object_gr;
+                if (lum.lmm_magic == LOV_USER_MAGIC_V1)
+                        lmm_objects = &(((struct lov_user_md_v1 *)lumk)->lmm_objects[0]);
+                else
+                        lmm_objects = &(lumk->lmm_objects[0]);
+                lmm_objects->l_object_id = lsm->lsm_object_id;
         } else {
         } else {
-                lum_size = sizeof(lum);
+                lum_size = lov_mds_md_size(0, lum.lmm_magic);
                 lumk = &lum;
         }
 
                 lumk = &lum;
         }
 
index 7df1357..97ea280 100644 (file)
@@ -1955,10 +1955,9 @@ static void print_lum (struct lov_user_md *lum)
         CDEBUG(D_OTHER, "\tlmm_stripe_offset: %#x\n", lum->lmm_stripe_offset);
 }
 
         CDEBUG(D_OTHER, "\tlmm_stripe_offset: %#x\n", lum->lmm_stripe_offset);
 }
 
-void lustre_swab_lov_user_md(struct lov_user_md *lum)
+static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
 {
         ENTRY;
 {
         ENTRY;
-        CDEBUG(D_IOCTL, "swabbing lov_user_md\n");
         __swab32s(&lum->lmm_magic);
         __swab32s(&lum->lmm_pattern);
         __swab64s(&lum->lmm_object_id);
         __swab32s(&lum->lmm_magic);
         __swab32s(&lum->lmm_pattern);
         __swab64s(&lum->lmm_object_id);
@@ -1982,6 +1981,23 @@ static void print_lumj (struct lov_user_md_join *lumj)
         CDEBUG(D_OTHER, "\tlmm_extent_count: %#x\n", lumj->lmm_extent_count);
 }
 
         CDEBUG(D_OTHER, "\tlmm_extent_count: %#x\n", lumj->lmm_extent_count);
 }
 
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
+{
+        ENTRY;
+        CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n");
+        lustre_swab_lov_user_md_common(lum);
+        EXIT;
+}
+
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
+{
+        ENTRY;
+        CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n");
+        lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum);
+        /* lmm_pool_name nothing to do with char */
+        EXIT;
+}
+
 void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj)
 {
         ENTRY;
 void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj)
 {
         ENTRY;
@@ -1997,63 +2013,20 @@ void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj)
         EXIT;
 }
 
         EXIT;
 }
 
-static void print_lum_objs(struct lov_user_md *lum)
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+                                     int stripe_count)
 {
 {
-        struct lov_user_ost_data *lod;
         int i;
         ENTRY;
         int i;
         ENTRY;
-        if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */
-                return;
-        CDEBUG(D_OTHER, "lov_user_md_objects: %p\n", lum);
-        for (i = 0; i < lum->lmm_stripe_count; i++) {
-                lod = &lum->lmm_objects[i];
-                CDEBUG(D_OTHER, "(%i) lod->l_object_id: "LPX64"\n", i, lod->l_object_id);
-                CDEBUG(D_OTHER, "(%i) lod->l_object_gr: "LPX64"\n", i, lod->l_object_gr);
-                CDEBUG(D_OTHER, "(%i) lod->l_ost_gen: %#x\n", i, lod->l_ost_gen);
-                CDEBUG(D_OTHER, "(%i) lod->l_ost_idx: %#x\n", i, lod->l_ost_idx);
+        for (i = 0; i < stripe_count; i++) {
+                __swab64s(&(lod[i].l_object_id));
+                __swab64s(&(lod[i].l_object_gr));
+                __swab32s(&(lod[i].l_ost_gen));
+                __swab32s(&(lod[i].l_ost_idx));
         }
         EXIT;
 }
 
         }
         EXIT;
 }
 
-void lustre_swab_lov_user_md_objects(struct lov_user_md *lum)
-{
-        struct lov_user_ost_data *lod;
-        int i;
-        ENTRY;
-        for (i = 0; i < lum->lmm_stripe_count; i++) {
-                lod = &lum->lmm_objects[i];
-                __swab64s(&lod->l_object_id);
-                __swab64s(&lod->l_object_gr);
-                __swab32s(&lod->l_ost_gen);
-                __swab32s(&lod->l_ost_idx);
-        }
-        print_lum_objs(lum);
-        EXIT;
-}
-
-
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
-{
-        struct lov_ost_data *lod;
-        int i;
-        ENTRY;
-        for (i = 0; i < lmm->lmm_stripe_count; i++) {
-                lod = &lmm->lmm_objects[i];
-                __swab64s(&lod->l_object_id);
-                __swab64s(&lod->l_object_gr);
-                __swab32s(&lod->l_ost_gen);
-                __swab32s(&lod->l_ost_idx);
-        }
-        __swab32s(&lmm->lmm_magic);
-        __swab32s(&lmm->lmm_pattern);
-        __swab64s(&lmm->lmm_object_id);
-        __swab64s(&lmm->lmm_object_gr);
-        __swab32s(&lmm->lmm_stripe_size);
-        __swab32s(&lmm->lmm_stripe_count);
-
-        EXIT;
-}
-
 
 void lustre_swab_ldlm_res_id (struct ldlm_res_id *id)
 {
 
 void lustre_swab_ldlm_res_id (struct ldlm_res_id *id)
 {
index 5d7d35e..76e0727 100644 (file)
@@ -252,8 +252,8 @@ EXPORT_SYMBOL(lustre_swab_mds_rec_unlink);
 EXPORT_SYMBOL(lustre_swab_mds_rec_rename);
 EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
 EXPORT_SYMBOL(lustre_swab_lov_desc);
 EXPORT_SYMBOL(lustre_swab_mds_rec_rename);
 EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
 EXPORT_SYMBOL(lustre_swab_lov_desc);
-EXPORT_SYMBOL(lustre_swab_lov_user_md);
-EXPORT_SYMBOL(lustre_swab_lov_mds_md);
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v1);
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
 EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
 EXPORT_SYMBOL(lustre_swab_lov_user_md_join);
 EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
 EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
 EXPORT_SYMBOL(lustre_swab_lov_user_md_join);
 EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
index 2c92091..8239298 100644 (file)
@@ -240,7 +240,7 @@ int main(int argc, char **argv)
                 return rc;
         }
 
                 return rc;
         }
 
-        lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT);
+        lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC);
         if ((lum_dir = (struct lov_user_md *)malloc(lum_size)) == NULL) {
                 rc = ENOMEM;
                 llapi_err(LLAPI_MSG_ERROR, "error: can't allocate %d bytes "
         if ((lum_dir = (struct lov_user_md *)malloc(lum_size)) == NULL) {
                 rc = ENOMEM;
                 llapi_err(LLAPI_MSG_ERROR, "error: can't allocate %d bytes "
index 4b61102..ca5093c 100644 (file)
@@ -64,7 +64,7 @@ int main(int argc, char** argv)
                 return 1;
         }
 
                 return 1;
         }
 
-        lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT);
+        lum_size = lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC);
 
         if ((lum_file = (struct lov_user_md *)malloc(lum_size)) == NULL) {
                 fprintf(stderr, "unable to allocate memory for ioctl's");
 
         if ((lum_file = (struct lov_user_md *)malloc(lum_size)) == NULL) {
                 fprintf(stderr, "unable to allocate memory for ioctl's");
index b3024aa..82217bc 100755 (executable)
@@ -7,5 +7,6 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 
+[ -n "$LOAD" ] && load_modules && exit 0
 [ -z "$NOFORMAT" ] && formatall
 [ -z "$NOSETUP" ] && setupall
 [ -z "$NOFORMAT" ] && formatall
 [ -z "$NOSETUP" ] && setupall
index 7cd617f..8232094 100644 (file)
@@ -3540,6 +3540,7 @@ test_99a() {
 run_test 99a "cvs init ========================================="
 
 test_99b() {
 run_test 99a "cvs init ========================================="
 
 test_99b() {
+        [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
        [ ! -d $DIR/d99cvsroot ] && test_99a
        cd /etc/init.d
        # some versions of cvs import exit(1) when asked to import links or
        [ ! -d $DIR/d99cvsroot ] && test_99a
        cd /etc/init.d
        # some versions of cvs import exit(1) when asked to import links or
@@ -3552,6 +3553,7 @@ test_99b() {
 run_test 99b "cvs import ======================================="
 
 test_99c() {
 run_test 99b "cvs import ======================================="
 
 test_99c() {
+        [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
        [ ! -d $DIR/d99cvsroot ] && test_99b
        cd $DIR
        mkdir -p $DIR/d99reposname
        [ ! -d $DIR/d99cvsroot ] && test_99b
        cd $DIR
        mkdir -p $DIR/d99reposname
@@ -3561,6 +3563,7 @@ test_99c() {
 run_test 99c "cvs checkout ====================================="
 
 test_99d() {
 run_test 99c "cvs checkout ====================================="
 
 test_99d() {
+        [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
        [ ! -d $DIR/d99cvsroot ] && test_99c
        cd $DIR/d99reposname
        $RUNAS touch foo99
        [ ! -d $DIR/d99cvsroot ] && test_99c
        cd $DIR/d99reposname
        $RUNAS touch foo99
@@ -3569,6 +3572,7 @@ test_99d() {
 run_test 99d "cvs add =========================================="
 
 test_99e() {
 run_test 99d "cvs add =========================================="
 
 test_99e() {
+        [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
        [ ! -d $DIR/d99cvsroot ] && test_99c
        cd $DIR/d99reposname
        $RUNAS cvs update
        [ ! -d $DIR/d99cvsroot ] && test_99c
        cd $DIR/d99reposname
        $RUNAS cvs update
@@ -3576,6 +3580,7 @@ test_99e() {
 run_test 99e "cvs update ======================================="
 
 test_99f() {
 run_test 99e "cvs update ======================================="
 
 test_99f() {
+        [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return
        [ ! -d $DIR/d99cvsroot ] && test_99d
        cd $DIR/d99reposname
        $RUNAS cvs commit -m 'nomsg' foo99
        [ ! -d $DIR/d99cvsroot ] && test_99d
        cd $DIR/d99reposname
        $RUNAS cvs commit -m 'nomsg' foo99
@@ -4955,8 +4960,10 @@ test_121() { #bug #10589
 run_test 121 "read cancel race ========="
 
 test_123a() { # was test 123, statahead(bug 11401)
 run_test 121 "read cancel race ========="
 
 test_123a() { # was test 123, statahead(bug 11401)
+        SLOWOK=0
         if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
                 log "testing on UP system. Performance may be not as good as expected."
         if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
                 log "testing on UP system. Performance may be not as good as expected."
+               SLOWOK=1
         fi
 
         remount_client $MOUNT
         fi
 
         remount_client $MOUNT
@@ -5011,7 +5018,7 @@ test_123a() { # was test 123, statahead(bug 11401)
         lctl get_param -n llite.*.statahead_stats
         # wait for commitment of removal
         sleep 2
         lctl get_param -n llite.*.statahead_stats
         # wait for commitment of removal
         sleep 2
-        [ $error -ne 0 ] && error "statahead is slow!"
+        [ $error -ne 0 -a $SLOWOK -eq 0 ] && error "statahead is slow!"
         return 0
 }
 run_test 123a "verify statahead work"
         return 0
 }
 run_test 123a "verify statahead work"
@@ -5496,6 +5503,123 @@ test_130e() {
 }
 run_test 130e "FIEMAP (test continuation FIEMAP calls)"
 
 }
 run_test 130e "FIEMAP (test continuation FIEMAP calls)"
 
+POOL=${POOL:-cea1}
+TGT_COUNT=$OSTCOUNT
+TGTPOOL_FIRST=1
+TGTPOOL_MAX=$(($TGT_COUNT - 1))
+TGTPOOL_STEP=2
+TGTPOOL_LIST=`seq $TGTPOOL_FIRST $TGTPOOL_STEP $TGTPOOL_MAX`
+POOL_ROOT=${POOL_ROOT:-$DIR/d200.pools}
+POOL_DIR=$POOL_ROOT/dir_tst
+POOL_FILE=$POOL_ROOT/file_tst
+
+check_file_in_pool()
+{
+       file=$1
+       res=$($GETSTRIPE $file | grep 0x | cut -f2)
+       for i in $res
+       do
+               found=$(echo :$TGTPOOL_LIST: | tr " " ":"  | grep :$i:)
+               if [[ "$found" == "" ]]
+               then
+                       echo "pool list: $TGTPOOL_LIST"
+                       echo "striping: $res"
+                       error "$file not allocated in $POOL"
+                       return 1
+               fi
+       done
+       return 0
+}
+
+test_200() {
+       do_facet mgs $LCTL pool_new $FSNAME.$POOL
+       do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL
+       [ $? == 0 ] || error "Pool creation of $POOL failed"
+}
+run_test 200 "Create new pool =========================================="
+
+test_201() {
+       TGT=$(seq -f $FSNAME-OST%04g_UUID $TGTPOOL_FIRST $TGTPOOL_STEP \
+               $TGTPOOL_MAX | tr '\n' ' ')
+       do_facet mgs $LCTL pool_add $FSNAME.$POOL \
+               $FSNAME-OST[$TGTPOOL_FIRST-$TGTPOOL_MAX/$TGTPOOL_STEP]_UUID
+       res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | sort \
+                       | tr '\n' ' ')
+       [ "$res" = "$TGT" ] || error "Pool content ($res) do not match requested ($TGT)"
+}
+run_test 201 "Add targets to a pool ===================================="
+
+test_202a() {
+       mkdir -p $POOL_DIR
+       $SETSTRIPE -c 2 -p $POOL $POOL_DIR
+       [ $? = 0 ] || error "Cannot set pool $POOL to $POOL_DIR"
+}
+run_test 202a "Set pool on a directory ================================="
+
+test_202b() {
+       res=$($GETSTRIPE $POOL_DIR | grep pool: | cut -f8 -d " ")
+       [ "$res" = $POOL ] || error "Pool on $POOL_DIR is not $POOL"
+}
+run_test 202b "Check pool on a directory ==============================="
+
+test_202c() {
+       failed=0
+       for i in $(seq -w 1 $(($TGT_COUNT * 3)))
+       do
+               file=$POOL_DIR/file-$i
+               touch $file
+               check_file_in_pool $file
+               if [[ $? != 0 ]]
+               then
+                       failed=$(($failed + 1))
+               fi
+       done
+       [ "$failed" = 0 ] || error "$failed files not allocated in $POOL"
+}
+run_test 202c "Check files allocation from directory pool =============="
+
+test_203() {
+       mkdir -p $POOL_FILE
+       failed=0
+       for i in $(seq -w 1 $(($TGT_COUNT * 3)))
+       do
+               file=$POOL_FILE/spoo-$i
+               $SETSTRIPE -p $POOL $file
+               check_file_in_pool $file
+               if [[ $? != 0 ]]
+               then
+                       failed=$(($failed + 1))
+               fi
+       done
+       [ "$failed" = 0 ] || error "$failed files not allocated in $POOL"
+}
+run_test 203 "Create files in a pool ==================================="
+
+test_210a() {
+       TGT=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | head -1)
+       do_facet mgs $LCTL pool_remove $FSNAME.$POOL $TGT
+       res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | grep $TGT)
+       [ "$res" = "" ] || error "$TGT not removed from $FSNAME.$POOL"
+}
+run_test 210a "Remove a target from a pool ============================="
+
+test_210b() {
+       for TGT in $(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL)
+       do
+               do_facet mgs $LCTL pool_remove $FSNAME.$POOL $TGT
+       done
+       res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL)
+       [ "$res" = "" ] || error "Pool $FSNAME.$POOL cannot be drained"
+}
+run_test 210b "Remove all targets from a pool =========================="
+
+test_211() {
+       do_facet mgs $LCTL pool_destroy $FSNAME.$POOL
+       res=$(do_facet mgs "$LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL 2>/dev/null")
+       [ "$res" = "" ] || error "Pool $FSNAME.$POOL is not destroyed"
+}
+run_test 211 "Remove a pool ============================================"
+
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
 HOME=$OLDHOME
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
 HOME=$OLDHOME
index b692fe5..229dbf6 100644 (file)
@@ -83,7 +83,7 @@ init_test_env() {
     export LUSTRE=`absolute_path $LUSTRE`
     export TESTSUITE=`basename $0 .sh`
 
     export LUSTRE=`absolute_path $LUSTRE`
     export TESTSUITE=`basename $0 .sh`
 
-    [ -d /r ] && export ROOT=${ROOT:-/r}
+    #[ -d /r ] && export ROOT=${ROOT:-/r}
     export TMP=${TMP:-$ROOT/tmp}
     export TESTSUITELOG=${TMP}/${TESTSUITE}.log
     export HOSTNAME=${HOSTNAME:-`hostname`}
     export TMP=${TMP:-$ROOT/tmp}
     export TESTSUITELOG=${TMP}/${TESTSUITE}.log
     export HOSTNAME=${HOSTNAME:-`hostname`}
@@ -506,7 +506,7 @@ zconf_mount() {
     do_node $client "lctl set_param debug=$PTLDEBUG;
         lctl set_param subsystem_debug=${SUBSYSTEM# };
         lctl set_param debug_mb=${DEBUG_SIZE}"
     do_node $client "lctl set_param debug=$PTLDEBUG;
         lctl set_param subsystem_debug=${SUBSYSTEM# };
         lctl set_param debug_mb=${DEBUG_SIZE}"
-    [ -d /r ] && $LCTL modules > /r/tmp/ogdb-$HOSTNAME
+    
     return 0
 }
 
     return 0
 }
 
index 19b7d41..6b2c150 100644 (file)
@@ -33,7 +33,7 @@ endif # UTILS
 lib_LIBRARIES = liblustreapi.a libiam.a
 
 lctl_SOURCES = obd.c lustre_cfg.c lctl.c obdctl.h
 lib_LIBRARIES = liblustreapi.a libiam.a
 
 lctl_SOURCES = obd.c lustre_cfg.c lctl.c obdctl.h
-lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
+lctl_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL)
 lctl_DEPENDENCIES := $(LIBPTLCTL)
 
 lfs_SOURCES = lfs.c obd.c lustre_cfg.c
 lctl_DEPENDENCIES := $(LIBPTLCTL)
 
 lfs_SOURCES = lfs.c obd.c lustre_cfg.c
@@ -41,7 +41,7 @@ lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL)
 lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a 
 
 loadgen_SOURCES = loadgen.c lustre_cfg.c obd.c
 lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a 
 
 loadgen_SOURCES = loadgen.c lustre_cfg.c obd.c
-loadgen_LDADD := $(LIBREADLINE) $(LIBPTLCTL) $(PTHREAD_LIBS)
+loadgen_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) $(PTHREAD_LIBS)
 loadgen_DEPENDENCIES := $(LIBPTLCTL)
 
 if EXT2FS_DEVEL
 loadgen_DEPENDENCIES := $(LIBPTLCTL)
 
 if EXT2FS_DEVEL
index a1b58f6..264ffcc 100644 (file)
@@ -208,6 +208,24 @@ command_t cmdlist[] = {
          "get the device info of a attached file\n"
          "usage: blockdev_info <device_name>"},
 
          "get the device info of a attached file\n"
          "usage: blockdev_info <device_name>"},
 
+        /* Pool commands */
+        {"===  Pools ==", jt_noop, 0, "pool management"},
+        {"pool_new", jt_pool_cmd, 0,
+         "add a new pool\n"
+         "usage pool_new <fsname>.<poolname>"},
+        {"pool_add", jt_pool_cmd, 0,
+         "add the named OSTs to the pool\n"
+         "usage pool_add <fsname>.<poolname> <ostname indexed list>"},
+        {"pool_remove", jt_pool_cmd, 0,
+         "remove the named OST from the pool\n"
+         "usage pool_remove <fsname>.<poolname> <ostname indexed list>"},
+        {"pool_destroy", jt_pool_cmd, 0,
+         "destroy a pool\n"
+         "usage pool_destroy <fsname>.<poolname>"},
+        {"pool_list", jt_pool_cmd, 0,
+         "list pools and pools members\n"
+         "usage pool_list  <fsname>[.<poolname>] | <pathname>"},
+
         /* Test only commands */
         {"==== testing (DANGEROUS) ====", jt_noop, 0, "testing (DANGEROUS)"},
         {"--threads", jt_opt_threads, 0,
         /* Test only commands */
         {"==== testing (DANGEROUS) ====", jt_noop, 0, "testing (DANGEROUS)"},
         {"--threads", jt_opt_threads, 0,
index 8d4246f..2479e93 100644 (file)
  * Author: Robert Read <rread@clusterfs.com>
  */
 
  * Author: Robert Read <rread@clusterfs.com>
  */
 
+/* for O_DIRECTORY */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <getopt.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <getopt.h>
@@ -94,6 +99,7 @@ static int lfs_rsetfacl(int argc, char **argv);
 static int lfs_rgetfacl(int argc, char **argv);
 static int lfs_cp(int argc, char **argv);
 static int lfs_ls(int argc, char **argv);
 static int lfs_rgetfacl(int argc, char **argv);
 static int lfs_cp(int argc, char **argv);
 static int lfs_ls(int argc, char **argv);
+static int lfs_poollist(int argc, char **argv);
 
 /* all avaialable commands */
 command_t cmdlist[] = {
 
 /* all avaialable commands */
 command_t cmdlist[] = {
@@ -101,30 +107,34 @@ command_t cmdlist[] = {
          "Create a new file with a specific striping pattern or\n"
          "set the default striping pattern on an existing directory or\n"
          "delete the default striping pattern from an existing directory\n"
          "Create a new file with a specific striping pattern or\n"
          "set the default striping pattern on an existing directory or\n"
          "delete the default striping pattern from an existing directory\n"
-         "usage: setstripe <filename|dirname> <stripe_size> <stripe_index> <stripe_count>\n"
-         "       or \n"
-         "       setstripe <filename|dirname> [--size|-s stripe_size]\n"
-         "                                    [--index|-i stripe_index]\n"
-         "                                    [--count|-c stripe_count]\n"
+         "usage: setstripe [--size|-s stripe_size] [--offset|-o start_ost]\n"
+         "                 [--count|-c stripe_count] [--pool|-p pool_name]\n"
+         "                 <dir|filename>\n"
          "       or \n"
          "       or \n"
-         "       setstripe -d <dirname>   (to delete default striping)\n"
+         "       setstripe -d <dir>   (to delete default striping)\n"
          "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n"
          "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n"
-         "\t              Can be specified with k, m or g (in KB, MB and GB respectively)\n"
-         "\tstripe_index: OST index of first stripe (-1 filesystem default)\n"
-         "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)"},
+         "\t              Can be specified with k, m or g (in KB, MB and GB\n"
+         "\t              respectively)\n"
+         "\tstart_ost:    OST index of first stripe (-1 filesystem default)\n"
+         "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n"
+         "\tpool_name:    Name of OST pool"},
         {"getstripe", lfs_getstripe, 0,
         {"getstripe", lfs_getstripe, 0,
-         "To list the striping info for a given filename or files in a\n"
+         "To list the striping info for a given file or files in a\n"
          "directory or recursively for all files in a directory tree.\n"
          "usage: getstripe [--obd|-O <uuid>] [--quiet | -q] [--verbose | -v]\n"
          "                 [--recursive | -r] <dir|file> ..."},
          "directory or recursively for all files in a directory tree.\n"
          "usage: getstripe [--obd|-O <uuid>] [--quiet | -q] [--verbose | -v]\n"
          "                 [--recursive | -r] <dir|file> ..."},
+        {"poollist", lfs_poollist, 0,
+         "List pools or pool OSTs\n"
+         "usage: poollist <fsname>[.<poolname>] | <pathname>\n"},
         {"find", lfs_find, 0,
          "To find files that match given parameters recursively in a directory tree.\n"
         {"find", lfs_find, 0,
          "To find files that match given parameters recursively in a directory tree.\n"
-         "usage: find <dir/file> ... \n"
+         "usage: find <dir|file> ... \n"
          "     [[!] --atime|-A [+-]N] [[!] --mtime|-M [+-]N] [[!] --ctime|-C [+-]N]\n"
          "     [--maxdepth|-D N] [[!] --name|-n <pattern>] [--print0|-P]\n"
          "     [--print|-p] [--obd|-O <uuid[s]>] [[!] --size|-s [+-]N[bkMGTP]]\n"
          "     [[!] --type|-t <filetype>] [[!] --gid|-g N] [[!] --group|-G <name>]\n"
          "     [[!] --uid|-u N] [[!] --user|-U <name>]\n"
          "     [[!] --atime|-A [+-]N] [[!] --mtime|-M [+-]N] [[!] --ctime|-C [+-]N]\n"
          "     [--maxdepth|-D N] [[!] --name|-n <pattern>] [--print0|-P]\n"
          "     [--print|-p] [--obd|-O <uuid[s]>] [[!] --size|-s [+-]N[bkMGTP]]\n"
          "     [[!] --type|-t <filetype>] [[!] --gid|-g N] [[!] --group|-G <name>]\n"
          "     [[!] --uid|-u N] [[!] --user|-U <name>]\n"
+         "     [[!] --pool <name>]\n"
          "\t !: used before an option indicates 'NOT' the requested attribute\n"
          "\t -: used before an value indicates 'AT MOST' the requested value\n"
          "\t +: used before an option indicates 'AT LEAST' the requested value\n"},
          "\t !: used before an option indicates 'NOT' the requested attribute\n"
          "\t -: used before an value indicates 'AT MOST' the requested value\n"
          "\t +: used before an option indicates 'AT LEAST' the requested value\n"},
@@ -203,12 +213,15 @@ static int lfs_setstripe(int argc, char **argv)
         char *stripe_size_arg = NULL;
         char *stripe_off_arg = NULL;
         char *stripe_count_arg = NULL;
         char *stripe_size_arg = NULL;
         char *stripe_off_arg = NULL;
         char *stripe_count_arg = NULL;
+        char *pool_name_arg = NULL;
         unsigned long long size_units;
 
         struct option long_opts[] = {
                 {"size",        required_argument, 0, 's'},
                 {"count",       required_argument, 0, 'c'},
                 {"index",       required_argument, 0, 'i'},
         unsigned long long size_units;
 
         struct option long_opts[] = {
                 {"size",        required_argument, 0, 's'},
                 {"count",       required_argument, 0, 'c'},
                 {"index",       required_argument, 0, 'i'},
+                {"offset",      required_argument, 0, 'o'},
+                {"pool",        required_argument, 0, 'p'},
                 {"delete",      no_argument,       0, 'd'},
                 {0, 0, 0, 0}
         };
                 {"delete",      no_argument,       0, 'd'},
                 {0, 0, 0, 0}
         };
@@ -221,7 +234,7 @@ static int lfs_setstripe(int argc, char **argv)
                  * usage */
                 fname = argv[2];
                 optind = 2;
                  * usage */
                 fname = argv[2];
                 optind = 2;
-        } else if (argc == 5  && 
+        } else if (argc == 5  &&
                    (argv[2][0] != '-' || isdigit(argv[2][1])) &&
                    (argv[3][0] != '-' || isdigit(argv[3][1])) &&
                    (argv[4][0] != '-' || isdigit(argv[4][1])) ) {
                    (argv[2][0] != '-' || isdigit(argv[2][1])) &&
                    (argv[3][0] != '-' || isdigit(argv[3][1])) &&
                    (argv[4][0] != '-' || isdigit(argv[4][1])) ) {
@@ -234,7 +247,7 @@ static int lfs_setstripe(int argc, char **argv)
                 optind = 4;
         } else {
                 optind = 0;
                 optind = 4;
         } else {
                 optind = 0;
-                while ((c = getopt_long(argc, argv, "c:di:s:",
+                while ((c = getopt_long(argc, argv, "c:di:o:s:p:",
                                                 long_opts, NULL)) >= 0) {
                         switch (c) {
                         case 0:
                                                 long_opts, NULL)) >= 0) {
                         switch (c) {
                         case 0:
@@ -248,11 +261,15 @@ static int lfs_setstripe(int argc, char **argv)
                                 delete = 1;
                                 break;
                         case 'i':
                                 delete = 1;
                                 break;
                         case 'i':
+                        case 'o':
                                 stripe_off_arg = optarg;
                                 break;
                         case 's':
                                 stripe_size_arg = optarg;
                                 break;
                                 stripe_off_arg = optarg;
                                 break;
                         case 's':
                                 stripe_size_arg = optarg;
                                 break;
+                        case 'p':
+                                pool_name_arg = optarg;
+                                break;
                         case '?':
                                 return CMD_HELP;
                         default:
                         case '?':
                                 return CMD_HELP;
                         default:
@@ -268,11 +285,11 @@ static int lfs_setstripe(int argc, char **argv)
                         return CMD_HELP;
 
 
                         return CMD_HELP;
 
 
-                if (delete && 
-                    (stripe_size_arg != NULL || stripe_off_arg != NULL || 
-                     stripe_count_arg != NULL)) {
+                if (delete &&
+                    (stripe_size_arg != NULL || stripe_off_arg != NULL ||
+                     stripe_count_arg != NULL || pool_name_arg != NULL)) {
                         fprintf(stderr, "error: %s: cannot specify -d with "
                         fprintf(stderr, "error: %s: cannot specify -d with "
-                                        "-s, -c or -i options\n",
+                                        "-s, -c -o or -p options\n",
                                         argv[0]);
                         return CMD_HELP;
                 }
                                         argv[0]);
                         return CMD_HELP;
                 }
@@ -312,7 +329,12 @@ static int lfs_setstripe(int argc, char **argv)
                 }
         }
 
                 }
         }
 
-        result = llapi_file_create(fname, st_size, st_offset, st_count, 0);
+        if (pool_name_arg == NULL)
+                result = llapi_file_create(fname, st_size, st_offset, st_count, 0);
+        else
+                result = llapi_file_create_pool(fname, st_size, st_offset,
+                                                st_count, 0, pool_name_arg);
+
         if (result)
                 fprintf(stderr, "error: %s: create stripe file failed\n",
                                 argv[0]);
         if (result)
                 fprintf(stderr, "error: %s: create stripe file failed\n",
                                 argv[0]);
@@ -320,11 +342,19 @@ static int lfs_setstripe(int argc, char **argv)
         return result;
 }
 
         return result;
 }
 
+static int lfs_poollist(int argc, char **argv)
+{
+        if (argc != 2)
+                return CMD_HELP;
+
+        return llapi_poollist(argv[1]);
+}
+
 static int set_time(time_t *time, time_t *set, char *str)
 {
         time_t t;
         int res = 0;
 static int set_time(time_t *time, time_t *set, char *str)
 {
         time_t t;
         int res = 0;
-        
+
         if (str[0] == '+')
                 res = 1;
         else if (str[0] == '-')
         if (str[0] == '+')
                 res = 1;
         else if (str[0] == '-')
@@ -399,6 +429,7 @@ static int id2name(char **name, unsigned int id, int type)
         return 0;
 }
 
         return 0;
 }
 
+#define FIND_POOL_OPT 3
 static int lfs_find(int argc, char **argv)
 {
         int new_fashion = 1;
 static int lfs_find(int argc, char **argv)
 {
         int new_fashion = 1;
@@ -417,6 +448,8 @@ static int lfs_find(int argc, char **argv)
                 {"uid",       required_argument, 0, 'u'},
                 {"user",      required_argument, 0, 'U'},
                 {"name",      required_argument, 0, 'n'},
                 {"uid",       required_argument, 0, 'u'},
                 {"user",      required_argument, 0, 'U'},
                 {"name",      required_argument, 0, 'n'},
+                /* no short option for pool, p/P already used */
+                {"pool",      required_argument, 0, FIND_POOL_OPT},
                 /* --obd is considered as a new option. */
                 {"obd",       required_argument, 0, 'O'},
                 {"ost",       required_argument, 0, 'O'},
                 /* --obd is considered as a new option. */
                 {"obd",       required_argument, 0, 'O'},
                 {"ost",       required_argument, 0, 'O'},
@@ -522,8 +555,8 @@ static int lfs_find(int argc, char **argv)
                         new_fashion = 1;
                         param.gid = strtol(optarg, &endptr, 10);
                         if (optarg == endptr) {
                         new_fashion = 1;
                         param.gid = strtol(optarg, &endptr, 10);
                         if (optarg == endptr) {
-                               ret = name2id(&param.gid, optarg, GRPQUOTA);
-                               if (ret != 0) {
+                                ret = name2id(&param.gid, optarg, GRPQUOTA);
+                                if (ret != 0) {
                                         fprintf(stderr, "Group/GID: %s cannot "
                                                 "be found.\n", optarg);
                                         return -1;
                                         fprintf(stderr, "Group/GID: %s cannot "
                                                 "be found.\n", optarg);
                                         return -1;
@@ -546,8 +579,8 @@ static int lfs_find(int argc, char **argv)
                         new_fashion = 1;
                         param.uid = strtol(optarg, &endptr, 10);
                         if (optarg == endptr) {
                         new_fashion = 1;
                         param.uid = strtol(optarg, &endptr, 10);
                         if (optarg == endptr) {
-                               ret = name2id(&param.uid, optarg, USRQUOTA);
-                               if (ret != 0) {
+                                ret = name2id(&param.uid, optarg, USRQUOTA);
+                                if (ret != 0) {
                                         fprintf(stderr, "User/UID: %s cannot "
                                                 "be found.\n", optarg);
                                         return -1;
                                         fprintf(stderr, "User/UID: %s cannot "
                                                 "be found.\n", optarg);
                                         return -1;
@@ -556,6 +589,22 @@ static int lfs_find(int argc, char **argv)
                         param.exclude_uid = !!neg_opt;
                         param.check_uid = 1;
                         break;
                         param.exclude_uid = !!neg_opt;
                         param.check_uid = 1;
                         break;
+                case FIND_POOL_OPT:
+                        new_fashion = 1;
+                        if (strlen(optarg) > MAXPOOLNAME) {
+                                fprintf(stderr,
+                                        "Pool name %s is too long"
+                                        " (max is %d)\n", optarg,
+                                        MAXPOOLNAME);
+                                return -1;
+                        }
+                        /* we do check for empty pool because empty pool
+                         * is used to find V1 lov attributes */
+                        strncpy(param.poolname, optarg, MAXPOOLNAME);
+                        param.poolname[MAXPOOLNAME] = '\0';
+                        param.exclude_pool = !!neg_opt;
+                        param.check_pool = 1;
+                        break;
                 case 'n':
                         new_fashion = 1;
                         param.pattern = (char *)optarg;
                 case 'n':
                         new_fashion = 1;
                         param.pattern = (char *)optarg;
@@ -667,7 +716,7 @@ static int lfs_find(int argc, char **argv)
                         return CMD_HELP;
                 };
         }
                         return CMD_HELP;
                 };
         }
-        
+
         if (pathstart == -1) {
                 fprintf(stderr, "error: %s: no filename|pathname\n",
                         argv[0]);
         if (pathstart == -1) {
                 fprintf(stderr, "error: %s: no filename|pathname\n",
                         argv[0]);
@@ -689,7 +738,7 @@ static int lfs_find(int argc, char **argv)
                 if (!param.recursive && param.maxdepth == -1)
                         param.maxdepth = 1;
         }
                 if (!param.recursive && param.maxdepth == -1)
                         param.maxdepth = 1;
         }
-        
+
         do {
                 if (new_fashion)
                         ret = llapi_find(argv[pathstart], &param);
         do {
                 if (new_fashion)
                         ret = llapi_find(argv[pathstart], &param);
@@ -763,7 +812,7 @@ static int lfs_getstripe(int argc, char **argv)
         } while (++optind < argc && !rc);
 
         if (rc)
         } while (++optind < argc && !rc);
 
         if (rc)
-                fprintf(stderr, "error: %s failed for %s.\n", 
+                fprintf(stderr, "error: %s failed for %s.\n",
                         argv[0], argv[optind - 1]);
         return rc;
 }
                         argv[0], argv[optind - 1]);
         return rc;
 }
@@ -1440,7 +1489,7 @@ do {                                                                    \
  *        2. specifiers may be encountered multiple times (2s3s is 5 seconds)
  *        3. empty integer value is interpreted as 0
  */
  *        2. specifiers may be encountered multiple times (2s3s is 5 seconds)
  *        3. empty integer value is interpreted as 0
  */
+
 static unsigned long str2sec(const char* timestr) {
         const char spec[] = "smhdw";
         const unsigned long mult[] = {1, 60, 60*60, 24*60*60, 7*24*60*60};
 static unsigned long str2sec(const char* timestr) {
         const char spec[] = "smhdw";
         const unsigned long mult[] = {1, 60, 60*60, 24*60*60, 7*24*60*60};
@@ -1462,7 +1511,7 @@ static unsigned long str2sec(const char* timestr) {
 
                 v = strtoul(timestr, &tail, 10);
                 if (v == ULONG_MAX || *tail == '\0')
 
                 v = strtoul(timestr, &tail, 10);
                 if (v == ULONG_MAX || *tail == '\0')
-                        /* value too large (ULONG_MAX or more) 
+                        /* value too large (ULONG_MAX or more)
                            or missing specifier */
                         goto error;
 
                            or missing specifier */
                         goto error;
 
index 1adf2ec..f824151 100644 (file)
@@ -59,6 +59,7 @@
 #include <sys/types.h>
 #include <sys/syscall.h>
 #include <fnmatch.h>
 #include <sys/types.h>
 #include <sys/syscall.h>
 #include <fnmatch.h>
+#include <glob.h>
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
 #else
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
 #else
@@ -209,61 +210,69 @@ int parse_size(char *optarg, unsigned long long *size,
         return 0;
 }
 
         return 0;
 }
 
-int llapi_file_open(const char *name, int flags, int mode,
-                    unsigned long stripe_size, int stripe_offset,
-                    int stripe_count, int stripe_pattern)
+int llapi_stripe_limit_check(unsigned long stripe_size, int stripe_offset,
+                             int stripe_count, int stripe_pattern)
 {
 {
-        struct lov_user_md lum = { 0 };
-        int fd, rc = 0;
-        int isdir = 0;
         int page_size;
 
         int page_size;
 
-        fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
-        if (fd < 0 && errno == EISDIR) {
-                fd = open(name, O_DIRECTORY | O_RDONLY);
-                isdir++;
-        }
-
-        if (fd < 0) {
-                rc = -errno;
-                llapi_err(LLAPI_MSG_ERROR, "unable to open '%s'", name);
-                return rc;
-        }
-
         /* 64 KB is the largest common page size I'm aware of (on ia64), but
          * check the local page size just in case. */
         page_size = LOV_MIN_STRIPE_SIZE;
         if (getpagesize() > page_size) {
                 page_size = getpagesize();
         /* 64 KB is the largest common page size I'm aware of (on ia64), but
          * check the local page size just in case. */
         page_size = LOV_MIN_STRIPE_SIZE;
         if (getpagesize() > page_size) {
                 page_size = getpagesize();
-                llapi_err_noerrno(LLAPI_MSG_WARN, 
+                llapi_err_noerrno(LLAPI_MSG_WARN,
                                   "warning: your page size (%u) is "
                                   "warning: your page size (%u) is "
-                                  "larger than expected (%u)", page_size, 
+                                  "larger than expected (%u)", page_size,
                                   LOV_MIN_STRIPE_SIZE);
         }
         if (stripe_size < 0 || (stripe_size & (LOV_MIN_STRIPE_SIZE - 1))) {
                                   LOV_MIN_STRIPE_SIZE);
         }
         if (stripe_size < 0 || (stripe_size & (LOV_MIN_STRIPE_SIZE - 1))) {
-                errno = rc = -EINVAL;
                 llapi_err(LLAPI_MSG_ERROR, "error: bad stripe_size %lu, "
                 llapi_err(LLAPI_MSG_ERROR, "error: bad stripe_size %lu, "
-                          "must be an even multiple of %d bytes", 
+                          "must be an even multiple of %d bytes",
                           stripe_size, page_size);
                           stripe_size, page_size);
-                goto out;
+                return -EINVAL;
         }
         if (stripe_offset < -1 || stripe_offset > MAX_OBD_DEVICES) {
         }
         if (stripe_offset < -1 || stripe_offset > MAX_OBD_DEVICES) {
-                errno = rc = -EINVAL;
-                llapi_err(LLAPI_MSG_ERROR, "error: bad stripe offset %d", 
+                llapi_err(LLAPI_MSG_ERROR, "error: bad stripe offset %d",
                           stripe_offset);
                           stripe_offset);
-                goto out;
+                return -EINVAL;
         }
         if (stripe_count < -1 || stripe_count > LOV_MAX_STRIPE_COUNT) {
         }
         if (stripe_count < -1 || stripe_count > LOV_MAX_STRIPE_COUNT) {
-                errno = rc = -EINVAL;
-                llapi_err(LLAPI_MSG_ERROR, "error: bad stripe count %d", 
+                llapi_err(LLAPI_MSG_ERROR, "error: bad stripe count %d",
                           stripe_count);
                           stripe_count);
-                goto out;
+                return -EINVAL;
         }
         if (stripe_count > 0 && (__u64)stripe_size * stripe_count > 0xffffffff){
         }
         if (stripe_count > 0 && (__u64)stripe_size * stripe_count > 0xffffffff){
-                errno = rc = -EINVAL;
                 llapi_err(LLAPI_MSG_ERROR, "error: stripe_size %lu * "
                 llapi_err(LLAPI_MSG_ERROR, "error: stripe_size %lu * "
-                          "stripe_count %u exceeds 4GB", stripe_size, 
+                          "stripe_count %u exceeds 4GB", stripe_size,
                           stripe_count);
                           stripe_count);
+                return -EINVAL;
+        }
+        return 0;
+}
+
+int llapi_file_open(const char *name, int flags, int mode,
+                    unsigned long stripe_size, int stripe_offset,
+                    int stripe_count, int stripe_pattern)
+{
+        struct lov_user_md lum = { 0 };
+        int fd, rc = 0;
+        int isdir = 0;
+
+        fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
+        if (fd < 0 && errno == EISDIR) {
+                fd = open(name, O_DIRECTORY | O_RDONLY);
+                isdir++;
+        }
+
+        if (fd < 0) {
+                rc = -errno;
+                llapi_err(LLAPI_MSG_ERROR, "unable to open '%s'", name);
+                return rc;
+        }
+
+        if ((rc = llapi_stripe_limit_check(stripe_size, stripe_offset,
+                                           stripe_count, stripe_pattern)) != 0) {
+                errno = rc;
                 goto out;
         }
 
                 goto out;
         }
 
@@ -293,6 +302,74 @@ out:
         return fd;
 }
 
         return fd;
 }
 
+static int poolpath(char *fsname, char *pathname, char *pool_pathname);
+
+int llapi_file_open_pool(const char *name, int flags, int mode,
+                         unsigned long stripe_size, int stripe_offset,
+                         int stripe_count, int stripe_pattern, char *pool_name)
+{
+        struct lov_user_md_v3 lum = { 0 };
+        int fd, rc = 0;
+        int isdir = 0;
+        char fsname[MAX_OBD_NAME + 1], *ptr;
+
+        fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
+        if (fd < 0 && errno == EISDIR) {
+                fd = open(name, O_DIRECTORY | O_RDONLY);
+                isdir++;
+        }
+
+        if (fd < 0) {
+                rc = -errno;
+                llapi_err(LLAPI_MSG_ERROR, "unable to open '%s'", name);
+                return rc;
+        }
+
+        if ((rc = llapi_stripe_limit_check(stripe_size, stripe_offset,
+                                           stripe_count, stripe_pattern)) != 0) {
+                errno = rc;
+                goto out;
+        }
+
+        /* in case user give the full pool name <fsname>.<poolname>, skip
+         * the fsname */
+        ptr = strchr(pool_name, '.');
+        if (ptr != NULL) {
+                strncpy(fsname, pool_name, ptr - pool_name);
+                fsname[ptr - pool_name] = '\0';
+                /* if fsname matches a fs skip it
+                 * if not keep the poolname as is */
+                if (poolpath(fsname, NULL, NULL) == 0)
+                        pool_name = ptr + 1;
+        }
+
+        /*  Initialize IOCTL striping pattern structure */
+        lum.lmm_magic = LOV_USER_MAGIC_V3;
+        lum.lmm_pattern = stripe_pattern;
+        lum.lmm_stripe_size = stripe_size;
+        lum.lmm_stripe_count = stripe_count;
+        lum.lmm_stripe_offset = stripe_offset;
+        strncpy(lum.lmm_pool_name, pool_name, MAXPOOLNAME);
+
+        if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, &lum)) {
+                char *errmsg = "stripe already set";
+                rc = -errno;
+                if (errno != EEXIST && errno != EALREADY)
+                        errmsg = strerror(errno);
+
+                llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                  "error on ioctl "LPX64" for '%s' (%d): %s",
+                                  (__u64)LL_IOC_LOV_SETSTRIPE, name, fd, errmsg);
+        }
+out:
+        if (rc) {
+                close(fd);
+                fd = rc;
+        }
+
+        return fd;
+}
+
 int llapi_file_create(const char *name, unsigned long stripe_size,
                       int stripe_offset, int stripe_count, int stripe_pattern)
 {
 int llapi_file_create(const char *name, unsigned long stripe_size,
                       int stripe_offset, int stripe_count, int stripe_pattern)
 {
@@ -307,6 +384,202 @@ int llapi_file_create(const char *name, unsigned long stripe_size,
         return 0;
 }
 
         return 0;
 }
 
+int llapi_file_create_pool(const char *name, unsigned long stripe_size,
+                           int stripe_offset, int stripe_count,
+                           int stripe_pattern, char *pool_name)
+{
+        int fd;
+
+        fd = llapi_file_open_pool(name, O_CREAT | O_WRONLY, 0644, stripe_size,
+                                  stripe_offset, stripe_count, stripe_pattern,
+                                  pool_name);
+        if (fd < 0)
+                return fd;
+
+        close(fd);
+        return 0;
+}
+
+
+static int print_pool_members(char *fs, char *pool_dir, char *pool_file)
+{
+        char path[PATH_MAX + 1];
+        char buf[1024];
+        FILE *fd;
+
+        llapi_printf(LLAPI_MSG_NORMAL, "Pool: %s.%s\n", fs, pool_file);
+        sprintf(path, "%s/%s", pool_dir, pool_file);
+        if ((fd = fopen(path, "r")) == NULL) {
+                llapi_err(LLAPI_MSG_ERROR, "Cannot open %s\n", path);
+                return -EINVAL;
+        }
+        while (fgets(buf, sizeof(buf), fd) != NULL)
+               llapi_printf(LLAPI_MSG_NORMAL, buf);
+
+        fclose(fd);
+        return 0;
+}
+
+/*
+ * search lustre fsname from pathname
+ *
+ */
+static int search_fsname(char *pathname, char *fsname)
+{
+        char *ptr;
+        FILE *fp;
+        struct mntent *mnt = NULL;
+
+        /* get the mount point */
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL) {
+                 llapi_err(LLAPI_MSG_ERROR,
+                           "setmntent(%s) failed: %s:", MOUNTED,
+                           strerror (errno));
+                 return -EIO;
+        }
+        mnt = getmntent(fp);
+        while ((feof(fp) == 0) && ferror(fp) == 0) {
+                if (llapi_is_lustre_mnt(mnt)) {
+                        /* search by pathname */
+                        if (strncmp(mnt->mnt_dir, pathname,
+                                    strlen(mnt->mnt_dir)) == 0) {
+                                ptr = strchr(mnt->mnt_fsname, '/');
+                                if (ptr == NULL)
+                                        return -EINVAL;
+                                ptr++;
+                                strcpy(fsname, ptr);
+                                return 0;
+                        }
+                }
+                mnt = getmntent(fp);
+        }
+        endmntent(fp);
+        return -ENOENT;
+
+}
+
+/*
+ * find the pool directory path under /proc
+ * (can be also used to test if a fsname is known)
+ */
+static int poolpath(char *fsname, char *pathname, char *pool_pathname)
+{
+        int rc = 0;
+        glob_t glob_info;
+        char pattern[PATH_MAX + 1];
+        char buffer[PATH_MAX];
+
+        if (fsname == NULL) {
+                rc = search_fsname(pathname, buffer);
+                if (rc != 0)
+                        return rc;
+                fsname = buffer;
+                strcpy(pathname, fsname);
+        }
+
+        snprintf(pattern, PATH_MAX,
+                 "/proc/fs/lustre/lov/%s-*/pools",
+                 fsname);
+        rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
+        if (rc)
+                return -ENOENT;
+
+        if (glob_info.gl_pathc == 0) {
+                globfree(&glob_info);
+                return -ENOENT;
+        }
+
+        /* in fsname test mode, pool_pathname is NULL */
+        if (pool_pathname != NULL)
+                strcpy(pool_pathname, glob_info.gl_pathv[0]);
+
+        return 0;
+}
+
+int llapi_poollist(char *name)
+{
+        char *poolname;
+        char *fsname;
+        char rname[PATH_MAX + 1], pathname[PATH_MAX + 1];
+        char *ptr;
+        int rc = 0;
+
+        /* is name a pathname ? */
+        ptr = strchr(name, '/');
+        if (ptr != NULL) {
+                /* only absolute pathname is supported */
+                if (*name != '/')
+                        return -EINVAL;
+                if (!realpath(name, rname)) {
+                        rc = -errno;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: invalid path '%s'",
+                                  name);
+                        return rc;
+                }
+
+                rc = poolpath(NULL, rname, pathname);
+                if (rc != 0) {
+                        errno = -rc;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: '%s' is not"
+                                  " a Lustre filesystem",
+                                  name);
+                        return rc;
+                }
+                fsname = rname;
+                poolname = NULL;
+        } else {
+                /* name is FSNAME[.POOLNAME] */
+                fsname = name;
+                poolname = strchr(name, '.');
+                if (poolname != NULL) {
+                        *poolname = '\0';
+                        poolname++;
+                }
+                rc = poolpath(fsname, NULL, pathname);
+                if (rc != 0) {
+                        errno = -rc;
+                        llapi_err(LLAPI_MSG_ERROR,
+                                  "llapi_poollist: Lustre filesystem '%s'"
+                                  " not found", name);
+                        return rc;
+                }
+        }
+        if (rc != 0) {
+                errno = -rc;
+                llapi_err(LLAPI_MSG_ERROR,
+                          "llapi_poollist: Lustre filesystem '%s' not found",
+                          name);
+                return rc;
+        }
+
+        if (poolname != NULL) {
+                rc = print_pool_members(fsname, pathname, poolname);
+                poolname--;
+                *poolname = '.';
+        } else {
+                DIR *dir;
+                struct dirent *pool;
+
+                llapi_printf(LLAPI_MSG_NORMAL, "Pools from %s:\n", fsname);
+                if ((dir = opendir(pathname)) == NULL) {
+                        return -EINVAL;
+                }
+                while ((pool = readdir(dir)) != NULL) {
+                        if (!((pool->d_name[0] == '.') &&
+                              (pool->d_name[1] == '\0')) &&
+                            !((pool->d_name[0] == '.') &&
+                              (pool->d_name[1] == '.') &&
+                              (pool->d_name[2] == '\0')))
+                        llapi_printf(LLAPI_MSG_NORMAL, " %s.%s\n", fsname, pool->d_name);
+                }
+                closedir(dir);
+        }
+        return rc;
+}
+
 typedef int (semantic_func_t)(char *path, DIR *parent, DIR *d,
                               void *data, cfs_dirent_t *de);
 
 typedef int (semantic_func_t)(char *path, DIR *parent, DIR *d,
                               void *data, cfs_dirent_t *de);
 
@@ -315,9 +588,9 @@ typedef int (semantic_func_t)(char *path, DIR *parent, DIR *d,
 
 static int common_param_init(struct find_param *param)
 {
 
 static int common_param_init(struct find_param *param)
 {
-        param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT);
+        param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC_V3);
         if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) {
         if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) {
-                llapi_err(LLAPI_MSG_ERROR, 
+                llapi_err(LLAPI_MSG_ERROR,
                           "error: allocation of %d bytes for ioctl",
                           sizeof(lstat_t) + param->lumlen);
                 return -ENOMEM;
                           "error: allocation of %d bytes for ioctl",
                           sizeof(lstat_t) + param->lumlen);
                 return -ENOMEM;
@@ -338,7 +611,7 @@ static void find_param_fini(struct find_param *param)
                 free(param->lmd);
 }
 
                 free(param->lmd);
 }
 
-int llapi_file_get_lov_fuuid(int fd, struct obd_uuid *lov_name)
+int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_name)
 {
         int rc = ioctl(fd, OBD_IOC_GETNAME, lov_name);
         if (rc) {
 {
         int rc = ioctl(fd, OBD_IOC_GETNAME, lov_name);
         if (rc) {
@@ -355,11 +628,11 @@ int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid)
         fd = open(path, O_RDONLY);
         if (fd < 0) {
                 rc = errno;
         fd = open(path, O_RDONLY);
         if (fd < 0) {
                 rc = errno;
-                llapi_err(LLAPI_MSG_ERROR, "error opening %s\n", path);
+                llapi_err(LLAPI_MSG_ERROR, "error opening %s", path);
                 return rc;
         }
 
                 return rc;
         }
 
-        rc = llapi_file_get_lov_fuuid(fd, lov_uuid);
+        rc = llapi_file_fget_lov_uuid(fd, lov_uuid);
 
         close(fd);
 
 
         close(fd);
 
@@ -380,7 +653,7 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count)
         int rc = 0, index = 0;
 
         /* Get the lov name */
         int rc = 0, index = 0;
 
         /* Get the lov name */
-        rc = llapi_file_get_lov_fuuid(fd, &lov_name);
+        rc = llapi_file_fget_lov_uuid(fd, &lov_name);
         if (rc)
                 return rc;
 
         if (rc)
                 return rc;
 
@@ -422,11 +695,11 @@ static int setup_obd_uuid(DIR *dir, char *dname, struct find_param *param)
         int rc = 0, index;
 
         /* Get the lov name */
         int rc = 0, index;
 
         /* Get the lov name */
-        rc = llapi_file_get_lov_fuuid(dirfd(dir), &lov_uuid);
+        rc = llapi_file_fget_lov_uuid(dirfd(dir), &lov_uuid);
         if (rc) {
                 if (errno != ENOTTY) {
                         rc = errno;
         if (rc) {
                 if (errno != ENOTTY) {
                         rc = errno;
-                        llapi_err(LLAPI_MSG_ERROR, 
+                        llapi_err(LLAPI_MSG_ERROR,
                                   "error: can't get lov name: %s", dname);
                 } else {
                         rc = 0;
                                   "error: can't get lov name: %s", dname);
                 } else {
                         rc = 0;
@@ -470,7 +743,7 @@ static int setup_obd_uuid(DIR *dir, char *dname, struct find_param *param)
 
         if (!param->quiet && param->obduuid &&
             (param->obdindex == OBD_NOT_FOUND)) {
 
         if (!param->quiet && param->obduuid &&
             (param->obdindex == OBD_NOT_FOUND)) {
-                llapi_err_noerrno(LLAPI_MSG_ERROR, 
+                llapi_err_noerrno(LLAPI_MSG_ERROR,
                                   "error: %s: unknown obduuid: %s",
                                   __FUNCTION__, param->obduuid->uuid);
                 //rc = EINVAL;
                                   "error: %s: unknown obduuid: %s",
                                   __FUNCTION__, param->obduuid->uuid);
                 //rc = EINVAL;
@@ -539,14 +812,16 @@ retry_get_uuids:
         return 0;
 }
 
         return 0;
 }
 
-void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *path, int is_dir,
-                          int obdindex, int quiet, int header, int body)
+void lov_dump_user_lmm_v1v3(struct lov_user_md *lum, char *pool_name,
+                            struct lov_user_ost_data_v1 *objects,
+                            char *path, int is_dir,
+                            int obdindex, int quiet, int header, int body)
 {
         int i, obdstripe = 0;
 
         if (obdindex != OBD_NOT_FOUND) {
                 for (i = 0; !is_dir && i < lum->lmm_stripe_count; i++) {
 {
         int i, obdstripe = 0;
 
         if (obdindex != OBD_NOT_FOUND) {
                 for (i = 0; !is_dir && i < lum->lmm_stripe_count; i++) {
-                        if (obdindex == lum->lmm_objects[i].l_ost_idx) {
+                        if (obdindex == objects[i].l_ost_idx) {
                                 llapi_printf(LLAPI_MSG_NORMAL, "%s\n", path);
                                 obdstripe = 1;
                                 break;
                                 llapi_printf(LLAPI_MSG_NORMAL, "%s\n", path);
                                 obdstripe = 1;
                                 break;
@@ -564,44 +839,49 @@ void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *path, int is_dir,
                                 llapi_printf(LLAPI_MSG_NORMAL, "(Default) ");
                                 lum->lmm_object_gr = LOV_OBJECT_GROUP_CLEAR;
                         }
                                 llapi_printf(LLAPI_MSG_NORMAL, "(Default) ");
                                 lum->lmm_object_gr = LOV_OBJECT_GROUP_CLEAR;
                         }
-                        llapi_printf(LLAPI_MSG_NORMAL, 
+                        llapi_printf(LLAPI_MSG_NORMAL,
                                      "stripe_count: %d stripe_size: %u "
                                      "stripe_count: %d stripe_size: %u "
-                                     "stripe_offset: %d\n",
+                                     "stripe_offset: %d%s%s\n",
                                      lum->lmm_stripe_count == (__u16)-1 ? -1 :
                                      lum->lmm_stripe_count == (__u16)-1 ? -1 :
-                                     lum->lmm_stripe_count,
+                                        lum->lmm_stripe_count,
                                      lum->lmm_stripe_size,
                                      lum->lmm_stripe_offset == (__u16)-1 ? -1 :
                                      lum->lmm_stripe_size,
                                      lum->lmm_stripe_offset == (__u16)-1 ? -1 :
-                                     lum->lmm_stripe_offset);
+                                        lum->lmm_stripe_offset,
+                                     pool_name != NULL ? " pool: " : "",
+                                     pool_name != NULL ? pool_name : "");
                 }
                 return;
         }
 
         if (header && (obdstripe == 1)) {
                 }
                 return;
         }
 
         if (header && (obdstripe == 1)) {
-                llapi_printf(LLAPI_MSG_NORMAL, 
+                llapi_printf(LLAPI_MSG_NORMAL,
                              "lmm_magic:          0x%08X\n",  lum->lmm_magic);
                              "lmm_magic:          0x%08X\n",  lum->lmm_magic);
-                llapi_printf(LLAPI_MSG_NORMAL, 
+                llapi_printf(LLAPI_MSG_NORMAL,
                              "lmm_object_gr:      "LPX64"\n", lum->lmm_object_gr);
                              "lmm_object_gr:      "LPX64"\n", lum->lmm_object_gr);
-                llapi_printf(LLAPI_MSG_NORMAL, 
+                llapi_printf(LLAPI_MSG_NORMAL,
                              "lmm_object_id:      "LPX64"\n", lum->lmm_object_id);
                              "lmm_object_id:      "LPX64"\n", lum->lmm_object_id);
-                llapi_printf(LLAPI_MSG_NORMAL, 
+                llapi_printf(LLAPI_MSG_NORMAL,
                              "lmm_stripe_count:   %u\n", (int)lum->lmm_stripe_count);
                              "lmm_stripe_count:   %u\n", (int)lum->lmm_stripe_count);
-                llapi_printf(LLAPI_MSG_NORMAL, 
+                llapi_printf(LLAPI_MSG_NORMAL,
                              "lmm_stripe_size:    %u\n",      lum->lmm_stripe_size);
                              "lmm_stripe_size:    %u\n",      lum->lmm_stripe_size);
-                llapi_printf(LLAPI_MSG_NORMAL, 
+                llapi_printf(LLAPI_MSG_NORMAL,
                              "lmm_stripe_pattern: %x\n",      lum->lmm_pattern);
                              "lmm_stripe_pattern: %x\n",      lum->lmm_pattern);
+                if (pool_name != NULL)
+                        llapi_printf(LLAPI_MSG_NORMAL,
+                                     "lmm_pool_name:      %s\n",      pool_name);
         }
 
         if (body) {
                 if ((!quiet) && (obdstripe == 1))
         }
 
         if (body) {
                 if ((!quiet) && (obdstripe == 1))
-                        llapi_printf(LLAPI_MSG_NORMAL, 
+                        llapi_printf(LLAPI_MSG_NORMAL,
                                      "\tobdidx\t\t objid\t\tobjid\t\t group\n");
 
                 for (i = 0; i < lum->lmm_stripe_count; i++) {
                                      "\tobdidx\t\t objid\t\tobjid\t\t group\n");
 
                 for (i = 0; i < lum->lmm_stripe_count; i++) {
-                        int idx = lum->lmm_objects[i].l_ost_idx;
-                        long long oid = lum->lmm_objects[i].l_object_id;
-                        long long gr = lum->lmm_objects[i].l_object_gr;
+                        int idx = objects[i].l_ost_idx;
+                        long long oid = objects[i].l_object_id;
+                        long long gr = objects[i].l_object_gr;
                         if ((obdindex == OBD_NOT_FOUND) || (obdindex == idx))
                         if ((obdindex == OBD_NOT_FOUND) || (obdindex == idx))
-                                llapi_printf(LLAPI_MSG_NORMAL, 
+                                llapi_printf(LLAPI_MSG_NORMAL,
                                              "\t%6u\t%14llu\t%#13llx\t%14llu%s\n",
                                              idx, oid, oid, gr,
                                              obdindex == idx ? " *" : "");
                                              "\t%6u\t%14llu\t%#13llx\t%14llu%s\n",
                                              idx, oid, oid, gr,
                                              obdindex == idx ? " *" : "");
@@ -631,13 +911,13 @@ void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *path,
         }
 
         if (header && obdstripe == 1) {
         }
 
         if (header && obdstripe == 1) {
-                llapi_printf(LLAPI_MSG_NORMAL, "lmm_magic:          0x%08X\n",  
+                llapi_printf(LLAPI_MSG_NORMAL, "lmm_magic:          0x%08X\n",
                              lumj->lmm_magic);
                              lumj->lmm_magic);
-                llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_gr:      "LPX64"\n", 
+                llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_gr:      "LPX64"\n",
                              lumj->lmm_object_gr);
                              lumj->lmm_object_gr);
-                llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_id:      "LPX64"\n", 
+                llapi_printf(LLAPI_MSG_NORMAL, "lmm_object_id:      "LPX64"\n",
                              lumj->lmm_object_id);
                              lumj->lmm_object_id);
-                llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_count:   %u\n", 
+                llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_count:   %u\n",
                              (int)lumj->lmm_stripe_count);
                 llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_size:    %u\n",
                              lumj->lmm_stripe_size);
                              (int)lumj->lmm_stripe_count);
                 llapi_printf(LLAPI_MSG_NORMAL, "lmm_stripe_size:    %u\n",
                              lumj->lmm_stripe_size);
@@ -650,7 +930,7 @@ void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *path,
         if (body) {
                 unsigned long long start = -1, end = 0;
                 if (!quiet && obdstripe == 1)
         if (body) {
                 unsigned long long start = -1, end = 0;
                 if (!quiet && obdstripe == 1)
-                        llapi_printf(LLAPI_MSG_NORMAL, 
+                        llapi_printf(LLAPI_MSG_NORMAL,
                                      "joined\tobdidx\t\t objid\t\tobjid\t\t group"
                                      "\t\tstart\t\tend\n");
                 for (i = 0; i < lumj->lmm_stripe_count; i++) {
                                      "joined\tobdidx\t\t objid\t\tobjid\t\t group"
                                      "\t\tstart\t\tend\n");
                 for (i = 0; i < lumj->lmm_stripe_count; i++) {
@@ -658,7 +938,7 @@ void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *path,
                         long long oid = lumj->lmm_objects[i].l_object_id;
                         long long gr = lumj->lmm_objects[i].l_object_gr;
                         if (obdindex == OBD_NOT_FOUND || obdindex == idx)
                         long long oid = lumj->lmm_objects[i].l_object_id;
                         long long gr = lumj->lmm_objects[i].l_object_gr;
                         if (obdindex == OBD_NOT_FOUND || obdindex == idx)
-                                llapi_printf(LLAPI_MSG_NORMAL, 
+                                llapi_printf(LLAPI_MSG_NORMAL,
                                              "\t%6u\t%14llu\t%#13llx\t%14llu%s",
                                              idx, oid, oid, gr,
                                              obdindex == idx ? " *" : "");
                                              "\t%6u\t%14llu\t%#13llx\t%14llu%s",
                                              idx, oid, oid, gr,
                                              obdindex == idx ? " *" : "");
@@ -668,10 +948,10 @@ void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *path,
                                 llapi_printf(LLAPI_MSG_NORMAL, "\t%14llu", start);
                                 end = lumj->lmm_objects[i].l_extent_end;
                                 if (end == (unsigned long long)-1)
                                 llapi_printf(LLAPI_MSG_NORMAL, "\t%14llu", start);
                                 end = lumj->lmm_objects[i].l_extent_end;
                                 if (end == (unsigned long long)-1)
-                                        llapi_printf(LLAPI_MSG_NORMAL, 
+                                        llapi_printf(LLAPI_MSG_NORMAL,
                                                      "\t\tEOF\n");
                                 else
                                                      "\t\tEOF\n");
                                 else
-                                        llapi_printf(LLAPI_MSG_NORMAL, 
+                                        llapi_printf(LLAPI_MSG_NORMAL,
                                                      "\t\t%llu\n", end);
                         } else {
                                 llapi_printf(LLAPI_MSG_NORMAL, "\t\t\t\t\n");
                                                      "\t\t%llu\n", end);
                         } else {
                                 llapi_printf(LLAPI_MSG_NORMAL, "\t\t\t\t\n");
@@ -686,10 +966,12 @@ void llapi_lov_dump_user_lmm(struct find_param *param,
 {
         switch(*(__u32 *)&param->lmd->lmd_lmm) { /* lum->lmm_magic */
         case LOV_USER_MAGIC_V1:
 {
         switch(*(__u32 *)&param->lmd->lmd_lmm) { /* lum->lmm_magic */
         case LOV_USER_MAGIC_V1:
-                lov_dump_user_lmm_v1(&param->lmd->lmd_lmm, path, is_dir,
-                                      param->obdindex, param->quiet,
-                                      param->verbose,
-                                      (param->verbose || !param->obduuid));
+                lov_dump_user_lmm_v1v3(&param->lmd->lmd_lmm, NULL,
+                                       param->lmd->lmd_lmm.lmm_objects,
+                                       path, is_dir,
+                                       param->obdindex, param->quiet,
+                                       param->verbose,
+                                       (param->verbose || !param->obduuid));
                 break;
         case LOV_USER_MAGIC_JOIN:
                 lov_dump_user_lmm_join(&param->lmd->lmd_lmm, path, is_dir,
                 break;
         case LOV_USER_MAGIC_JOIN:
                 lov_dump_user_lmm_join(&param->lmd->lmd_lmm, path, is_dir,
@@ -697,10 +979,28 @@ void llapi_lov_dump_user_lmm(struct find_param *param,
                                        param->verbose,
                                        (param->verbose || !param->obduuid));
                 break;
                                        param->verbose,
                                        (param->verbose || !param->obduuid));
                 break;
+        case LOV_USER_MAGIC_V3: {
+                char pool_name[MAXPOOLNAME + 1];
+                struct lov_user_ost_data_v1 *objects;
+
+                strncpy(pool_name,
+                        ((struct lov_user_md_v3 *)(&param->lmd->lmd_lmm))->lmm_pool_name,
+                        MAXPOOLNAME);
+                pool_name[MAXPOOLNAME] = '\0';
+                objects = ((struct lov_user_md_v3 *)(&param->lmd->lmd_lmm))->lmm_objects;
+                lov_dump_user_lmm_v1v3(&param->lmd->lmd_lmm, pool_name,
+                                      objects, path, is_dir,
+                                      param->obdindex, param->quiet,
+                                      param->verbose,
+                                      (param->verbose || !param->obduuid));
+                break;
+        }
         default:
         default:
-                llapi_printf(LLAPI_MSG_NORMAL, 
-                             "unknown lmm_magic:  %#x (expecting %#x)\n",
-                       *(__u32 *)&param->lmd->lmd_lmm, LOV_USER_MAGIC_V1);
+                llapi_printf(LLAPI_MSG_NORMAL, "unknown lmm_magic:  %#x "
+                             "(expecting one of %#x %#x %#x)\n",
+                             *(__u32 *)&param->lmd->lmd_lmm,
+                             LOV_USER_MAGIC_V1, LOV_USER_MAGIC_JOIN,
+                             LOV_USER_MAGIC_V3);
                 return;
         }
 }
                 return;
         }
 }
@@ -785,7 +1085,8 @@ int llapi_mds_getfileinfo(char *path, DIR *parent,
 
         fname = (fname == NULL ? path : fname + 1);
         /* retrieve needed file info */
 
         fname = (fname == NULL ? path : fname + 1);
         /* retrieve needed file info */
-        strncpy((char *)lmd, fname, lov_mds_md_size(MAX_LOV_UUID_COUNT));
+        strncpy((char *)lmd, fname,
+                lov_mds_md_size(MAX_LOV_UUID_COUNT, LOV_MAGIC));
         ret = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO, (void *)lmd);
 
         if (ret) {
         ret = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO, (void *)lmd);
 
         if (ret) {
@@ -794,18 +1095,18 @@ int llapi_mds_getfileinfo(char *path, DIR *parent,
                          * Do the regular lstat(2) instead. */
                         ret = lstat_f(path, st);
                         if (ret) {
                          * Do the regular lstat(2) instead. */
                         ret = lstat_f(path, st);
                         if (ret) {
-                                llapi_err(LLAPI_MSG_ERROR, 
+                                llapi_err(LLAPI_MSG_ERROR,
                                           "error: %s: lstat failed for %s",
                                           __FUNCTION__, path);
                                 return ret;
                         }
                 } else if (errno == ENOENT) {
                                           "error: %s: lstat failed for %s",
                                           __FUNCTION__, path);
                                 return ret;
                         }
                 } else if (errno == ENOENT) {
-                        llapi_err(LLAPI_MSG_WARN, 
-                                  "warning: %s: %s does not exist", 
+                        llapi_err(LLAPI_MSG_WARN,
+                                  "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         return -ENOENT;
                 } else {
                                   __FUNCTION__, path);
                         return -ENOENT;
                 } else {
-                        llapi_err(LLAPI_MSG_ERROR, 
+                        llapi_err(LLAPI_MSG_ERROR,
                                   "error: %s: IOC_MDC_GETFILEINFO failed for %s",
                                   __FUNCTION__, path);
                         return ret;
                                   "error: %s: IOC_MDC_GETFILEINFO failed for %s",
                                   __FUNCTION__, path);
                         return ret;
@@ -894,7 +1195,7 @@ static int llapi_semantic_traverse(char *path, int size, DIR *parent,
 
                 switch (dent->d_type) {
                 case DT_UNKNOWN:
 
                 switch (dent->d_type) {
                 case DT_UNKNOWN:
-                        llapi_err(LLAPI_MSG_ERROR, 
+                        llapi_err(LLAPI_MSG_ERROR,
                                   "error: %s: '%s' is UNKNOWN type %d",
                                   __FUNCTION__, dent->d_name, dent->d_type);
                         break;
                                   "error: %s: '%s' is UNKNOWN type %d",
                                   __FUNCTION__, dent->d_name, dent->d_type);
                         break;
@@ -1089,13 +1390,13 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir,
                         lustre_fs = 0;
                         ret = lstat_f(path, st);
                         if (ret) {
                         lustre_fs = 0;
                         ret = lstat_f(path, st);
                         if (ret) {
-                                llapi_err(LLAPI_MSG_ERROR, 
+                                llapi_err(LLAPI_MSG_ERROR,
                                           "error: %s: lstat failed for %s",
                                           __FUNCTION__, path);
                                 return ret;
                         }
                 } else if (errno == ENOENT) {
                                           "error: %s: lstat failed for %s",
                                           __FUNCTION__, path);
                                 return ret;
                         }
                 } else if (errno == ENOENT) {
-                        llapi_err(LLAPI_MSG_WARN, 
+                        llapi_err(LLAPI_MSG_WARN,
                                   "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         goto decided;
                                   "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         goto decided;
@@ -1155,11 +1456,21 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir,
                         goto decided;
                 } else {
                         int i, j;
                         goto decided;
                 } else {
                         int i, j;
+                        struct lov_user_ost_data_v1 *lmm_objects;
+
+                        if (param->lmd->lmd_lmm.lmm_magic ==
+                            LOV_USER_MAGIC_V3) {
+                                lmm_objects =
+                                 ((struct lov_user_md_v3 *)(&(param->lmd->lmd_lmm)))->lmm_objects;
+                        } else {
+                                lmm_objects = param->lmd->lmd_lmm.lmm_objects;
+                        }
+
                         for (i = 0;
                              i < param->lmd->lmd_lmm.lmm_stripe_count; i++) {
                                 for (j = 0; j < param->num_obds; j++) {
                                         if (param->obdindexes[j] ==
                         for (i = 0;
                              i < param->lmd->lmd_lmm.lmm_stripe_count; i++) {
                                 for (j = 0; j < param->num_obds; j++) {
                                         if (param->obdindexes[j] ==
-                                            param->lmd->lmd_lmm.lmm_objects[i].l_ost_idx)
+                                            lmm_objects[i].l_ost_idx)
                                                 goto obd_matches;
                                 }
                         }
                                                 goto obd_matches;
                                 }
                         }
@@ -1189,6 +1500,23 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir,
                 }
         }
 
                 }
         }
 
+        if (param->check_pool) {
+                /* empty requested pool is taken as no pool search => V1 */
+                if (((param->lmd->lmd_lmm.lmm_magic == LOV_USER_MAGIC_V1) &&
+                     (param->poolname[0] == '\0')) ||
+                    ((param->lmd->lmd_lmm.lmm_magic == LOV_USER_MAGIC_V3) &&
+                     (strncmp(((struct lov_user_md_v3 *)(&(param->lmd->lmd_lmm)))->lmm_pool_name,
+                              param->poolname, MAXPOOLNAME) == 0)) ||
+                    ((param->lmd->lmd_lmm.lmm_magic == LOV_USER_MAGIC_V3) &&
+                     (strcmp(param->poolname, "*") == 0))) {
+                        if (param->exclude_pool)
+                                goto decided;
+                } else {
+                        if (!param->exclude_pool)
+                                goto decided;
+                }
+        }
+
         /* Check the time on mds. */
         if (!decision) {
                 int for_mds;
         /* Check the time on mds. */
         if (!decision) {
                 int for_mds;
@@ -1208,7 +1536,7 @@ obd_matches:
                 if (param->obdindex != OBD_NOT_FOUND) {
                         /* Check whether the obd is active or not, if it is
                          * not active, just print the object affected by this
                 if (param->obdindex != OBD_NOT_FOUND) {
                         /* Check whether the obd is active or not, if it is
                          * not active, just print the object affected by this
-                         * failed ost 
+                         * failed ost
                          * */
                         struct obd_statfs stat_buf;
                         struct obd_uuid uuid_buf;
                          * */
                         struct obd_statfs stat_buf;
                         struct obd_uuid uuid_buf;
@@ -1216,15 +1544,15 @@ obd_matches:
                         memset(&stat_buf, 0, sizeof(struct obd_statfs));
                         memset(&uuid_buf, 0, sizeof(struct obd_uuid));
                         ret = llapi_obd_statfs(path, LL_STATFS_LOV,
                         memset(&stat_buf, 0, sizeof(struct obd_statfs));
                         memset(&uuid_buf, 0, sizeof(struct obd_uuid));
                         ret = llapi_obd_statfs(path, LL_STATFS_LOV,
-                                               param->obdindex, &stat_buf, 
+                                               param->obdindex, &stat_buf,
                                                &uuid_buf);
                         if (ret) {
                                                &uuid_buf);
                         if (ret) {
-                                if (ret == -ENODATA || ret == -ENODEV 
+                                if (ret == -ENODATA || ret == -ENODEV
                                     || ret == -EIO)
                                         errno = EIO;
                                     || ret == -EIO)
                                         errno = EIO;
-                                llapi_printf(LLAPI_MSG_NORMAL, 
+                                llapi_printf(LLAPI_MSG_NORMAL,
                                              "obd_uuid: %s failed %s ",
                                              "obd_uuid: %s failed %s ",
-                                             param->obduuid->uuid, 
+                                             param->obduuid->uuid,
                                              strerror(errno));
                                 goto print_path;
                         }
                                              strerror(errno));
                                 goto print_path;
                         }
@@ -1239,12 +1567,12 @@ obd_matches:
 
                 if (ret) {
                         if (errno == ENOENT) {
 
                 if (ret) {
                         if (errno == ENOENT) {
-                                llapi_err(LLAPI_MSG_ERROR, 
+                                llapi_err(LLAPI_MSG_ERROR,
                                           "warning: %s: %s does not exist",
                                           __FUNCTION__, path);
                                 goto decided;
                         } else {
                                           "warning: %s: %s does not exist",
                                           __FUNCTION__, path);
                                 goto decided;
                         } else {
-                                llapi_err(LLAPI_MSG_ERROR, 
+                                llapi_err(LLAPI_MSG_ERROR,
                                           "%s: IOC_LOV_GETINFO on %s failed",
                                           __FUNCTION__, path);
                                 return ret;
                                           "%s: IOC_LOV_GETINFO on %s failed",
                                           __FUNCTION__, path);
                                 return ret;
@@ -1350,20 +1678,20 @@ static int cb_getstripe(char *path, DIR *parent, DIR *d, void *data,
         if (ret) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
         if (ret) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
-                                llapi_printf(LLAPI_MSG_NORMAL, 
+                                llapi_printf(LLAPI_MSG_NORMAL,
                                              "%s has no stripe info\n", path);
                         goto out;
                 } else if (errno == ENOTTY) {
                                              "%s has no stripe info\n", path);
                         goto out;
                 } else if (errno == ENOTTY) {
-                        llapi_err(LLAPI_MSG_ERROR, 
+                        llapi_err(LLAPI_MSG_ERROR,
                                   "%s: '%s' not on a Lustre fs?",
                                   __FUNCTION__, path);
                 } else if (errno == ENOENT) {
                                   "%s: '%s' not on a Lustre fs?",
                                   __FUNCTION__, path);
                 } else if (errno == ENOENT) {
-                        llapi_err(LLAPI_MSG_WARN, 
+                        llapi_err(LLAPI_MSG_WARN,
                                   "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         goto out;
                 } else {
                                   "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         goto out;
                 } else {
-                        llapi_err(LLAPI_MSG_ERROR, 
+                        llapi_err(LLAPI_MSG_ERROR,
                                   "error: %s: %s failed for %s",
                                    __FUNCTION__, d ? "LL_IOC_LOV_GETSTRIPE" :
                                   "IOC_MDC_GETFILESTRIPE", path);
                                   "error: %s: %s failed for %s",
                                    __FUNCTION__, d ? "LL_IOC_LOV_GETSTRIPE" :
                                   "IOC_MDC_GETFILESTRIPE", path);
@@ -1388,7 +1716,7 @@ int llapi_getstripe(char *path, struct find_param *param)
         int ret = 0, len = strlen(path);
 
         if (len > PATH_MAX) {
         int ret = 0, len = strlen(path);
 
         if (len > PATH_MAX) {
-                llapi_err(LLAPI_MSG_ERROR, 
+                llapi_err(LLAPI_MSG_ERROR,
                           "%s: Path name '%s' is too long",
                           __FUNCTION__, path);
                 return -EINVAL;
                           "%s: Path name '%s' is too long",
                           __FUNCTION__, path);
                 return -EINVAL;
@@ -1434,7 +1762,7 @@ int llapi_obd_statfs(char *path, __u32 type, __u32 index,
         data.ioc_plen2 = sizeof(struct obd_uuid);
 
         if ((rc = obd_ioctl_pack(&data, &rawbuf, sizeof(raw))) != 0) {
         data.ioc_plen2 = sizeof(struct obd_uuid);
 
         if ((rc = obd_ioctl_pack(&data, &rawbuf, sizeof(raw))) != 0) {
-                llapi_err(LLAPI_MSG_ERROR, 
+                llapi_err(LLAPI_MSG_ERROR,
                           "llapi_obd_statfs: error packing ioctl data");
                 return rc;
         }
                           "llapi_obd_statfs: error packing ioctl data");
                 return rc;
         }
@@ -1445,7 +1773,7 @@ int llapi_obd_statfs(char *path, __u32 type, __u32 index,
 
         if (fd < 0) {
                 rc = errno ? -errno : -EBADF;
 
         if (fd < 0) {
                 rc = errno ? -errno : -EBADF;
-                llapi_err(LLAPI_MSG_ERROR, "error: %s: opening '%s'", 
+                llapi_err(LLAPI_MSG_ERROR, "error: %s: opening '%s'",
                           __FUNCTION__, path);
                 return rc;
         }
                           __FUNCTION__, path);
                 return rc;
         }
@@ -1694,11 +2022,11 @@ static int cb_quotachown(char *path, DIR *parent, DIR *d, void *data,
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
-                                llapi_err(LLAPI_MSG_ERROR, 
+                                llapi_err(LLAPI_MSG_ERROR,
                                           "%s has no stripe info", path);
                         rc = 0;
                 } else if (errno == ENOENT) {
                                           "%s has no stripe info", path);
                         rc = 0;
                 } else if (errno == ENOENT) {
-                        llapi_err(LLAPI_MSG_ERROR, 
+                        llapi_err(LLAPI_MSG_ERROR,
                                   "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         rc = 0;
                                   "warning: %s: %s does not exist",
                                   __FUNCTION__, path);
                         rc = 0;
index f173b56..d1fd956 100644 (file)
@@ -398,6 +398,26 @@ void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip)
                        createtime, canceltime);
                 break;
         }
                        createtime, canceltime);
                 break;
         }
+        case(LCFG_POOL_NEW):{
+                printf("pool new ");
+                print_1_cfg(lcfg);
+                break;
+        }
+        case(LCFG_POOL_ADD):{
+                printf("pool add ");
+                print_1_cfg(lcfg);
+                break;
+        }
+        case(LCFG_POOL_REM):{
+                printf("pool remove ");
+                print_1_cfg(lcfg);
+                break;
+        }
+        case(LCFG_POOL_DEL):{
+                printf("pool destroy ");
+                print_1_cfg(lcfg);
+                break;
+        }
         default:
                 printf("unsupported cmd_code = %x\n",cmd);
         }
         default:
                 printf("unsupported cmd_code = %x\n",cmd);
         }
index 00ccd91..5a5bde3 100644 (file)
@@ -52,6 +52,7 @@
 #include <stdarg.h>
 #include <signal.h>
 #include <ctype.h>
 #include <stdarg.h>
 #include <signal.h>
 #include <ctype.h>
+#include <glob.h>
 
 #include "obdctl.h"
 
 
 #include "obdctl.h"
 
@@ -69,6 +70,7 @@
 #include <lnet/lnetctl.h>
 #include <libcfs/libcfsutil.h>
 #include <stdio.h>
 #include <lnet/lnetctl.h>
 #include <libcfs/libcfsutil.h>
 #include <stdio.h>
+#include <lustre/liblustreapi.h>
 
 #define MAX_STRING_SIZE 128
 #define DEVICES_LIST "/proc/fs/lustre/devices"
 
 #define MAX_STRING_SIZE 128
 #define DEVICES_LIST "/proc/fs/lustre/devices"
@@ -164,42 +166,51 @@ int lcfg_ioctl(char * func, int dev_id, struct lustre_cfg *lcfg)
 
 static int do_device(char *func, char *devname);
 
 
 static int do_device(char *func, char *devname);
 
-int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg)
+static int get_mgs_device()
 {
 {
-        struct obd_ioctl_data data;
-        static int mgs_device = -1;
         char mgs[] = "$MGS";
         char mgs[] = "$MGS";
-        int rc;
+        static int mgs_device = -1;
 
 
-        /* Always operates on MGS dev */
         if (mgs_device == -1) {
         if (mgs_device == -1) {
+                int rc;
                 do_disconnect(NULL, 1);
                 rc = do_device("mgsioc", mgs);
                 if (rc) {
                 do_disconnect(NULL, 1);
                 rc = do_device("mgsioc", mgs);
                 if (rc) {
+                        fprintf(stderr,
+                                "This command must be run on the MGS.\n");
                         errno = ENODEV;
                         return -1;
                 }
                 mgs_device = cur_device;
         }
                         errno = ENODEV;
                         return -1;
                 }
                 mgs_device = cur_device;
         }
+        return mgs_device;
+}
+
+/* Returns -1 on error with errno set */
+int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg)
+{
+        struct obd_ioctl_data data;
+        int rc;
 
         IOC_INIT(data);
 
         IOC_INIT(data);
-        data.ioc_dev = mgs_device;
+        rc = data.ioc_dev = get_mgs_device();
+        if (rc < 0)
+                goto out;
         data.ioc_type = LUSTRE_CFG_TYPE;
         data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
                                         lcfg->lcfg_buflens);
         data.ioc_pbuf1 = (void *)lcfg;
         IOC_PACK(func, data);
 
         data.ioc_type = LUSTRE_CFG_TYPE;
         data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
                                         lcfg->lcfg_buflens);
         data.ioc_pbuf1 = (void *)lcfg;
         IOC_PACK(func, data);
 
-        rc =  l_ioctl(dev_id, OBD_IOC_PARAM, buf);
-
-        if (rc == ENODEV)
-                fprintf(stderr, "Is the MGS running on this node?\n");
-        if (rc == ENOSYS)
-                fprintf(stderr, "Make sure cfg_device is set first.\n");
-        if (rc == EINVAL)
-                fprintf(stderr, "cfg_device should be of the form "
-                        "'lustre-MDT0000'\n");
-
+        rc = l_ioctl(dev_id, OBD_IOC_PARAM, buf);
+out:
+        if (rc) {
+                if (errno == ENOSYS)
+                        fprintf(stderr, "Make sure cfg_device is set first.\n");
+                if (errno == EINVAL)
+                        fprintf(stderr, "cfg_device should be of the form "
+                                "'lustre-MDT0000'\n");
+        }
         return rc;
 }
 
         return rc;
 }
 
@@ -2334,3 +2345,591 @@ void obd_finalize(int argc, char **argv)
         shmem_stop();
         do_disconnect(argv[0], 1);
 }
         shmem_stop();
         do_disconnect(argv[0], 1);
 }
+
+static int find_target_obdpath(char *fsname, char *path)
+{
+        glob_t glob_info;
+        char pattern[PATH_MAX + 1];
+        int rc;
+
+        snprintf(pattern, PATH_MAX,
+                 "/proc/fs/lustre/lov/%s-*/target_obd",
+                 fsname);
+        rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
+        if (rc)
+                return -EINVAL;
+
+        if (glob_info.gl_pathc == 0) {
+                globfree(&glob_info);
+                return -EINVAL;
+        }
+
+        strcpy(path, glob_info.gl_pathv[0]);
+        return 0;
+}
+
+static int find_poolpath(char *fsname, char *poolname, char *poolpath)
+{
+        glob_t glob_info;
+        char pattern[PATH_MAX + 1];
+        int rc;
+
+        snprintf(pattern, PATH_MAX,
+                 "/proc/fs/lustre/lov/%s-*/pools/%s",
+                 fsname, poolname);
+        rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
+        if (rc)
+                return -EINVAL;
+
+        if (glob_info.gl_pathc == 0) {
+                globfree(&glob_info);
+                return -EINVAL;
+        }
+
+        strcpy(poolpath, glob_info.gl_pathv[0]);
+        return 0;
+}
+
+/*
+ * if pool is NULL, search ostname in target_obd
+ * if pool is no NULL
+ *  if pool not found returns < 0
+ *  if ostname is NULL, returns 1 if pool is not empty and 0 if pool empty
+ *  if ostname is not NULL, returns 1 if OST is in pool and 0 if not
+ */
+static int search_ost(char *fsname, char *poolname, char *ostname)
+{
+        FILE *fd;
+        char buffer[PATH_MAX + 1];
+        int len = 0, rc;
+
+        if (ostname != NULL)
+                len = strlen(ostname);
+
+        if (poolname == NULL)
+                rc = find_target_obdpath(fsname, buffer);
+        else
+                rc = find_poolpath(fsname, poolname, buffer);
+        if (rc)
+                return rc;
+
+        if ((fd = fopen(buffer, "r")) == NULL)
+                return -EINVAL;
+
+        while (fgets(buffer, sizeof(buffer), fd) != NULL) {
+                if (poolname == NULL) {
+                        /* we search ostname in target_obd */
+                        if (strncmp(buffer + 3, ostname, len) == 0) {
+                                fclose(fd);
+                                return 1;
+                        }
+                } else {
+                        /* we search a non empty pool or
+                           an ostname in a pool */
+                        if ((ostname == NULL) ||
+                            (strncmp(buffer, ostname, len) == 0)) {
+                                fclose(fd);
+                                return 1;
+                        }
+                }
+        }
+        fclose(fd);
+        return 0;
+}
+
+static int check_pool_cmd(enum lcfg_command_type cmd,
+                          char *fsname, char *poolname,
+                          char *ostname)
+{
+        int rc = 0;
+
+        switch (cmd) {
+        case LCFG_POOL_NEW: {
+                if (search_ost(fsname, poolname, NULL) >= 0) {
+                        fprintf(stderr, "Pool %s.%s already exists\n",
+                                fsname, poolname);
+                        return -EEXIST;
+                }
+                return 0;
+        }
+        case LCFG_POOL_DEL: {
+                rc = search_ost(fsname, poolname, NULL);
+                if (rc < 0) {
+                        fprintf(stderr, "Pool %s.%s not found\n",
+                                fsname, poolname);
+                        return -ENOENT;
+                }
+                if (rc == 1) {
+                        fprintf(stderr, "Pool %s.%s not empty, "
+                                "please remove all members\n",
+                                fsname, poolname);
+                        return -ENOTEMPTY;
+                }
+                return 0;
+        }
+        case LCFG_POOL_ADD: {
+                rc = search_ost(fsname, NULL, ostname);
+                if (rc == 0) {
+                        fprintf(stderr, "OST %s not found in lov of %s\n",
+                                ostname, fsname);
+                        return -ENOENT;
+                }
+                rc = search_ost(fsname, poolname, ostname);
+                if (rc < 0) {
+                        fprintf(stderr, "Pool %s.%s not found\n",
+                                fsname, poolname);
+                        return -ENOENT;
+                }
+                if (rc == 1) {
+                        fprintf(stderr, "OST %s already in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                        return -EEXIST;
+                }
+                return 0;
+        }
+        case LCFG_POOL_REM: {
+                rc = search_ost(fsname, poolname, ostname);
+                if (rc < 0) {
+                        fprintf(stderr, "Pool %s.%s not found\n",
+                                fsname, poolname);
+                        return -ENOENT;
+                }
+                if (rc == 0) {
+                        fprintf(stderr, "OST %s not found in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                        return -ENOENT;
+                }
+                return 0;
+        }
+        default: {
+        }
+        }
+        return 0;
+}
+
+static void check_pool_cmd_result(enum lcfg_command_type cmd,
+                                  char *fsname, char *poolname,
+                                  char *ostname)
+{
+        int cpt, rc = 0;
+
+        cpt = 10;
+        switch (cmd) {
+        case LCFG_POOL_NEW: {
+                do {
+                        rc = search_ost(fsname, poolname, NULL);
+                        if (rc < 0)
+                                sleep(2);
+                        cpt--;
+                } while ((rc < 0) && (cpt > 0));
+                if (rc >= 0)
+                        fprintf(stderr, "Pool %s.%s created\n",
+                                fsname, poolname);
+                else
+                        fprintf(stderr, "Warning, pool %s.%s not found\n",
+                                fsname, poolname);
+                return;
+        }
+        case LCFG_POOL_DEL: {
+                do {
+                         rc = search_ost(fsname, poolname, NULL);
+                         if (rc >= 0)
+                                sleep(2);
+                         cpt--;
+                } while ((rc >= 0) && (cpt > 0));
+                if (rc < 0)
+                        fprintf(stderr, "Pool %s.%s destroyed\n",
+                                fsname, poolname);
+                else
+                        fprintf(stderr, "Warning, pool %s.%s still found\n",
+                                fsname, poolname);
+                return;
+        }
+        case LCFG_POOL_ADD: {
+                do {
+                        rc = search_ost(fsname, poolname, ostname);
+                        if (rc != 1)
+                                sleep(2);
+                        cpt--;
+                } while ((rc != 1) && (cpt > 0));
+                if (rc == 1)
+                        fprintf(stderr, "OST %s added to pool %s.%s\n",
+                                ostname, fsname, poolname);
+                else
+                        fprintf(stderr, "Warning, OST %s not found in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                return;
+        }
+        case LCFG_POOL_REM: {
+                do {
+                        rc = search_ost(fsname, poolname, ostname);
+                        if (rc == 1)
+                                sleep(2);
+                        cpt--;
+                } while ((rc == 1) && (cpt > 0));
+                if (rc != 1)
+                        fprintf(stderr, "OST %s removed from pool %s.%s\n",
+                                ostname, fsname, poolname);
+                else
+                        fprintf(stderr, "Warning, OST %s still found in pool %s.%s\n",
+                                ostname, fsname, poolname);
+                return;
+        }
+        default: {
+        }
+        }
+}
+
+static int check_and_complete_ostname(char *fsname, char *ostname)
+{
+        char *ptr;
+        char real_ostname[MAX_OBD_NAME + 1];
+        char i;
+
+        /* if OST name does not start with fsname, we add it */
+        /* if not check if the fsname is the right one */
+        ptr = strchr(ostname, '-');
+        if (ptr == NULL) {
+                sprintf(real_ostname, "%s-%s", fsname, ostname);
+        } else if (strncmp(ostname, fsname, strlen(fsname)) != 0) {
+                fprintf(stderr, "%s does not start with fsname %s\n",
+                        ostname, fsname);
+                return -EINVAL;
+        } else {
+             strcpy(real_ostname, ostname);
+        }
+        /* real_ostname is fsname-????? */
+        ptr = real_ostname + strlen(fsname) + 1;
+        if (strncmp(ptr, "OST", 3) != 0) {
+                fprintf(stderr, "%s does not start by %s-OST nor OST\n",
+                        ostname, fsname);
+                return -EINVAL;
+        }
+        /* real_ostname is fsname-OST????? */
+        ptr += 3;
+        for (i = 0; i < 4; i++) {
+                if (!isxdigit(*ptr)) {
+                        fprintf(stderr,
+                                "ost's index in %s is not an hexa number\n",
+                                ostname);
+                        return -EINVAL;
+                }
+                ptr++;
+        }
+        /* real_ostname is fsname-OSTXXXX????? */
+        /* if OST name does not end with _UUID, we add it */
+        if (*ptr == '\0') {
+                strcat(real_ostname, "_UUID");
+        } else if (strcmp(ptr, "_UUID") != 0) {
+                fprintf(stderr,
+                        "ostname %s does not end with _UUID\n", ostname);
+                return -EINVAL;
+        }
+        /* real_ostname is fsname-OSTXXXX_UUID */
+        strcpy(ostname, real_ostname);
+        return 0;
+}
+
+/* returns 0 or -errno */
+static int pool_cmd(enum lcfg_command_type cmd,
+                    char *cmdname, char *fullpoolname,
+                    char *fsname, char *poolname, char *ostname)
+{
+        int rc = 0;
+        struct obd_ioctl_data data;
+        struct lustre_cfg_bufs bufs;
+        struct lustre_cfg *lcfg;
+
+        rc = check_pool_cmd(cmd, fsname, poolname, ostname);
+        if (rc)
+                return rc;
+
+        lustre_cfg_bufs_reset(&bufs, NULL);
+        lustre_cfg_bufs_set_string(&bufs, 0, cmdname);
+        lustre_cfg_bufs_set_string(&bufs, 1, fullpoolname);
+        if (ostname != NULL)
+                lustre_cfg_bufs_set_string(&bufs, 2, ostname);
+
+        lcfg = lustre_cfg_new(cmd, &bufs);
+        if (IS_ERR(lcfg)) {
+                rc = PTR_ERR(lcfg);
+                return rc;
+        }
+
+        IOC_INIT(data);
+        rc = data.ioc_dev = get_mgs_device();
+        if (rc < 0)
+                goto out;
+
+        data.ioc_type = LUSTRE_CFG_TYPE;
+        data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
+                                        lcfg->lcfg_buflens);
+        data.ioc_pbuf1 = (void *)lcfg;
+        IOC_PACK(cmdname, data);
+
+        rc = l_ioctl(OBD_DEV_ID, OBD_IOC_POOL, buf);
+out:
+        if (rc)
+                rc = -errno;
+        lustre_cfg_free(lcfg);
+        return rc;
+}
+
+/*
+ * this function tranforms a rule [start-end/step] into an array
+ * of matching numbers
+ * supported forms are:
+ * [start]                : just this number
+ * [start-end]            : all numbers from start to end
+ * [start-end/step]       : numbers from start to end with increment of step
+ * on return, format contains a printf format string which can be used
+ * to generate all the strings
+ */
+static int get_array_idx(char *rule, char *format, int **array)
+{
+        char *start, *end, *ptr;
+        unsigned int lo, hi, step;
+        int array_sz = 0;
+        int i, array_idx;
+        int rc;
+
+        start = strchr(rule, '[');
+        end = strchr(rule, ']');
+        if ((start == NULL) || (end == NULL)) {
+                *array = malloc(sizeof(int));
+                if (*array == NULL)
+                        return 0;
+                strcpy(format, rule);
+                array_sz = 1;
+                return array_sz;
+        }
+        *start = '\0';
+        *end = '\0';
+        end++;
+        start++;
+        /* put in format the printf format (the rule without the range) */
+        sprintf(format, "%s%%.4d%s", rule, end);
+
+        array_idx = 0;
+        array_sz = 0;
+        *array = NULL;
+        /* loop on , separator */
+        do {
+                /* extract the 3 fields */
+                rc = sscanf(start, "%u-%u/%u", &lo, &hi, &step);
+                switch (rc) {
+                case 0: {
+                        return 0;
+                }
+                case 1: {
+                        array_sz++;
+                        *array = realloc(*array, array_sz * sizeof(int));
+                        if (*array == NULL)
+                                return 0;
+                        (*array)[array_idx] = lo;
+                        array_idx++;
+                        break;
+                }
+                case 2: {
+                        step = 1;
+                        /* do not break to share code with case 3: */
+                }
+                case 3: {
+                        if ((hi < lo) || (step == 0))
+                                return 0;
+                        array_sz += (hi - lo) / step + 1;
+                        *array = realloc(*array, sizeof(int) * array_sz);
+                        if (*array == NULL)
+                                return 0;
+                        for (i = lo; i <= hi; i+=step, array_idx++)
+                                (*array)[array_idx] = i;
+                        break;
+                }
+                }
+                ptr = strchr(start, ',');
+                if (ptr != NULL)
+                        start = ptr + 1;
+
+        } while (ptr != NULL);
+        return array_sz;
+}
+
+static int extract_fsname_poolname(char *arg, char *fsname, char *poolname)
+{
+        char *ptr;
+        int len;
+        int rc;
+
+        strcpy(fsname, arg);
+        ptr = strchr(fsname, '.');
+        if (ptr == NULL) {
+                fprintf(stderr, ". is missing in %s\n", fsname);
+                rc = -EINVAL;
+                goto err;
+        }
+
+        len = ptr - fsname;
+        if (len == 0) {
+                fprintf(stderr, "fsname is empty\n");
+                rc = -EINVAL;
+                goto err;
+        }
+
+        len = strlen(ptr + 1);
+        if (len == 0) {
+                fprintf(stderr, "poolname is empty\n");
+                rc = -EINVAL;
+                goto err;
+        }
+        if (len > MAXPOOLNAME) {
+                fprintf(stderr,
+                        "poolname %s is too long (length is %d max is %d)\n",
+                        ptr + 1, len, MAXPOOLNAME);
+                rc = -ENAMETOOLONG;
+                goto err;
+        }
+        strncpy(poolname, ptr + 1, MAXPOOLNAME);
+        poolname[MAXPOOLNAME] = '\0';
+        *ptr = '\0';
+        return 0;
+
+err:
+        fprintf(stderr, "argument %s must be <fsname>.<poolname>\n", arg);
+        return rc;
+}
+
+int jt_pool_cmd(int argc, char **argv)
+{
+        enum lcfg_command_type cmd;
+        char fsname[PATH_MAX + 1];
+        char poolname[MAXPOOLNAME + 1];
+        char *ostnames_buf = NULL;
+        int i, rc;
+        int *array = NULL, array_sz;
+        struct {
+                int     rc;
+                char   *ostname;
+        } *cmds = NULL;
+
+        switch (argc) {
+        case 0:
+        case 1: return CMD_HELP;
+        case 2: {
+                if (strcmp("pool_new", argv[0]) == 0)
+                        cmd = LCFG_POOL_NEW;
+                else if (strcmp("pool_destroy", argv[0]) == 0)
+                        cmd = LCFG_POOL_DEL;
+                else if (strcmp("pool_list", argv[0]) == 0)
+                         return llapi_poollist(argv[1]);
+                else return CMD_HELP;
+
+                rc = extract_fsname_poolname(argv[1], fsname, poolname);
+                if (rc)
+                        break;
+
+                rc = pool_cmd(cmd, argv[0], argv[1],
+                              fsname, poolname, NULL);
+                if (rc)
+                        break;
+
+                check_pool_cmd_result(cmd, fsname, poolname, NULL);
+                break;
+        }
+        default: {
+                char format[2*MAX_OBD_NAME];
+
+                if (strcmp("pool_remove", argv[0]) == 0) {
+                        cmd = LCFG_POOL_REM;
+                } else if (strcmp("pool_add", argv[0]) == 0) {
+                        cmd = LCFG_POOL_ADD;
+                } else {
+                        return CMD_HELP;
+                }
+
+                rc = extract_fsname_poolname(argv[1], fsname, poolname);
+                if (rc)
+                        break;
+
+                for (i = 2; i < argc; i++) {
+                        int j;
+
+                        array_sz = get_array_idx(argv[i], format, &array);
+                        if (array_sz == 0)
+                                return CMD_HELP;
+
+                        cmds = malloc(array_sz * sizeof(cmds[0]));
+                        if (cmds != NULL) {
+                                ostnames_buf = malloc(array_sz *
+                                                      (MAX_OBD_NAME + 1));
+                        } else {
+                                free(array);
+                                rc = -ENOMEM;
+                                goto out;
+                        }
+
+                        for (j = 0; j < array_sz; j++) {
+                                char ostname[MAX_OBD_NAME + 1];
+
+                                snprintf(ostname, MAX_OBD_NAME, format,
+                                         array[j]);
+                                ostname[MAX_OBD_NAME] = '\0';
+
+                                rc = check_and_complete_ostname(fsname,ostname);
+                                if (rc) {
+                                        free(array);
+                                        free(cmds);
+                                        if (ostnames_buf)
+                                                free(ostnames_buf);
+                                        goto out;
+                                }
+                                if (ostnames_buf != NULL) {
+                                        cmds[j].ostname =
+                                          &ostnames_buf[(MAX_OBD_NAME + 1) * j];
+                                        strcpy(cmds[j].ostname, ostname);
+                                } else {
+                                        cmds[j].ostname = NULL;
+                                }
+                                cmds[j].rc = pool_cmd(cmd, argv[0], argv[1],
+                                                      fsname, poolname,
+                                                      ostname);
+                        }
+                        for (j = 0; j < array_sz; j++) {
+                                if (!cmds[j].rc) {
+                                        char ostname[MAX_OBD_NAME + 1];
+
+                                        if (!cmds[j].ostname) {
+                                                snprintf(ostname, MAX_OBD_NAME,
+                                                         format, array[j]);
+                                                ostname[MAX_OBD_NAME] = '\0';
+                                                check_and_complete_ostname(
+                                                        fsname, ostname);
+                                        } else {
+                                                strcpy(ostname,
+                                                       cmds[j].ostname);
+                                        }
+                                        check_pool_cmd_result(cmd, fsname,
+                                                              poolname,ostname);
+                                }
+                        }
+                        if (array_sz > 0)
+                                free(array);
+                        if (cmds)
+                                free(cmds);
+                        if (ostnames_buf);
+                                free(ostnames_buf);
+                }
+                return 0;
+        }
+        }
+
+
+out:
+        if ((rc == -EINVAL) || (rc == -ENOENT))
+                fprintf(stderr, "Does the fs, pool or ost exist?\n");
+        if (rc != 0) {
+                errno = -rc;
+                perror(argv[0]);
+        }
+
+        return rc;
+}
index a5fd90a..eeb1bb8 100644 (file)
@@ -119,4 +119,6 @@ int jt_blockdev_attach(int argc, char **argv);
 int jt_blockdev_detach(int argc, char **argv);
 int jt_blockdev_info(int argc, char **argv);
 
 int jt_blockdev_detach(int argc, char **argv);
 int jt_blockdev_info(int argc, char **argv);
 
+int jt_pool_cmd(int argc, char **argv);
+
 #endif
 #endif
index ef6d35b..dc366bd 100644 (file)
@@ -60,7 +60,7 @@
 #define lustre_swab_ldlm_request NULL
 #define lustre_swab_ldlm_reply NULL
 #define lustre_swab_ldlm_intent NULL
 #define lustre_swab_ldlm_request NULL
 #define lustre_swab_ldlm_reply NULL
 #define lustre_swab_ldlm_intent NULL
-#define lustre_swab_lov_mds_md NULL
+/* #define lustre_swab_lov_mds_md NULL */
 #define lustre_swab_mdt_rec_reint NULL
 #define lustre_swab_lustre_capa NULL
 #define lustre_swab_lustre_capa_key NULL
 #define lustre_swab_mdt_rec_reint NULL
 #define lustre_swab_lustre_capa NULL
 #define lustre_swab_lustre_capa_key NULL
index 05cdec0..e75cf6c 100644 (file)
@@ -344,6 +344,33 @@ check_lov_mds_md_join(void)
 }
 
 static void
 }
 
 static void
+check_lov_mds_md_v3(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(lov_mds_md_v3);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_magic);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_pattern);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_object_id);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_object_gr);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_stripe_size);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_stripe_count);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_pool_name);
+        CHECK_MEMBER(lov_mds_md_v3, lmm_objects);
+
+        BLANK_LINE();
+        CHECK_STRUCT(lov_ost_data_v1);
+        CHECK_MEMBER(lov_ost_data_v1, l_object_id);
+        CHECK_MEMBER(lov_ost_data_v1, l_object_gr);
+        CHECK_MEMBER(lov_ost_data_v1, l_ost_gen);
+        CHECK_MEMBER(lov_ost_data_v1, l_ost_idx);
+
+        CHECK_CDEFINE(LOV_MAGIC_V3);
+
+        CHECK_VALUE(LOV_PATTERN_RAID0);
+        CHECK_VALUE(LOV_PATTERN_RAID1);
+}
+
+static void
 check_obd_statfs(void)
 {
         BLANK_LINE();
 check_obd_statfs(void)
 {
         BLANK_LINE();
@@ -1307,6 +1334,7 @@ main(int argc, char **argv)
         check_obd_connect_data();
         check_obdo();
         check_lov_mds_md_v1();
         check_obd_connect_data();
         check_obdo();
         check_lov_mds_md_v1();
+        check_lov_mds_md_v3();
         check_lov_mds_md_join();
         check_obd_statfs();
         check_obd_ioobj();
         check_lov_mds_md_join();
         check_obd_statfs();
         check_obd_ioobj();