Whamcloud - gitweb
LU-9771 flr: lfs mirror create and extend commands
[fs/lustre-release.git] / lustre / utils / lfs.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/utils/lfs.c
33  *
34  * Author: Peter J. Braam <braam@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  * Author: Robert Read <rread@clusterfs.com>
37  */
38
39 /* for O_DIRECTORY */
40 #ifndef _GNU_SOURCE
41 #define _GNU_SOURCE
42 #endif
43
44 #include <stdlib.h>
45 #include <stdio.h>
46 #include <getopt.h>
47 #include <string.h>
48 #include <mntent.h>
49 #include <unistd.h>
50 #include <errno.h>
51 #include <err.h>
52 #include <pwd.h>
53 #include <grp.h>
54 #include <sys/ioctl.h>
55 #include <sys/quota.h>
56 #include <sys/time.h>
57 #include <sys/types.h>
58 #include <sys/stat.h>
59 #include <fcntl.h>
60 #include <dirent.h>
61 #include <time.h>
62 #include <ctype.h>
63
64 #include <libcfs/util/string.h>
65 #include <libcfs/util/ioctl.h>
66 #include <libcfs/util/parser.h>
67 #include <lustre/lustreapi.h>
68 #include <linux/lustre/lustre_ver.h>
69 #include <linux/lustre/lustre_param.h>
70
/*
 * Number of elements in a true array (not a pointer).  Only defined here
 * when no previously included header has already provided ARRAY_SIZE.
 */
#if !defined(ARRAY_SIZE)
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif /* !defined(ARRAY_SIZE) */
74
/*
 * Forward declarations of the command handlers referenced by the command
 * tables below.  Every handler takes the (argc, argv) pair of its own
 * sub-command line and returns an int status.
 */

/* file and directory layout */
static int lfs_find(int argc, char **argv);
static int lfs_getstripe(int argc, char **argv);
static int lfs_getdirstripe(int argc, char **argv);
static int lfs_setdirstripe(int argc, char **argv);
static int lfs_rmentry(int argc, char **argv);
static int lfs_swap_layouts(int argc, char **argv);
static int lfs_mv(int argc, char **argv);
static int lfs_ladvise(int argc, char **argv);
static int lfs_data_version(int argc, char **argv);

/* targets, pools and filesystem state */
static int lfs_osts(int argc, char **argv);
static int lfs_mdts(int argc, char **argv);
static int lfs_df(int argc, char **argv);
static int lfs_getname(int argc, char **argv);
static int lfs_check(int argc, char **argv);
static int lfs_poollist(int argc, char **argv);

/* quota handlers exist only when <sys/quota.h> is available */
#ifdef HAVE_SYS_QUOTA_H
static int lfs_setquota(int argc, char **argv);
static int lfs_quota(int argc, char **argv);
#endif

/* security context and remote-user helpers */
static int lfs_flushctx(int argc, char **argv);
static int lfs_cp(int argc, char **argv);
static int lfs_ls(int argc, char **argv);

/* changelogs and FID <-> path translation */
static int lfs_changelog(int argc, char **argv);
static int lfs_changelog_clear(int argc, char **argv);
static int lfs_fid2path(int argc, char **argv);
static int lfs_path2fid(int argc, char **argv);

/* HSM */
static int lfs_hsm_state(int argc, char **argv);
static int lfs_hsm_set(int argc, char **argv);
static int lfs_hsm_clear(int argc, char **argv);
static int lfs_hsm_action(int argc, char **argv);
static int lfs_hsm_archive(int argc, char **argv);
static int lfs_hsm_restore(int argc, char **argv);
static int lfs_hsm_release(int argc, char **argv);
static int lfs_hsm_remove(int argc, char **argv);
static int lfs_hsm_cancel(int argc, char **argv);

/* mirror sub-commands and command listing */
static int lfs_mirror(int argc, char **argv);
static int lfs_mirror_list_commands(int argc, char **argv);
static int lfs_list_commands(int argc, char **argv);
114
/*
 * Identifies which command entered the shared lfs_setstripe0()
 * implementation.
 */
enum setstripe_origin {
        SO_SETSTRIPE     = 0,   /* passed by lfs_setstripe() */
        SO_MIGRATE       = 1,   /* passed by lfs_setstripe_migrate() */
        SO_MIRROR_CREATE = 2,   /* passed by lfs_mirror_create() */
        SO_MIRROR_EXTEND = 3    /* passed by lfs_mirror_extend() */
};

/* Common implementation behind the four wrappers; defined later in the file. */
static int lfs_setstripe0(int argc, char **argv, enum setstripe_origin opc);
122
123 static inline int lfs_setstripe(int argc, char **argv)
124 {
125         return lfs_setstripe0(argc, argv, SO_SETSTRIPE);
126 }
127 static inline int lfs_setstripe_migrate(int argc, char **argv)
128 {
129         return lfs_setstripe0(argc, argv, SO_MIGRATE);
130 }
131 static inline int lfs_mirror_create(int argc, char **argv)
132 {
133         return lfs_setstripe0(argc, argv, SO_MIRROR_CREATE);
134 }
135 static inline int lfs_mirror_extend(int argc, char **argv)
136 {
137         return lfs_setstripe0(argc, argv, SO_MIRROR_EXTEND);
138 }
139
140 /* Setstripe and migrate share mostly the same parameters */
141 #define SSM_CMD_COMMON(cmd) \
142         "usage: "cmd" [--component-end|-E <comp_end>]\n"                \
143         "                 [--stripe-count|-c <stripe_count>]\n"         \
144         "                 [--stripe-index|-i <start_ost_idx>]\n"        \
145         "                 [--stripe-size|-S <stripe_size>]\n"           \
146         "                 [--layout|-L <pattern>]\n"            \
147         "                 [--pool|-p <pool_name>]\n"                    \
148         "                 [--ost|-o <ost_indices>]\n"
149
/*
 * Description of the option arguments named in SSM_CMD_COMMON(); appended
 * after the synopsis in the help texts that use it.
 */
#define SSM_HELP_COMMON \
        "\tstripe_count: Number of OSTs to stripe over (0=fs default, -1 all)\n" \
        "\tstart_ost_idx: OST index of first stripe (-1=default round robin)\n" \
        "\tstripe_size:  Number of bytes on each OST (0=fs default)\n" \
        "\t              Can be specified with K, M or G (for KB, MB, GB\n" \
        "\t              respectively)\n" \
        "\tpool_name:    Name of OST pool to use (default none)\n" \
        "\tlayout:       stripe pattern type: raid0, mdt (default raid0)\n" \
        "\tost_indices:  List of OST indices, can be repeated multiple times\n" \
        "\t              Indices be specified in a format of:\n" \
        "\t                -o <ost_1>,<ost_i>-<ost_j>,<ost_n>\n" \
        "\t              Or:\n" \
        "\t                -o <ost_1> -o <ost_i>-<ost_j> -o <ost_n>\n" \
        "\t              If --pool is set with --ost, then the OSTs\n" \
        "\t              must be the members of the pool.\n" \
        "\tcomp_end:     Extent end of component, start after previous end.\n" \
        "\t              Can be specified with K, M or G (for KB, MB, GB\n" \
        "\t              respectively, -1 for EOF). Must be a multiple of\n" \
        "\t              stripe_size.\n"
169
/*
 * Description of the arguments accepted by "lfs mirror create"; also
 * reused as the first part of MIRROR_EXTEND_HELP.
 */
#define MIRROR_CREATE_HELP \
        "\tmirror_count: Number of mirrors to be created with the upcoming\n" \
        "\t              setstripe layout options\n" \
        "\t              It defaults to 1 if not specified; if specified,\n" \
        "\t              it must follow the option without a space.\n" \
        "\t              The option can also be repeated multiple times to\n" \
        "\t              separate mirrors that have different layouts.\n" \
        "\tsetstripe options: Mirror layout\n" \
        "\t              It can be a plain layout or a composite layout.\n" \
        "\t              If not specified, the stripe options inherited\n" \
        "\t              from the previous component will be used.\n" \
        "\tparent:       Use default stripe options from parent directory\n"
182
/*
 * Description of the arguments accepted by "lfs mirror extend": everything
 * in MIRROR_CREATE_HELP followed by the extend-only arguments.
 */
#define MIRROR_EXTEND_HELP \
        MIRROR_CREATE_HELP \
        "\tvictim_file:  The layout of victim_file will be split and used\n" \
        "\t              as a mirror added to the mirrored file.\n" \
        "\tno-verify:    This option indicates not to verify the mirror(s)\n" \
        "\t              from victim file(s) in case the victim file(s)\n" \
        "\t              contains the same data as the original mirrored\n" \
        "\t              file.\n"
191
/*
 * Continuation lines of a usage synopsis listing the mirror-extend
 * options; spliced into SETSTRIPE_USAGE.
 */
#define MIRROR_EXTEND_USAGE \
        "                 <--mirror-count|-N[mirror_count]>\n" \
        "                 [setstripe options|--parent|-f <victim_file>]\n" \
        "                 [--no-verify]\n"
196
/*
 * Complete setstripe usage text: common option synopsis, mirror-extend
 * options, the target operand, then the argument descriptions.
 */
#define SETSTRIPE_USAGE \
        SSM_CMD_COMMON("setstripe") \
        MIRROR_EXTEND_USAGE \
        "                 <directory|filename>\n" \
        SSM_HELP_COMMON \
        MIRROR_EXTEND_HELP
203
/*
 * Complete migrate usage text: common option synopsis, the migrate-only
 * --block/--non-block options, the target operand, then the argument
 * descriptions.
 *
 * The last line deliberately carries no line continuation, so the macro
 * ends there regardless of what follows it in the file.
 */
#define MIGRATE_USAGE                                                   \
        SSM_CMD_COMMON("migrate  ")                                     \
        "                 [--block|-b]\n"                               \
        "                 [--non-block|-n]\n"                           \
        "                 <filename>\n"                                 \
        SSM_HELP_COMMON                                                 \
        "\n"                                                            \
        "\tblock:        Block file access during data migration (default)\n" \
        "\tnon-block:    Abort migrations if concurrent access is detected\n"
213
/*
 * Option synopsis and argument descriptions shared by the "setdirstripe"
 * and "mkdir" help texts; each of those prepends its own "usage:" line.
 */
#define SETDIRSTRIPE_USAGE                                      \
        "               [--mdt-count|-c stripe_count]\n"        \
        "               [--mdt-index|-i mdt_index]\n"           \
        "               [--mdt-hash|-H mdt_hash]\n"             \
        "               [--default|-D] [--mode|-m mode] <dir>\n"        \
        "\tstripe_count: stripe count of the striped directory\n"       \
        "\tmdt_index: MDT index of first stripe\n"                      \
        "\tmdt_hash:  hash type of the striped directory. mdt types:\n" \
        "       fnv_1a_64 FNV-1a hash algorithm (default)\n"            \
        "       all_char  sum of characters % MDT_COUNT (not recommended)\n" \
        "\tdefault_stripe: set default dirstripe of the directory\n"    \
        "\tmode: the mode of the directory\n"
226
/* Program name; not assigned anywhere in this part of the file
 * (presumably set at startup -- confirm against main()). */
static const char *progname;
228
229 /**
230  * command_t mirror_cmdlist - lfs mirror commands.
231  */
232 command_t mirror_cmdlist[] = {
233         { .pc_name = "create", .pc_func = lfs_mirror_create,
234           .pc_help = "Create a mirrored file.\n"
235           "usage: lfs mirror create "
236           "<--mirror-count|-N[mirror_count]> "
237           "[setstripe options|--parent] ... <filename|directory>\n"
238           MIRROR_CREATE_HELP },
239         { .pc_name = "extend", .pc_func = lfs_mirror_extend,
240           .pc_help = "Extend a mirrored file.\n"
241           "usage: lfs mirror extend "
242           "<--mirror-count|-N[mirror_count]> [--no-verify] "
243           "[setstripe options|--parent|-f <victim_file>] ... <filename>\n"
244           MIRROR_EXTEND_HELP },
245         { .pc_name = "--list-commands", .pc_func = lfs_mirror_list_commands,
246           .pc_help = "list commands supported by lfs mirror"},
247         { .pc_name = "help", .pc_func = Parser_help, .pc_help = "help" },
248         { .pc_name = "exit", .pc_func = Parser_quit, .pc_help = "quit" },
249         { .pc_name = "quit", .pc_func = Parser_quit, .pc_help = "quit" },
250         { .pc_help = NULL }
251 };
252
253 /* all available commands */
254 command_t cmdlist[] = {
255         {"setstripe", lfs_setstripe, 0,
256          "To create a file with specified striping/composite layout, or\n"
257          "create/replace the default layout on an existing directory:\n"
258          SSM_CMD_COMMON("setstripe")
259          "                 <directory|filename>\n"
260          " or\n"
261          "To add component(s) to an existing composite file:\n"
262          SSM_CMD_COMMON("setstripe --component-add")
263          SSM_HELP_COMMON
264          "To totally delete the default striping from an existing directory:\n"
265          "usage: setstripe -d <directory>\n"
266          " or\n"
267          "To delete the last component(s) from an existing composite file\n"
268          "(note that this will also delete any data in those components):\n"
269          "usage: setstripe --component-del [--component-id|-I <comp_id>]\n"
270          "                               [--component-flags|-F <comp_flags>]\n"
271          "                               <filename>\n"
272          "\tcomp_id:     Unique component ID to delete\n"
273          "\tcomp_flags:  'init' indicating all instantiated components\n"
274          "\t             '^init' indicating all uninstantiated components\n"
275          "\t-I and -F cannot be specified at the same time\n"},
276         {"getstripe", lfs_getstripe, 0,
277          "To list the striping info for a given file or files in a\n"
278          "directory or recursively for all files in a directory tree.\n"
279          "usage: getstripe [--ost|-O <uuid>] [--quiet|-q] [--verbose|-v]\n"
280          "                 [--stripe-count|-c] [--stripe-index|-i]\n"
281          "                 [--pool|-p] [--stripe-size|-S] [--directory|-d]\n"
282          "                 [--mdt|-m] [--recursive|-r] [--raw|-R] [--yaml|-y]\n"
283          "                 [--layout|-L] [--fid|-F] [--generation|-g]\n"
284          "                 [--component-id[=comp_id]|-I[comp_id]]\n"
285          "                 [--component-flags[=comp_flags]]\n"
286          "                 [--component-count]\n"
287          "                 [--component-start[=[+-]comp_start]]\n"
288          "                 [--component-end[=[+-]comp_end]|-E[[+-]comp_end]]\n"
289          "                 <directory|filename> ..."},
290         {"setdirstripe", lfs_setdirstripe, 0,
291          "To create a striped directory on a specified MDT. This can only\n"
292          "be done on MDT0 with the right of administrator.\n"
293          "usage: setdirstripe [OPTION] <directory>\n"
294          SETDIRSTRIPE_USAGE},
295         {"getdirstripe", lfs_getdirstripe, 0,
296          "To list the striping info for a given directory\n"
297          "or recursively for all directories in a directory tree.\n"
298          "usage: getdirstripe [--obd|-O <uuid>] [--mdt-count|-c]\n"
299          "                    [--mdt-index|-i] [--mdt-hash|-t]\n"
300          "                    [--recursive|-r] [--yaml|-y]\n"
301          "                    [--default|-D] <dir> ..."},
302         {"mkdir", lfs_setdirstripe, 0,
303          "To create a striped directory on a specified MDT. This can only\n"
304          "be done on MDT0 with the right of administrator.\n"
305          "usage: mkdir [OPTION] <directory>\n"
306          SETDIRSTRIPE_USAGE},
307         {"rm_entry", lfs_rmentry, 0,
308          "To remove the name entry of the remote directory. Note: This\n"
309          "command will only delete the name entry, i.e. the remote directory\n"
310          "will become inaccessable after this command. This can only be done\n"
311          "by the administrator\n"
312          "usage: rm_entry <dir>\n"},
313         {"pool_list", lfs_poollist, 0,
314          "List pools or pool OSTs\n"
315          "usage: pool_list <fsname>[.<pool>] | <pathname>\n"},
316         {"find", lfs_find, 0,
317          "find files matching given attributes recursively in directory tree.\n"
318          "usage: find <directory|filename> ...\n"
319          "     [[!] --atime|-A [+-]N] [[!] --ctime|-C [+-]N]\n"
320          "     [[!] --mtime|-M [+-]N] [[!] --mdt|-m <uuid|index,...>]\n"
321          "     [--maxdepth|-D N] [[!] --name|-n <pattern>]\n"
322          "     [[!] --ost|-O <uuid|index,...>] [--print|-p] [--print0|-P]\n"
323          "     [[!] --size|-s [+-]N[bkMGTPE]]\n"
324          "     [[!] --stripe-count|-c [+-]<stripes>]\n"
325          "     [[!] --stripe-index|-i <index,...>]\n"
326          "     [[!] --stripe-size|-S [+-]N[kMGT]] [[!] --type|-t <filetype>]\n"
327          "     [[!] --gid|-g|--group|-G <gid>|<gname>]\n"
328          "     [[!] --uid|-u|--user|-U <uid>|<uname>] [[!] --pool <pool>]\n"
329          "     [[!] --projid <projid>]\n"
330          "     [[!] --layout|-L released,raid0,mdt]\n"
331          "     [[!] --component-count [+-]<comp_cnt>]\n"
332          "     [[!] --component-start [+-]N[kMGTPE]]\n"
333          "     [[!] --component-end|-E [+-]N[kMGTPE]]\n"
334          "     [[!] --component-flags <comp_flags>]\n"
335          "     [[!] --mdt-count|-T [+-]<stripes>]\n"
336          "     [[!] --mdt-hash|-H <hashtype>\n"
337          "\t !: used before an option indicates 'NOT' requested attribute\n"
338          "\t -: used before a value indicates less than requested value\n"
339          "\t +: used before a value indicates more than requested value\n"
340          "\tmdt-hash:   hash type of the striped directory.\n"
341          "\t            fnv_1a_64 FNV-1a hash algorithm\n"
342          "\t            all_char  sum of characters % MDT_COUNT\n"},
343         {"check", lfs_check, 0,
344          "Display the status of MDS or OSTs (as specified in the command)\n"
345          "or all the servers (MDS and OSTs).\n"
346          "usage: check <osts|mds|servers>"},
347         {"osts", lfs_osts, 0, "list OSTs connected to client "
348          "[for specified path only]\n" "usage: osts [path]"},
349         {"mdts", lfs_mdts, 0, "list MDTs connected to client "
350          "[for specified path only]\n" "usage: mdts [path]"},
351         {"df", lfs_df, 0,
352          "report filesystem disk space usage or inodes usage"
353          "of each MDS and all OSDs or a batch belonging to a specific pool .\n"
354          "Usage: df [-i] [-h] [--lazy|-l] [--pool|-p <fsname>[.<pool>] [path]"},
355         {"getname", lfs_getname, 0, "list instances and specified mount points "
356          "[for specified path only]\n"
357          "Usage: getname [-h]|[path ...] "},
358 #ifdef HAVE_SYS_QUOTA_H
359         {"setquota", lfs_setquota, 0, "Set filesystem quotas.\n"
360          "usage: setquota <-u|-g|-p> <uname>|<uid>|<gname>|<gid>|<projid>\n"
361          "                -b <block-softlimit> -B <block-hardlimit>\n"
362          "                -i <inode-softlimit> -I <inode-hardlimit> <filesystem>\n"
363          "       setquota <-u|--user|-g|--group|-p|--projid> <uname>|<uid>|<gname>|<gid>|<projid>\n"
364          "                [--block-softlimit <block-softlimit>]\n"
365          "                [--block-hardlimit <block-hardlimit>]\n"
366          "                [--inode-softlimit <inode-softlimit>]\n"
367          "                [--inode-hardlimit <inode-hardlimit>] <filesystem>\n"
368          "       setquota [-t] <-u|--user|-g|--group|-p|--projid>\n"
369          "                [--block-grace <block-grace>]\n"
370          "                [--inode-grace <inode-grace>] <filesystem>\n"
371          "       -b can be used instead of --block-softlimit/--block-grace\n"
372          "       -B can be used instead of --block-hardlimit\n"
373          "       -i can be used instead of --inode-softlimit/--inode-grace\n"
374          "       -I can be used instead of --inode-hardlimit\n\n"
375          "Note: The total quota space will be split into many qunits and\n"
376          "      balanced over all server targets, the minimal qunit size is\n"
377          "      1M bytes for block space and 1K inodes for inode space.\n\n"
378          "      Quota space rebalancing process will stop when this mininum\n"
379          "      value is reached. As a result, quota exceeded can be returned\n"
380          "      while many targets still have 1MB or 1K inodes of spare\n"
381          "      quota space."},
382         {"quota", lfs_quota, 0, "Display disk usage and limits.\n"
383          "usage: quota [-q] [-v] [-h] [-o <obd_uuid>|-i <mdt_idx>|-I "
384                        "<ost_idx>]\n"
385          "             [<-u|-g|-p> <uname>|<uid>|<gname>|<gid>|<projid>] <filesystem>\n"
386          "       quota [-o <obd_uuid>|-i <mdt_idx>|-I <ost_idx>] -t <-u|-g|-p> <filesystem>"},
387 #endif
388         {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n"
389          "usage: flushctx [-k] [mountpoint...]"},
390         {"cp", lfs_cp, 0,
391          "Remote user copy files and directories.\n"
392          "usage: cp [OPTION]... [-T] SOURCE DEST\n\tcp [OPTION]... SOURCE... DIRECTORY\n\tcp [OPTION]... -t DIRECTORY SOURCE..."},
393         {"ls", lfs_ls, 0,
394          "Remote user list directory contents.\n"
395          "usage: ls [OPTION]... [FILE]..."},
396         {"changelog", lfs_changelog, 0,
397          "Show the metadata changes on an MDT."
398          "\nusage: changelog <mdtname> [startrec [endrec]]"},
399         {"changelog_clear", lfs_changelog_clear, 0,
400          "Indicate that old changelog records up to <endrec> are no longer of "
401          "interest to consumer <id>, allowing the system to free up space.\n"
402          "An <endrec> of 0 means all records.\n"
403          "usage: changelog_clear <mdtname> <id> <endrec>"},
404         {"fid2path", lfs_fid2path, 0,
405          "Resolve the full path(s) for given FID(s). For a specific hardlink "
406          "specify link number <linkno>.\n"
407         /* "For a historical link name, specify changelog record <recno>.\n" */
408          "usage: fid2path [--link <linkno>] <fsname|rootpath> <fid> ..."
409                 /* [ --rec <recno> ] */ },
410         {"path2fid", lfs_path2fid, 0, "Display the fid(s) for a given path(s).\n"
411          "usage: path2fid [--parents] <path> ..."},
412         {"data_version", lfs_data_version, 0, "Display file data version for "
413          "a given path.\n" "usage: data_version -[n|r|w] <path>"},
414         {"hsm_state", lfs_hsm_state, 0, "Display the HSM information (states, "
415          "undergoing actions) for given files.\n usage: hsm_state <file> ..."},
416         {"hsm_set", lfs_hsm_set, 0, "Set HSM user flag on specified files.\n"
417          "usage: hsm_set [--norelease] [--noarchive] [--dirty] [--exists] "
418          "[--archived] [--lost] <file> ..."},
419         {"hsm_clear", lfs_hsm_clear, 0, "Clear HSM user flag on specified "
420          "files.\n"
421          "usage: hsm_clear [--norelease] [--noarchive] [--dirty] [--exists] "
422          "[--archived] [--lost] <file> ..."},
423         {"hsm_action", lfs_hsm_action, 0, "Display current HSM request for "
424          "given files.\n" "usage: hsm_action <file> ..."},
425         {"hsm_archive", lfs_hsm_archive, 0,
426          "Archive file to external storage.\n"
427          "usage: hsm_archive [--filelist FILELIST] [--data DATA] [--archive NUM] "
428          "<file> ..."},
429         {"hsm_restore", lfs_hsm_restore, 0,
430          "Restore file from external storage.\n"
431          "usage: hsm_restore [--filelist FILELIST] [--data DATA] <file> ..."},
432         {"hsm_release", lfs_hsm_release, 0,
433          "Release files from Lustre.\n"
434          "usage: hsm_release [--filelist FILELIST] [--data DATA] <file> ..."},
435         {"hsm_remove", lfs_hsm_remove, 0,
436          "Remove file copy from external storage.\n"
437          "usage: hsm_remove [--filelist FILELIST] [--data DATA]\n"
438          "                  [--mntpath MOUNTPATH] [--archive NUM] <file|FID> ...\n"
439          "\n"
440          "Note: To remove files from the archive that have been deleted on\n"
441          "Lustre, set mntpath and optionally archive. In that case, all the\n"
442          "positional arguments and entries in the file list must be FIDs."
443         },
444         {"hsm_cancel", lfs_hsm_cancel, 0,
445          "Cancel requests related to specified files.\n"
446          "usage: hsm_cancel [--filelist FILELIST] [--data DATA] <file> ..."},
447         {"swap_layouts", lfs_swap_layouts, 0, "Swap layouts between 2 files.\n"
448          "usage: swap_layouts <path1> <path2>"},
449         {"migrate", lfs_setstripe_migrate, 0,
450          "migrate a directory between MDTs.\n"
451          "usage: migrate --mdt-index <mdt_idx> [--verbose|-v] "
452          "<directory>\n"
453          "\tmdt_idx:      index of the destination MDT\n"
454          "\n"
455          "migrate file objects from one OST "
456          "layout\nto another (may be not safe with concurent writes).\n"
457          "usage: migrate  "
458          "[--stripe-count|-c] <stripe_count>\n"
459          "              [--stripe-index|-i] <start_ost_index>\n"
460          "              [--stripe-size|-S] <stripe_size>\n"
461          "              [--pool|-p] <pool_name>\n"
462          "              [--ost-list|-o] <ost_indices>\n"
463          "              [--block|-b]\n"
464          "              [--non-block|-n]\n"
465          "              <file|directory>\n"
466          "\tstripe_count:     number of OSTs to stripe a file over\n"
467          "\tstripe_ost_index: index of the first OST to stripe a file over\n"
468          "\tstripe_size:      number of bytes to store before moving to the next OST\n"
469          "\tpool_name:        name of the predefined pool of OSTs\n"
470          "\tost_indices:      OSTs to stripe over, in order\n"
471          "\tblock:            wait for the operation to return before continuing\n"
472          "\tnon-block:        do not wait for the operation to return.\n"},
473         {"mv", lfs_mv, 0,
474          "To move directories between MDTs. This command is deprecated, "
475          "use \"migrate\" instead.\n"
476          "usage: mv <directory|filename> [--mdt-index|-M] <mdt_index> "
477          "[--verbose|-v]\n"},
478         {"ladvise", lfs_ladvise, 0,
479          "Provide servers with advice about access patterns for a file.\n"
480          "usage: ladvise [--advice|-a ADVICE] [--start|-s START[kMGT]]\n"
481          "               [--background|-b] [--unset|-u]\n\n"
482          "               {[--end|-e END[kMGT]] | [--length|-l LENGTH[kMGT]]}\n"
483          "               {[--mode|-m [READ,WRITE]}\n"
484          "               <file> ...\n"},
485         {"mirror", lfs_mirror, mirror_cmdlist,
486          "lfs commands used to manage files with mirrored components:\n"
487          "lfs mirror create - create a mirrored file or directory\n"
488          "lfs mirror extend - add mirror(s) to an existing file\n"
489          "lfs mirror split  - split a mirror from an existing mirrored file\n"
490          "lfs mirror resync - resynchronize an out-of-sync mirrored file\n"
491          "lfs mirror verify - verify a mirrored file\n"},
492         {"help", Parser_help, 0, "help"},
493         {"exit", Parser_quit, 0, "quit"},
494         {"quit", Parser_quit, 0, "quit"},
495         {"--version", Parser_version, 0,
496          "output build version of the utility and exit"},
497         {"--list-commands", lfs_list_commands, 0,
498          "list commands supported by the utility and exit"},
499         { 0, 0, 0, NULL }
500 };
501
502
503 static int check_hashtype(const char *hashtype)
504 {
505         int i;
506
507         for (i = LMV_HASH_TYPE_ALL_CHARS; i < LMV_HASH_TYPE_MAX; i++)
508                 if (strcmp(hashtype, mdt_hash_name[i]) == 0)
509                         return i;
510
511         return 0;
512 }
513
514
/*
 * Short description of the step that failed most recently; written by
 * migrate_open_files() and reset to NULL when it succeeds.
 */
static const char *error_loc = "syserror";

/* Bit flags for migration; each constant occupies its own bit. */
enum {
        MIGRATION_NONBLOCK      = 0x1,
        MIGRATION_MIRROR        = 0x2,
};

/* Defined later in the file; used by migrate_open_files() when only a
 * layout (and no stripe parameters) is supplied. */
static int lfs_component_create(char *fname, int open_flags, mode_t open_mode,
                                struct llapi_layout *layout);
524
525 static int
526 migrate_open_files(const char *name, const struct llapi_stripe_param *param,
527                    struct llapi_layout *layout, int *fd_src, int *fd_tgt)
528 {
529         int                      fd = -1;
530         int                      fdv = -1;
531         int                      mdt_index;
532         int                      random_value;
533         char                     parent[PATH_MAX];
534         char                     volatile_file[PATH_MAX];
535         char                    *ptr;
536         int                      rc;
537         struct stat              st;
538         struct stat              stv;
539
540         if (param == NULL && layout == NULL) {
541                 error_loc = "layout information";
542                 return -EINVAL;
543         }
544
545         /* search for file directory pathname */
546         if (strlen(name) > sizeof(parent) - 1) {
547                 error_loc = "source file name";
548                 return -ERANGE;
549         }
550
551         strncpy(parent, name, sizeof(parent));
552         ptr = strrchr(parent, '/');
553         if (ptr == NULL) {
554                 if (getcwd(parent, sizeof(parent)) == NULL) {
555                         error_loc = "getcwd";
556                         return -errno;
557                 }
558         } else {
559                 if (ptr == parent) /* leading '/' */
560                         ptr = parent + 1;
561                 *ptr = '\0';
562         }
563
564         /* open file, direct io */
565         /* even if the file is only read, WR mode is nedeed to allow
566          * layout swap on fd */
567         fd = open(name, O_RDWR | O_DIRECT);
568         if (fd < 0) {
569                 rc = -errno;
570                 error_loc = "cannot open source file";
571                 return rc;
572         }
573
574         rc = llapi_file_fget_mdtidx(fd, &mdt_index);
575         if (rc < 0) {
576                 error_loc = "cannot get MDT index";
577                 goto out;
578         }
579
580         do {
581                 int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
582                 mode_t open_mode = S_IRUSR | S_IWUSR;
583
584                 random_value = random();
585                 rc = snprintf(volatile_file, sizeof(volatile_file),
586                               "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
587                               mdt_index, random_value);
588                 if (rc >= sizeof(volatile_file)) {
589                         rc = -ENAMETOOLONG;
590                         break;
591                 }
592
593                 /* create, open a volatile file, use caching (ie no directio) */
594                 if (param != NULL)
595                         fdv = llapi_file_open_param(volatile_file, open_flags,
596                                                     open_mode, param);
597                 else
598                         fdv = lfs_component_create(volatile_file, open_flags,
599                                                    open_mode, layout);
600         } while (fdv < 0 && (rc = fdv) == -EEXIST);
601
602         if (rc < 0) {
603                 error_loc = "cannot create volatile file";
604                 goto out;
605         }
606
607         /* In case the MDT does not support creation of volatile files
608          * we should try to unlink it. */
609         (void)unlink(volatile_file);
610
611         /* Not-owner (root?) special case.
612          * Need to set owner/group of volatile file like original.
613          * This will allow to pass related check during layout_swap.
614          */
615         rc = fstat(fd, &st);
616         if (rc != 0) {
617                 rc = -errno;
618                 error_loc = "cannot stat source file";
619                 goto out;
620         }
621
622         rc = fstat(fdv, &stv);
623         if (rc != 0) {
624                 rc = -errno;
625                 error_loc = "cannot stat volatile";
626                 goto out;
627         }
628
629         if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
630                 rc = fchown(fdv, st.st_uid, st.st_gid);
631                 if (rc != 0) {
632                         rc = -errno;
633                         error_loc = "cannot change ownwership of volatile";
634                         goto out;
635                 }
636         }
637
638 out:
639         if (rc < 0) {
640                 if (fd > 0)
641                         close(fd);
642                 if (fdv > 0)
643                         close(fdv);
644         } else {
645                 *fd_src = fd;
646                 *fd_tgt = fdv;
647                 error_loc = NULL;
648         }
649         return rc;
650 }
651
/**
 * Copy everything readable from @fd_src into @fd_dst and fsync the result.
 *
 * The transfer buffer is page aligned; its size is the stripe size of the
 * source layout when that can be queried, 4MiB otherwise.
 *
 * \param[in] fd_src      descriptor to read from
 * \param[in] fd_dst      descriptor to write to
 * \param[in] check_file  optional callback run on \a fd_src before each
 *                        read; a negative return aborts the copy
 *
 * \retval 0       data copied and flushed
 * \retval -errno  allocation, callback, read, write or fsync failure
 */
static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int))
{
	struct llapi_layout *src_layout;
	size_t chunk_size = 4 * 1024 * 1024;
	void *chunk = NULL;
	ssize_t nread = -1;
	ssize_t nwritten = 0;
	size_t total_read = 0;
	size_t total_written = 0;
	off_t pending_off = 0;
	int rc;

	src_layout = llapi_layout_get_by_fd(fd_src, 0);
	if (src_layout != NULL) {
		uint64_t stripe_size;

		if (llapi_layout_stripe_size_get(src_layout, &stripe_size) == 0)
			chunk_size = stripe_size;

		llapi_layout_free(src_layout);
	}

	/* Use a page-aligned buffer for direct I/O */
	rc = posix_memalign(&chunk, getpagesize(), chunk_size);
	if (rc != 0)
		return -rc;

	for (;;) {
		/* refill only once the previous chunk is fully written out */
		if (total_written == total_read) {
			if (check_file != NULL) {
				rc = check_file(fd_src);
				if (rc < 0)
					break;
			}

			nread = read(fd_src, chunk, chunk_size);
			if (nread < 0) {
				rc = -errno;
				break;
			}
			total_read += nread;
			pending_off = 0;
		}

		/* a zero-length read means end of file */
		if (nread == 0)
			break;

		/* write() may be short: resume from where it stopped */
		nwritten = write(fd_dst, (char *)chunk + pending_off,
				 total_read - total_written);
		if (nwritten < 0) {
			rc = -errno;
			break;
		}
		total_written += nwritten;
		pending_off += nwritten;
	}

	if (rc == 0 && fsync(fd_dst) < 0)
		rc = -errno;

	free(chunk);
	return rc;
}
720
/**
 * Copy the access and modification times of @fd onto @fdv.
 *
 * Only whole seconds are carried over (tv_usec is left at zero).
 *
 * \param[in] fd   descriptor whose atime/mtime are read
 * \param[in] fdv  descriptor whose atime/mtime are set
 *
 * \retval 0       timestamps copied
 * \retval -errno  fstat() or futimes() failed
 */
static int migrate_copy_timestamps(int fd, int fdv)
{
	struct stat st;
	struct timeval tv[2];

	if (fstat(fd, &st) != 0)
		return -errno;

	tv[0] = (struct timeval){ .tv_sec = st.st_atime };
	tv[1] = (struct timeval){ .tv_sec = st.st_mtime };

	/* futimes() returns -1 and sets errno; callers expect -errno so that
	 * strerror(-rc) reports the real cause */
	if (futimes(fdv, tv) != 0)
		return -errno;

	return 0;
}
736
737 static int migrate_block(int fd, int fdv)
738 {
739         __u64   dv1;
740         int     gid;
741         int     rc;
742         int     rc2;
743
744         rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
745         if (rc < 0) {
746                 error_loc = "cannot get dataversion";
747                 return rc;
748         }
749
750         do
751                 gid = random();
752         while (gid == 0);
753
754         /* The grouplock blocks all concurrent accesses to the file.
755          * It has to be taken after llapi_get_data_version as it would
756          * block it too. */
757         rc = llapi_group_lock(fd, gid);
758         if (rc < 0) {
759                 error_loc = "cannot get group lock";
760                 return rc;
761         }
762
763         rc = migrate_copy_data(fd, fdv, NULL);
764         if (rc < 0) {
765                 error_loc = "data copy failed";
766                 goto out_unlock;
767         }
768
769         /* Make sure we keep original atime/mtime values */
770         rc = migrate_copy_timestamps(fd, fdv);
771         if (rc < 0) {
772                 error_loc = "timestamp copy failed";
773                 goto out_unlock;
774         }
775
776         /* swap layouts
777          * for a migration we need to check data version on file did
778          * not change.
779          *
780          * Pass in gid=0 since we already own grouplock. */
781         rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0,
782                                            SWAP_LAYOUTS_CHECK_DV1);
783         if (rc == -EAGAIN) {
784                 error_loc = "file changed";
785                 goto out_unlock;
786         } else if (rc < 0) {
787                 error_loc = "cannot swap layout";
788                 goto out_unlock;
789         }
790
791 out_unlock:
792         rc2 = llapi_group_unlock(fd, gid);
793         if (rc2 < 0 && rc == 0) {
794                 error_loc = "unlock group lock";
795                 rc = rc2;
796         }
797
798         return rc;
799 }
800
/**
 * Lease-check callback for migrate_copy_data().
 *
 * \param[in]  fd           File descriptor on which to check the lease.
 *
 * \retval 0       llapi_lease_check() returned a positive value; the
 *                 migration can keep going.
 * \retval -EBUSY  llapi_lease_check() returned zero or a negative value;
 *                 abort the migration.
 */
static int check_lease(int fd)
{
	return llapi_lease_check(fd) > 0 ? 0 : -EBUSY;
}
820
821 static int migrate_nonblock(int fd, int fdv)
822 {
823         __u64   dv1;
824         __u64   dv2;
825         int     rc;
826
827         rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
828         if (rc < 0) {
829                 error_loc = "cannot get data version";
830                 return rc;
831         }
832
833         rc = migrate_copy_data(fd, fdv, check_lease);
834         if (rc < 0) {
835                 error_loc = "data copy failed";
836                 return rc;
837         }
838
839         rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH);
840         if (rc != 0) {
841                 error_loc = "cannot get data version";
842                 return rc;
843         }
844
845         if (dv1 != dv2) {
846                 rc = -EAGAIN;
847                 error_loc = "source file changed";
848                 return rc;
849         }
850
851         /* Make sure we keep original atime/mtime values */
852         rc = migrate_copy_timestamps(fd, fdv);
853         if (rc < 0) {
854                 error_loc = "timestamp copy failed";
855                 return rc;
856         }
857
858         return 0;
859 }
860
/**
 * Placeholder for setting flags on a layout component.
 *
 * All arguments are ignored.
 *
 * Return: always -ENOTSUP.
 */
static int lfs_component_set(char *fname, int comp_id, __u32 flags)
{
	(void)fname;
	(void)comp_id;
	(void)flags;

	return -ENOTSUP;
}
865
866 static int lfs_component_del(char *fname, __u32 comp_id, __u32 flags)
867 {
868         int     rc = 0;
869
870         if (flags != 0 && comp_id != 0)
871                 return -EINVAL;
872
873         /* LCME_FL_INIT is the only supported flag in PFL */
874         if (flags != 0) {
875                 if (flags & ~LCME_KNOWN_FLAGS) {
876                         fprintf(stderr, "Invalid component flags %#x\n", flags);
877                         return -EINVAL;
878                 }
879         } else if (comp_id > LCME_ID_MAX) {
880                 fprintf(stderr, "Invalid component id %u\n", comp_id);
881                 return -EINVAL;
882         }
883
884         rc = llapi_layout_file_comp_del(fname, comp_id, flags);
885         if (rc)
886                 fprintf(stderr, "Delete component %#x from %s failed. %s\n",
887                         comp_id, fname, strerror(errno));
888         return rc;
889 }
890
/**
 * Add the component(s) described by @layout to the file @fname.
 *
 * Return: -EINVAL if @layout is NULL, otherwise the result of
 * llapi_layout_file_comp_add(); a failure is also reported on stderr.
 */
static int lfs_component_add(char *fname, struct llapi_layout *layout)
{
	int rc;

	if (layout == NULL)
		return -EINVAL;

	rc = llapi_layout_file_comp_add(fname, layout);
	if (rc == 0)
		return 0;

	fprintf(stderr, "Add layout component(s) to %s failed. %s\n",
		fname, strerror(errno));
	return rc;
}
904
905 static int lfs_component_create(char *fname, int open_flags, mode_t open_mode,
906                                 struct llapi_layout *layout)
907 {
908         struct stat     st;
909         int     fd;
910
911         if (layout == NULL)
912                 return -EINVAL;
913
914         fd = lstat(fname, &st);
915         if (fd == 0 && S_ISDIR(st.st_mode))
916                 open_flags = O_DIRECTORY | O_RDONLY;
917
918         fd = llapi_layout_file_open(fname, open_flags, open_mode, layout);
919         if (fd < 0)
920                 fprintf(stderr, "%s: cannot %s '%s': %s\n", progname,
921                         S_ISDIR(st.st_mode) ?
922                                 "set default composite layout for" :
923                                 "create composite file",
924                         fname, strerror(errno));
925         return fd;
926 }
927
928 static int lfs_migrate(char *name, __u64 migration_flags,
929                        struct llapi_stripe_param *param,
930                        struct llapi_layout *layout)
931 {
932         int fd = -1;
933         int fdv = -1;
934         int rc;
935
936         rc = migrate_open_files(name, param, layout, &fd, &fdv);
937         if (rc < 0)
938                 goto out;
939
940         if (!(migration_flags & MIGRATION_NONBLOCK)) {
941                 /* Blocking mode (forced if servers do not support file lease).
942                  * It is also the default mode, since we cannot distinguish
943                  * between a broken lease and a server that does not support
944                  * atomic swap/close (LU-6785) */
945                 rc = migrate_block(fd, fdv);
946                 goto out;
947         }
948
949         rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
950         if (rc < 0) {
951                 error_loc = "cannot get lease";
952                 goto out;
953         }
954
955         rc = migrate_nonblock(fd, fdv);
956         if (rc < 0) {
957                 llapi_lease_put(fd);
958                 goto out;
959         }
960
961         /* Atomically put lease, swap layouts and close.
962          * for a migration we need to check data version on file did
963          * not change. */
964         rc = llapi_fswap_layouts(fd, fdv, 0, 0,
965                                  migration_flags & MIGRATION_MIRROR ?
966                                  MERGE_LAYOUTS_CLOSE : SWAP_LAYOUTS_CLOSE);
967         if (rc < 0) {
968                 error_loc = "cannot swap layout";
969                 goto out;
970         }
971
972 out:
973         if (fd >= 0)
974                 close(fd);
975
976         if (fdv >= 0)
977                 close(fdv);
978
979         if (rc < 0)
980                 fprintf(stderr, "error: %s: %s: %s: %s\n",
981                         progname, name, error_loc, strerror(-rc));
982         return rc;
983 }
984
/**
 * struct mirror_args - One parsed mirror specification.
 * @m_count:  How many mirrors to create from this specification.
 * @m_layout: Layout used for the mirror(s).
 * @m_file:   Victim file whose layout is split off and used as a mirror,
 *            or NULL when the mirror is described by @m_layout alone.
 * @m_next:   Next specification in the list, NULL at the tail.
 *
 * The mirror arguments given on the command line are collected in a
 * singly linked list of these nodes.
 */
struct mirror_args {
	__u32 m_count;
	struct llapi_layout *m_layout;
	const char *m_file;
	struct mirror_args *m_next;
};
1001
/**
 * enum mirror_flags - Options for extending a mirrored file.
 * @NO_VERIFY: Skip comparing the victim file(s) with the original
 *             mirrored file, i.e. trust that they already hold the same
 *             data.
 */
enum mirror_flags {
	NO_VERIFY = 0x1,
};
1013
1014 /**
1015  * mirror_create_sanity_check() - Check mirror list.
1016  * @list:  A linked list that stores the mirror arguments.
1017  *
1018  * This function does a sanity check on @list for creating
1019  * a mirrored file.
1020  *
1021  * Return: 0 on success or a negative error code on failure.
1022  */
1023 static int mirror_create_sanity_check(struct mirror_args *list)
1024 {
1025         int rc = 0;
1026         bool has_m_file = false;
1027         bool has_m_layout = false;
1028
1029         if (list == NULL)
1030                 return -EINVAL;
1031
1032         while (list != NULL) {
1033                 uint64_t start, end;
1034
1035                 if (list->m_file != NULL) {
1036                         has_m_file = true;
1037                         llapi_layout_free(list->m_layout);
1038
1039                         list->m_layout =
1040                                 llapi_layout_get_by_path(list->m_file, 0);
1041                         if (list->m_layout == NULL) {
1042                                 fprintf(stderr,
1043                                         "error: %s: file '%s' has no layout\n",
1044                                         progname, list->m_file);
1045                                 return -ENODATA;
1046                         }
1047                 } else {
1048                         if (list->m_layout != NULL)
1049                                 has_m_layout = true;
1050                         else {
1051                                 fprintf(stderr, "error: %s: no mirror layout\n",
1052                                         progname);
1053                                 return -EINVAL;
1054                         }
1055                 }
1056
1057                 rc = llapi_layout_comp_use(list->m_layout,
1058                                            LLAPI_LAYOUT_COMP_USE_LAST);
1059                 if (rc)
1060                         return -errno;
1061
1062                 rc = llapi_layout_comp_extent_get(list->m_layout, &start, &end);
1063                 if (rc)
1064                         return -errno;
1065
1066                 if (end != LUSTRE_EOF) {
1067                         fprintf(stderr,
1068                                 "error: %s: mirror layout doesn't reach eof\n",
1069                                 progname);
1070                         return -EINVAL;
1071                 }
1072
1073                 list = list->m_next;
1074         }
1075
1076         if (has_m_file && has_m_layout) {
1077                 fprintf(stderr, "error: %s: -f <victim_file> option should not "
1078                         "be specified with setstripe options or "
1079                         "--parent option\n", progname);
1080                 return -EINVAL;
1081         }
1082
1083         return 0;
1084 }
1085
1086 /**
1087  * mirror_create() - Create a mirrored file.
1088  * @fname:        The file to be created.
1089  * @mirror_list:  A linked list that stores the mirror arguments.
1090  *
1091  * This function creates a mirrored file @fname with the mirror(s)
1092  * from @mirror_list.
1093  *
1094  * Return: 0 on success or a negative error code on failure.
1095  */
1096 static int mirror_create(char *fname, struct mirror_args *mirror_list)
1097 {
1098         struct llapi_layout *layout = NULL;
1099         struct mirror_args *cur_mirror = NULL;
1100         uint16_t mirror_count = 0;
1101         int i = 0;
1102         int rc = 0;
1103
1104         rc = mirror_create_sanity_check(mirror_list);
1105         if (rc)
1106                 return rc;
1107
1108         cur_mirror = mirror_list;
1109         while (cur_mirror != NULL) {
1110                 for (i = 0; i < cur_mirror->m_count; i++) {
1111                         rc = llapi_layout_merge(&layout, cur_mirror->m_layout);
1112                         if (rc) {
1113                                 rc = -errno;
1114                                 fprintf(stderr, "error: %s: "
1115                                         "merge layout failed: %s\n",
1116                                         progname, strerror(errno));
1117                                 goto error;
1118                         }
1119                 }
1120                 mirror_count += cur_mirror->m_count;
1121                 cur_mirror = cur_mirror->m_next;
1122         }
1123
1124         rc = llapi_layout_mirror_count_set(layout, mirror_count);
1125         if (rc) {
1126                 rc = -errno;
1127                 fprintf(stderr, "error: %s: set mirror count failed: %s\n",
1128                         progname, strerror(errno));
1129                 goto error;
1130         }
1131
1132         rc = lfs_component_create(fname, O_CREAT | O_WRONLY | O_EXCL, 0644,
1133                                   layout);
1134         if (rc >= 0) {
1135                 close(rc);
1136                 rc = 0;
1137         }
1138
1139 error:
1140         llapi_layout_free(layout);
1141         return rc;
1142 }
1143
/**
 * Compare the contents of @fd and @fdv from their current offsets,
 * checking the lease on @fd before every chunk.
 *
 * The comparison stops at the first read that returns no data or fails,
 * at the first chunk where the two reads differ in length, or at the
 * first chunk whose bytes differ.
 *
 * \param[in] fd   first file; its lease is checked
 * \param[in] fdv  second file
 *
 * \retval >= 0     number of leading bytes found to be identical
 * \retval -ENOMEM  the comparison buffer could not be allocated
 * \retval -EBUSY   llapi_lease_check() did not return a positive value
 */
static ssize_t mirror_file_compare(int fd, int fdv)
{
	const size_t buflen = 4 * 1024 * 1024; /* 4M */
	char *buf;
	char *buf_v;
	ssize_t bytes_done = 0;
	ssize_t bytes_read = 0;

	/* one allocation, split in two halves: one per file */
	buf = malloc(buflen * 2);
	if (buf == NULL)
		return -ENOMEM;
	buf_v = buf + buflen;

	while (1) {
		/* Same criterion as check_lease(): only a positive return
		 * counts as a held lease, so a negative error is not
		 * mistaken for success. */
		if (llapi_lease_check(fd) <= 0) {
			bytes_done = -EBUSY;
			break;
		}

		bytes_read = read(fd, buf, buflen);
		if (bytes_read <= 0)
			break;

		if (bytes_read != read(fdv, buf_v, buflen))
			break;

		/* XXX: should compute the checksum on each buffer and then
		 * compare checksum to avoid cache collision */
		if (memcmp(buf, buf_v, bytes_read))
			break;

		bytes_done += bytes_read;
	}

	free(buf);

	return bytes_done;
}
1185
1186 static int mirror_extend_file(const char *fname, const char *victim_file,
1187                               enum mirror_flags mirror_flags)
1188 {
1189         int fd = -1;
1190         int fdv = -1;
1191         struct stat stbuf;
1192         struct stat stbuf_v;
1193         __u64 dv;
1194         int rc;
1195
1196         fd = open(fname, O_RDWR);
1197         if (fd < 0) {
1198                 error_loc = "open source file";
1199                 rc = -errno;
1200                 goto out;
1201         }
1202
1203         fdv = open(victim_file, O_RDWR);
1204         if (fdv < 0) {
1205                 error_loc = "open target file";
1206                 rc = -errno;
1207                 goto out;
1208         }
1209
1210         if (fstat(fd, &stbuf) || fstat(fdv, &stbuf_v)) {
1211                 error_loc = "stat source or target file";
1212                 rc = -errno;
1213                 goto out;
1214         }
1215
1216         if (stbuf.st_dev != stbuf_v.st_dev) {
1217                 error_loc = "stat source and target file";
1218                 rc = -EXDEV;
1219                 goto out;
1220         }
1221
1222         /* mirrors should be of the same size */
1223         if (stbuf.st_size != stbuf_v.st_size) {
1224                 error_loc = "file sizes don't match";
1225                 rc = -EINVAL;
1226                 goto out;
1227         }
1228
1229         rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
1230         if (rc < 0) {
1231                 error_loc = "cannot get lease";
1232                 goto out;
1233         }
1234
1235         if (!(mirror_flags & NO_VERIFY)) {
1236                 ssize_t ret;
1237                 /* mirrors should have the same contents */
1238                 ret = mirror_file_compare(fd, fdv);
1239                 if (ret != stbuf.st_size) {
1240                         error_loc = "file busy or contents don't match";
1241                         rc = ret < 0 ? ret : -EINVAL;
1242                         goto out;
1243                 }
1244         }
1245
1246         /* Get rid of caching pages from clients */
1247         rc = llapi_get_data_version(fd, &dv, LL_DV_WR_FLUSH);
1248         if (rc < 0) {
1249                 error_loc = "cannot get data version";
1250                 return rc;
1251         }
1252
1253         rc = llapi_get_data_version(fdv, &dv, LL_DV_WR_FLUSH);
1254         if (rc < 0) {
1255                 error_loc = "cannot get data version";
1256                 return rc;
1257
1258         }
1259
1260         /* Make sure we keep original atime/mtime values */
1261         rc = migrate_copy_timestamps(fd, fdv);
1262
1263         /* Atomically put lease, swap layouts and close.
1264          * for a migration we need to check data version on file did
1265          * not change. */
1266         rc = llapi_fswap_layouts(fd, fdv, 0, 0, MERGE_LAYOUTS_CLOSE);
1267         if (rc < 0) {
1268                 error_loc = "cannot swap layout";
1269                 goto out;
1270         }
1271
1272 out:
1273         if (fd >= 0)
1274                 close(fd);
1275
1276         if (fdv >= 0)
1277                 close(fdv);
1278
1279         if (!rc)
1280                 (void) unlink(victim_file);
1281
1282         if (rc < 0)
1283                 fprintf(stderr, "error: %s: %s: %s: %s\n",
1284                         progname, fname, error_loc, strerror(-rc));
1285         return rc;
1286 }
1287
1288 static int mirror_extend(char *fname, struct mirror_args *mirror_list,
1289                          enum mirror_flags mirror_flags)
1290 {
1291         int rc;
1292
1293         rc = mirror_create_sanity_check(mirror_list);
1294         if (rc)
1295                 return rc;
1296
1297         while (mirror_list) {
1298                 if (mirror_list->m_file != NULL) {
1299                         rc = mirror_extend_file(fname, mirror_list->m_file,
1300                                                 mirror_flags);
1301                 } else {
1302                         __u32 mirror_count = mirror_list->m_count;
1303
1304                         while (mirror_count > 0) {
1305                                 rc = lfs_migrate(fname,
1306                                         MIGRATION_NONBLOCK | MIGRATION_MIRROR,
1307                                         NULL, mirror_list->m_layout);
1308                                 if (rc)
1309                                         break;
1310
1311                                 --mirror_count;
1312                         }
1313                 }
1314                 if (rc)
1315                         break;
1316
1317                 mirror_list = mirror_list->m_next;
1318         }
1319
1320         return rc;
1321 }
1322
/**
 * Parse a string containing an OST index list into an array of integers.
 *
 * The input string contains a comma delimited list of individual
 * indices and ranges, for example "1,2-4,7". Add the indices into the
 * \a osts array and remove duplicates. Numbers are read with strtol()
 * base 0, so "0x10" and "010" are accepted as hex and octal.
 *
 * \a arg is modified while parsing (each token is NUL terminated in
 * place) but is restored to its original contents before returning.
 *
 * \param[out] osts    array to store indices in
 * \param[in] size     size of \a osts array
 * \param[in] offset   starting index in \a osts
 * \param[in] arg      string containing OST index list
 *
 * \retval positive    number of indices in \a osts
 * \retval -EINVAL     unable to parse \a arg, or \a osts is too small to
 *                     hold every distinct index
 */
static int parse_targets(__u32 *osts, int size, int offset, char *arg)
{
	int rc = -EINVAL;
	int nr = offset;
	int slots = size - offset;
	char *ptr = NULL;
	bool end_of_loop = false;

	if (arg == NULL)
		return -EINVAL;

	while (!end_of_loop) {
		int start_index;
		int end_index;
		int i;
		bool overflow = false;
		char *endptr = NULL;

		rc = -EINVAL;

		/* isolate the current token by cutting at the next comma */
		ptr = strchrnul(arg, ',');

		end_of_loop = *ptr == '\0';
		*ptr = '\0';

		start_index = strtol(arg, &endptr, 0);
		if (endptr == arg) /* no data at all */
			break;
		if (*endptr != '-' && *endptr != '\0') /* has invalid data */
			break;
		if (start_index < 0)
			break;

		end_index = start_index;
		if (*endptr == '-') {
			char *range_end = endptr + 1;

			end_index = strtol(range_end, &endptr, 0);
			if (endptr == range_end) /* "N-" without upper bound */
				break;
			if (*endptr != '\0')
				break;
			if (end_index < start_index)
				break;
		}

		for (i = start_index; i <= end_index; i++) {
			int j;

			/* remove duplicate */
			for (j = 0; j < offset; j++) {
				if (osts[j] == (__u32)i)
					break;
			}
			if (j < offset)
				continue;

			/* a new index with nowhere to store it is an error,
			 * not something to drop silently */
			if (slots <= 0) {
				overflow = true;
				break;
			}
			osts[nr++] = i;
			--slots;
		}
		if (overflow)
			break;

		/* Put the comma back only if one was removed: on the last
		 * token ptr is the string's own terminator, which must stay. */
		if (!end_of_loop) {
			*ptr = ',';
			arg = ptr + 1;
		}
		offset = nr;
		rc = 0;
	}
	/* bailed out in the middle of the list: undo the cut */
	if (!end_of_loop && ptr != NULL)
		*ptr = ',';

	return rc < 0 ? rc : nr;
}
1406
/*
 * Stripe options collected for one layout component.
 * Defaults are assigned by setstripe_args_init().
 */
struct lfs_setstripe_args {
	unsigned long long lsa_comp_end;	/* component extent end */
	unsigned long long lsa_stripe_size;
	long long lsa_stripe_count;
	long long lsa_stripe_off;
	__u32 lsa_comp_flags;
	int lsa_nr_osts;			/* entries used in lsa_osts */
	unsigned long long lsa_pattern;
	__u32 *lsa_osts;
	char *lsa_pool_name;
};
1418
1419 static inline void setstripe_args_init(struct lfs_setstripe_args *lsa)
1420 {
1421         memset(lsa, 0, sizeof(*lsa));
1422
1423         lsa->lsa_stripe_size = LLAPI_LAYOUT_DEFAULT;
1424         lsa->lsa_stripe_count = LLAPI_LAYOUT_DEFAULT;
1425         lsa->lsa_stripe_off = LLAPI_LAYOUT_DEFAULT;
1426         lsa->lsa_pattern = LLAPI_LAYOUT_RAID0;
1427         lsa->lsa_pool_name = NULL;
1428 }
1429
1430 /**
1431  * setstripe_args_init_inherit() - Initialize and inherit stripe options.
1432  * @lsa: Stripe options to be initialized and inherited.
1433  *
1434  * This function initializes stripe options in @lsa and inherit
1435  * stripe_size, stripe_count and OST pool_name options.
1436  *
1437  * Return: void.
1438  */
1439 static inline void setstripe_args_init_inherit(struct lfs_setstripe_args *lsa)
1440 {
1441         unsigned long long stripe_size;
1442         long long stripe_count;
1443         char *pool_name = NULL;
1444
1445         stripe_size = lsa->lsa_stripe_size;
1446         stripe_count = lsa->lsa_stripe_count;
1447         pool_name = lsa->lsa_pool_name;
1448
1449         setstripe_args_init(lsa);
1450
1451         lsa->lsa_stripe_size = stripe_size;
1452         lsa->lsa_stripe_count = stripe_count;
1453         lsa->lsa_pool_name = pool_name;
1454 }
1455
1456 static inline bool setstripe_args_specified(struct lfs_setstripe_args *lsa)
1457 {
1458         return (lsa->lsa_stripe_size != LLAPI_LAYOUT_DEFAULT ||
1459                 lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT ||
1460                 lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT ||
1461                 lsa->lsa_pattern != LLAPI_LAYOUT_RAID0 ||
1462                 lsa->lsa_pool_name != NULL ||
1463                 lsa->lsa_comp_end != 0);
1464 }
1465
1466 /**
1467  * comp_args_to_layout() - Create or extend a composite layout.
1468  * @composite:       Pointer to the composite layout.
1469  * @lsa:             Stripe options for the new component.
1470  *
1471  * This function creates or extends a composite layout by adding a new
1472  * component with stripe options from @lsa.
1473  *
1474  * Return: 0 on success or an error code on failure.
1475  */
1476 static int comp_args_to_layout(struct llapi_layout **composite,
1477                                struct lfs_setstripe_args *lsa)
1478 {
1479         struct llapi_layout *layout = *composite;
1480         uint64_t prev_end = 0;
1481         int i = 0, rc;
1482
1483         if (layout == NULL) {
1484                 layout = llapi_layout_alloc();
1485                 if (layout == NULL) {
1486                         fprintf(stderr, "Alloc llapi_layout failed. %s\n",
1487                                 strerror(errno));
1488                         return -ENOMEM;
1489                 }
1490                 *composite = layout;
1491         } else {
1492                 uint64_t start;
1493
1494                 /* Get current component extent, current component
1495                  * must be the tail component. */
1496                 rc = llapi_layout_comp_extent_get(layout, &start, &prev_end);
1497                 if (rc) {
1498                         fprintf(stderr, "Get comp extent failed. %s\n",
1499                                 strerror(errno));
1500                         return rc;
1501                 }
1502
1503                 rc = llapi_layout_comp_add(layout);
1504                 if (rc) {
1505                         fprintf(stderr, "Add component failed. %s\n",
1506                                 strerror(errno));
1507                         return rc;
1508                 }
1509         }
1510
1511         rc = llapi_layout_comp_extent_set(layout, prev_end, lsa->lsa_comp_end);
1512         if (rc) {
1513                 fprintf(stderr, "Set extent [%lu, %llu) failed. %s\n",
1514                         prev_end, lsa->lsa_comp_end, strerror(errno));
1515                 return rc;
1516         }
1517
1518         /* Data-on-MDT component setting */
1519         if (lsa->lsa_pattern == LLAPI_LAYOUT_MDT) {
1520                 /* In case of Data-on-MDT patterns the only extra option
1521                  * applicable is stripe size option. */
1522                 if (lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT) {
1523                         fprintf(stderr, "Option 'stripe-count' can't be "
1524                                 "specified with Data-on-MDT component: %lld\n",
1525                                 lsa->lsa_stripe_count);
1526                         return -EINVAL;
1527                 }
1528                 if (lsa->lsa_stripe_size != LLAPI_LAYOUT_DEFAULT) {
1529                         fprintf(stderr, "Option 'stripe-size' can't be "
1530                                 "specified with Data-on-MDT component: %llu\n",
1531                                 lsa->lsa_stripe_size);
1532                         return -EINVAL;
1533                 }
1534                 if (lsa->lsa_nr_osts != 0) {
1535                         fprintf(stderr, "Option 'ost-list' can't be specified "
1536                                 "with Data-on-MDT component: '%i'\n",
1537                                 lsa->lsa_nr_osts);
1538                         return -EINVAL;
1539                 }
1540                 if (lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT) {
1541                         fprintf(stderr, "Option 'stripe-offset' can't be "
1542                                 "specified with Data-on-MDT component: %lld\n",
1543                                 lsa->lsa_stripe_off);
1544                         return -EINVAL;
1545                 }
1546                 if (lsa->lsa_pool_name != 0) {
1547                         fprintf(stderr, "Option 'pool' can't be specified "
1548                                 "with Data-on-MDT component: '%s'\n",
1549                                 lsa->lsa_pool_name);
1550                         return -EINVAL;
1551                 }
1552
1553                 rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
1554                 if (rc) {
1555                         fprintf(stderr, "Set stripe pattern %#llx failed. %s\n",
1556                                 lsa->lsa_pattern, strerror(errno));
1557                         return rc;
1558                 }
1559                 /* Data-on-MDT component has always single stripe up to end */
1560                 lsa->lsa_stripe_size = lsa->lsa_comp_end;
1561         }
1562
1563         rc = llapi_layout_stripe_size_set(layout, lsa->lsa_stripe_size);
1564         if (rc) {
1565                 fprintf(stderr, "Set stripe size %llu failed: %s\n",
1566                         lsa->lsa_stripe_size, strerror(errno));
1567                 return rc;
1568         }
1569
1570         rc = llapi_layout_stripe_count_set(layout, lsa->lsa_stripe_count);
1571         if (rc) {
1572                 fprintf(stderr, "Set stripe count %lld failed: %s\n",
1573                         lsa->lsa_stripe_count, strerror(errno));
1574                 return rc;
1575         }
1576
1577         if (lsa->lsa_pool_name != NULL) {
1578                 rc = llapi_layout_pool_name_set(layout, lsa->lsa_pool_name);
1579                 if (rc) {
1580                         fprintf(stderr, "Set pool name: %s failed. %s\n",
1581                                 lsa->lsa_pool_name, strerror(errno));
1582                         return rc;
1583                 }
1584         } else {
1585                 rc = llapi_layout_pool_name_set(layout, "");
1586                 if (rc) {
1587                         fprintf(stderr, "Clear pool name failed: %s\n",
1588                                 strerror(errno));
1589                         return rc;
1590                 }
1591         }
1592
1593         if (lsa->lsa_nr_osts > 0) {
1594                 if (lsa->lsa_stripe_count > 0 &&
1595                     lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT &&
1596                     lsa->lsa_stripe_count != LLAPI_LAYOUT_WIDE &&
1597                     lsa->lsa_nr_osts != lsa->lsa_stripe_count) {
1598                         fprintf(stderr, "stripe_count(%lld) != nr_osts(%d)\n",
1599                                 lsa->lsa_stripe_count, lsa->lsa_nr_osts);
1600                         return -EINVAL;
1601                 }
1602                 for (i = 0; i < lsa->lsa_nr_osts; i++) {
1603                         rc = llapi_layout_ost_index_set(layout, i,
1604                                                         lsa->lsa_osts[i]);
1605                         if (rc)
1606                                 break;
1607                 }
1608         } else if (lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT) {
1609                 rc = llapi_layout_ost_index_set(layout, 0, lsa->lsa_stripe_off);
1610         }
1611         if (rc) {
1612                 fprintf(stderr, "Set ost index %d failed. %s\n",
1613                         i, strerror(errno));
1614                 return rc;
1615         }
1616
1617         return 0;
1618 }
1619
1620 /* In 'lfs setstripe --component-add' mode, we need to fetch the extent
1621  * end of the last component in the existing file, and adjust the
1622  * first extent start of the components to be added accordingly. */
1623 static int adjust_first_extent(char *fname, struct llapi_layout *layout)
1624 {
1625         struct llapi_layout *head;
1626         uint64_t start, end, stripe_size, prev_end = 0;
1627         int rc;
1628
1629         if (layout == NULL)
1630                 return -EINVAL;
1631
1632         errno = 0;
1633         head = llapi_layout_get_by_path(fname, 0);
1634         if (head == NULL) {
1635                 fprintf(stderr, "Read layout from %s failed. %s\n",
1636                         fname, strerror(errno));
1637                 return -EINVAL;
1638         } else if (errno == ENODATA) {
1639                 /* file without LOVEA, this component-add will be turned
1640                  * into a component-create. */
1641                 llapi_layout_free(head);
1642                 return -ENODATA;
1643         } else if (!llapi_layout_is_composite(head)) {
1644                 fprintf(stderr, "'%s' isn't a composite file.\n",
1645                         fname);
1646                 llapi_layout_free(head);
1647                 return -EINVAL;
1648         }
1649
1650         rc = llapi_layout_comp_extent_get(head, &start, &prev_end);
1651         if (rc) {
1652                 fprintf(stderr, "Get prev extent failed. %s\n",
1653                         strerror(errno));
1654                 llapi_layout_free(head);
1655                 return rc;
1656         }
1657
1658         llapi_layout_free(head);
1659
1660         /* Make sure we use the first component of the layout to be added. */
1661         rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
1662         if (rc < 0) {
1663                 fprintf(stderr, "Move component cursor failed. %s\n",
1664                         strerror(errno));
1665                 return rc;
1666         }
1667
1668         rc = llapi_layout_comp_extent_get(layout, &start, &end);
1669         if (rc) {
1670                 fprintf(stderr, "Get extent failed. %s\n", strerror(errno));
1671                 return rc;
1672         }
1673
1674         if (start > prev_end || end <= prev_end) {
1675                 fprintf(stderr, "First extent to be set [%lu, %lu) isn't "
1676                         "adjacent with the existing file extent end: %lu\n",
1677                         start, end, prev_end);
1678                 return -EINVAL;
1679         }
1680
1681         rc = llapi_layout_stripe_size_get(layout, &stripe_size);
1682         if (rc) {
1683                 fprintf(stderr, "Get stripe size failed. %s\n",
1684                         strerror(errno));
1685                 return rc;
1686         }
1687
1688         if (stripe_size != LLAPI_LAYOUT_DEFAULT &&
1689             (prev_end & (stripe_size - 1))) {
1690                 fprintf(stderr, "Stripe size %lu not aligned with %lu\n",
1691                         stripe_size, prev_end);
1692                 return -EINVAL;
1693         }
1694
1695         rc = llapi_layout_comp_extent_set(layout, prev_end, end);
1696         if (rc) {
1697                 fprintf(stderr, "Set component extent [%lu, %lu) failed. %s\n",
1698                         prev_end, end, strerror(errno));
1699                 return rc;
1700         }
1701
1702         return 0;
1703 }
1704
1705 static inline bool comp_flags_is_neg(__u32 flags)
1706 {
1707         return flags & LCME_FL_NEG;
1708 }
1709
1710 static inline void comp_flags_set_neg(__u32 *flags)
1711 {
1712         *flags |= LCME_FL_NEG;
1713 }
1714
1715 static inline void comp_flags_clear_neg(__u32 *flags)
1716 {
1717         *flags &= ~LCME_FL_NEG;
1718 }
1719
1720 static int comp_str2flags(__u32 *flags, char *string)
1721 {
1722         char *name;
1723         __u32 neg_flags = 0;
1724
1725         if (string == NULL)
1726                 return -EINVAL;
1727
1728         *flags = 0;
1729         for (name = strtok(string, ","); name; name = strtok(NULL, ",")) {
1730                 bool found = false;
1731                 int i;
1732
1733                 for (i = 0; i < ARRAY_SIZE(comp_flags_table); i++) {
1734                         __u32 comp_flag = comp_flags_table[i].cfn_flag;
1735                         const char *comp_name = comp_flags_table[i].cfn_name;
1736
1737                         if (strcmp(name, comp_name) == 0) {
1738                                 *flags |= comp_flag;
1739                                 found = true;
1740                         } else if (strncmp(name, "^", 1) == 0 &&
1741                                    strcmp(name + 1, comp_name) == 0) {
1742                                 neg_flags |= comp_flag;
1743                                 found = true;
1744                         }
1745                 }
1746                 if (!found) {
1747                         llapi_printf(LLAPI_MSG_ERROR,
1748                                      "%s: component flag '%s' not supported\n",
1749                                      progname, name);
1750                         return -EINVAL;
1751                 }
1752         }
1753
1754         if (*flags == 0 && neg_flags == 0)
1755                 return -EINVAL;
1756         /* don't support mixed flags for now */
1757         if (*flags && neg_flags)
1758                 return -EINVAL;
1759
1760         if (neg_flags) {
1761                 *flags = neg_flags;
1762                 comp_flags_set_neg(flags);
1763         }
1764
1765         return 0;
1766 }
1767
/*
 * Tell whether @arg spells "end of file" for a component end: exactly
 * "-1", "EOF" or "eof".
 *
 * The whole argument must match. A prefix comparison would also accept
 * strings such as "-10", "-1M" or "EOFx" and silently turn them into an
 * end-of-file extent.
 */
static inline bool arg_is_eof(char *arg)
{
        return strcmp(arg, "-1") == 0 ||
               strcmp(arg, "EOF") == 0 ||
               strcmp(arg, "eof") == 0;
}
1774
1775 /**
1776  * lfs_mirror_alloc() - Allocate a mirror argument structure.
1777  *
1778  * Return: Valid mirror_args pointer on success and
1779  *         NULL if memory allocation fails.
1780  */
1781 static struct mirror_args *lfs_mirror_alloc(void)
1782 {
1783         struct mirror_args *mirror = NULL;
1784
1785         while (1) {
1786                 mirror = calloc(1, sizeof(*mirror));
1787                 if (mirror != NULL)
1788                         break;
1789
1790                 sleep(1);
1791         }
1792
1793         return mirror;
1794 }
1795
1796 /**
1797  * lfs_mirror_free() - Free memory allocated for a mirror argument
1798  *                     structure.
1799  * @mirror: Previously allocated mirror argument structure by
1800  *          lfs_mirror_alloc().
1801  *
1802  * Free memory allocated for @mirror.
1803  *
1804  * Return: void.
1805  */
1806 static void lfs_mirror_free(struct mirror_args *mirror)
1807 {
1808         if (mirror->m_layout != NULL)
1809                 llapi_layout_free(mirror->m_layout);
1810         free(mirror);
1811 }
1812
1813 /**
1814  * lfs_mirror_list_free() - Free memory allocated for a mirror list.
1815  * @mirror_list: Previously allocated mirror list.
1816  *
1817  * Free memory allocated for @mirror_list.
1818  *
1819  * Return: void.
1820  */
1821 static void lfs_mirror_list_free(struct mirror_args *mirror_list)
1822 {
1823         struct mirror_args *next_mirror = NULL;
1824
1825         while (mirror_list != NULL) {
1826                 next_mirror = mirror_list->m_next;
1827                 lfs_mirror_free(mirror_list);
1828                 mirror_list = next_mirror;
1829         }
1830 }
1831
/*
 * Option codes used as the .val of long options and matched in the
 * option-parsing switch. Numbering starts at 3 and increases by one
 * per entry.
 */
enum {
        LFS_POOL_OPT = 3,
        LFS_COMP_COUNT_OPT,
        LFS_COMP_START_OPT,
        LFS_COMP_FLAGS_OPT,             /* --comp-flags, --component-flags */
        LFS_COMP_DEL_OPT,               /* --comp-del, --component-del */
        LFS_COMP_SET_OPT,               /* --comp-set, --component-set */
        LFS_COMP_ADD_OPT,               /* --comp-add, --component-add */
        LFS_COMP_USE_PARENT_OPT,        /* --parent */
        LFS_COMP_NO_VERIFY_OPT,         /* --no-verify */
        LFS_PROJID_OPT,
};
1844
1845 /* functions */
1846 static int lfs_setstripe0(int argc, char **argv, enum setstripe_origin opc)
1847 {
1848         struct lfs_setstripe_args        lsa;
1849         struct llapi_stripe_param       *param = NULL;
1850         struct find_param                migrate_mdt_param = {
1851                 .fp_max_depth = -1,
1852                 .fp_mdt_index = -1,
1853         };
1854         char                            *fname;
1855         int                              result;
1856         int                              result2 = 0;
1857         char                            *end;
1858         int                              c;
1859         int                              delete = 0;
1860         char                            *mdt_idx_arg = NULL;
1861         unsigned long long               size_units = 1;
1862         bool                             migrate_mode = false;
1863         bool                             migration_block = false;
1864         __u64                            migration_flags = 0;
1865         __u32                            osts[LOV_MAX_STRIPE_COUNT] = { 0 };
1866         int                              comp_del = 0, comp_set = 0;
1867         int                              comp_add = 0;
1868         __u32                            comp_id = 0;
1869         struct llapi_layout             *layout = NULL;
1870         struct llapi_layout             **lpp = &layout;
1871         bool                             mirror_mode = false;
1872         bool                             has_m_file = false;
1873         __u32                            mirror_count = 0;
1874         enum mirror_flags                mirror_flags = 0;
1875         struct mirror_args              *mirror_list = NULL;
1876         struct mirror_args              *new_mirror = NULL;
1877         struct mirror_args              *last_mirror = NULL;
1878         char                             cmd[PATH_MAX];
1879
1880         struct option long_opts[] = {
1881                 /* --block is only valid in migrate mode */
1882         { .val = 'b',   .name = "block",        .has_arg = no_argument},
1883         { .val = LFS_COMP_ADD_OPT,
1884                         .name = "comp-add",     .has_arg = no_argument},
1885         { .val = LFS_COMP_ADD_OPT,
1886                         .name = "component-add",
1887                                                 .has_arg = no_argument},
1888         { .val = LFS_COMP_DEL_OPT,
1889                         .name = "comp-del",     .has_arg = no_argument},
1890         { .val = LFS_COMP_DEL_OPT,
1891                         .name = "component-del",
1892                                                 .has_arg = no_argument},
1893         { .val = LFS_COMP_FLAGS_OPT,
1894                         .name = "comp-flags",   .has_arg = required_argument},
1895         { .val = LFS_COMP_FLAGS_OPT,
1896                         .name = "component-flags",
1897                                                 .has_arg = required_argument},
1898         { .val = LFS_COMP_SET_OPT,
1899                         .name = "comp-set",     .has_arg = no_argument},
1900         { .val = LFS_COMP_SET_OPT,
1901                         .name = "component-set",
1902                                                 .has_arg = no_argument},
1903         { .val = LFS_COMP_USE_PARENT_OPT,
1904                         .name = "parent",       .has_arg = no_argument},
1905         { .val = LFS_COMP_NO_VERIFY_OPT,
1906                         .name = "no-verify",    .has_arg = no_argument},
1907         { .val = 'c',   .name = "stripe-count", .has_arg = required_argument},
1908         { .val = 'c',   .name = "stripe_count", .has_arg = required_argument},
1909         { .val = 'd',   .name = "delete",       .has_arg = no_argument},
1910         { .val = 'E',   .name = "comp-end",     .has_arg = required_argument},
1911         { .val = 'E',   .name = "component-end",
1912                                                 .has_arg = required_argument},
1913         { .val = 'f',   .name = "file",         .has_arg = required_argument },
1914         /* dirstripe {"mdt-hash",     required_argument, 0, 'H'}, */
1915         { .val = 'i',   .name = "stripe-index", .has_arg = required_argument},
1916         { .val = 'i',   .name = "stripe_index", .has_arg = required_argument},
1917         { .val = 'I',   .name = "comp-id",      .has_arg = required_argument},
1918         { .val = 'I',   .name = "component-id", .has_arg = required_argument},
1919         { .val = 'L',   .name = "layout",       .has_arg = required_argument },
1920         { .val = 'm',   .name = "mdt",          .has_arg = required_argument},
1921         { .val = 'm',   .name = "mdt-index",    .has_arg = required_argument},
1922         { .val = 'm',   .name = "mdt_index",    .has_arg = required_argument},
1923         { .val = 'N',   .name = "mirror-count", .has_arg = optional_argument},
1924         /* --non-block is only valid in migrate mode */
1925         { .val = 'n',   .name = "non-block",    .has_arg = no_argument},
1926         { .val = 'o',   .name = "ost",          .has_arg = required_argument},
1927 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
1928         { .val = 'o',   .name = "ost-list",     .has_arg = required_argument },
1929         { .val = 'o',   .name = "ost_list",     .has_arg = required_argument },
1930 #endif
1931         { .val = 'p',   .name = "pool",         .has_arg = required_argument },
1932         { .val = 'S',   .name = "stripe-size",  .has_arg = required_argument },
1933         { .val = 'S',   .name = "stripe_size",  .has_arg = required_argument },
1934         /* dirstripe {"mdt-count",    required_argument, 0, 'T'}, */
1935         /* --verbose is only valid in migrate mode */
1936         { .val = 'v',   .name = "verbose",      .has_arg = no_argument },
1937         { .name = NULL } };
1938
1939         setstripe_args_init(&lsa);
1940
1941         migrate_mode = (opc == SO_MIGRATE);
1942         mirror_mode = (opc == SO_MIRROR_CREATE || opc == SO_MIRROR_EXTEND);
1943
1944         snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
1945         progname = cmd;
1946         while ((c = getopt_long(argc, argv, "bc:dE:f:i:I:m:N::no:p:L:s:S:v",
1947                                 long_opts, NULL)) >= 0) {
1948                 switch (c) {
1949                 case 0:
1950                         /* Long options. */
1951                         break;
1952                 case LFS_COMP_ADD_OPT:
1953                         comp_add = 1;
1954                         break;
1955                 case LFS_COMP_DEL_OPT:
1956                         comp_del = 1;
1957                         break;
1958                 case LFS_COMP_FLAGS_OPT:
1959                         result = comp_str2flags(&lsa.lsa_comp_flags, optarg);
1960                         if (result != 0)
1961                                 goto usage_error;
1962                         break;
1963                 case LFS_COMP_SET_OPT:
1964                         comp_set = 1;
1965                         break;
1966                 case LFS_COMP_USE_PARENT_OPT:
1967                         if (!mirror_mode) {
1968                                 fprintf(stderr, "error: %s: --parent must be "
1969                                         "specified with --mirror-count|-N "
1970                                         "option\n", progname);
1971                                 goto usage_error;
1972                         }
1973                         setstripe_args_init(&lsa);
1974                         break;
1975                 case LFS_COMP_NO_VERIFY_OPT:
1976                         mirror_flags |= NO_VERIFY;
1977                         break;
1978                 case 'b':
1979                         if (!migrate_mode) {
1980                                 fprintf(stderr,
1981                                         "%s %s: -b|--block valid only for migrate command\n",
1982                                         progname, argv[0]);
1983                                 goto usage_error;
1984                         }
1985                         migration_block = true;
1986                         break;
1987                 case 'c':
1988                         lsa.lsa_stripe_count = strtoul(optarg, &end, 0);
1989                         if (*end != '\0') {
1990                                 fprintf(stderr,
1991                                         "%s %s: invalid stripe count '%s'\n",
1992                                         progname, argv[0], optarg);
1993                                 goto usage_error;
1994                         }
1995
1996                         if (lsa.lsa_stripe_count == -1)
1997                                 lsa.lsa_stripe_count = LLAPI_LAYOUT_WIDE;
1998                         break;
1999                 case 'd':
2000                         /* delete the default striping pattern */
2001                         delete = 1;
2002                         break;
2003                 case 'E':
2004                         if (lsa.lsa_comp_end != 0) {
2005                                 result = comp_args_to_layout(lpp, &lsa);
2006                                 if (result) {
2007                                         fprintf(stderr,
2008                                                 "%s %s: invalid layout\n",
2009                                                 progname, argv[0]);
2010                                         goto usage_error;
2011                                 }
2012
2013                                 setstripe_args_init_inherit(&lsa);
2014                         }
2015
2016                         if (arg_is_eof(optarg)) {
2017                                 lsa.lsa_comp_end = LUSTRE_EOF;
2018                         } else {
2019                                 result = llapi_parse_size(optarg,
2020                                                         &lsa.lsa_comp_end,
2021                                                         &size_units, 0);
2022                                 if (result) {
2023                                         fprintf(stderr,
2024                                                 "%s %s: invalid component end '%s'\n",
2025                                                 progname, argv[0], optarg);
2026                                         goto usage_error;
2027                                 }
2028                         }
2029                         break;
2030                 case 'i':
2031                         lsa.lsa_stripe_off = strtol(optarg, &end, 0);
2032                         if (*end != '\0') {
2033                                 fprintf(stderr,
2034                                         "%s %s: invalid stripe offset '%s'\n",
2035                                         progname, argv[0], optarg);
2036                                 goto usage_error;
2037                         }
2038                         if (lsa.lsa_stripe_off == -1)
2039                                 lsa.lsa_stripe_off = LLAPI_LAYOUT_DEFAULT;
2040                         break;
2041                 case 'I':
2042                         comp_id = strtoul(optarg, &end, 0);
2043                         if (*end != '\0' || comp_id == 0 ||
2044                             comp_id > LCME_ID_MAX) {
2045                                 fprintf(stderr,
2046                                         "%s %s: invalid component ID '%s'\n",
2047                                         progname, argv[0], optarg);
2048                                 goto usage_error;
2049                         }
2050                         break;
2051                 case 'f':
2052                         if (opc != SO_MIRROR_EXTEND) {
2053                                 fprintf(stderr,
2054                                         "error: %s: invalid option: %s\n",
2055                                         progname, argv[optopt + 1]);
2056                                 goto usage_error;
2057                         }
2058                         if (last_mirror == NULL) {
2059                                 fprintf(stderr, "error: %s: '-N' must exist "
2060                                         "in front of '%s'\n",
2061                                         progname, argv[optopt + 1]);
2062                                 goto usage_error;
2063                         }
2064
2065                         last_mirror->m_file = optarg;
2066                         last_mirror->m_count = 1;
2067                         has_m_file = true;
2068                         break;
2069                 case 'L':
2070                         if (strcmp(argv[optind - 1], "mdt") == 0) {
2071                                 /* Can be only the first component */
2072                                 if (layout != NULL) {
2073                                         result = -EINVAL;
2074                                         fprintf(stderr, "error: 'mdt' layout "
2075                                                 "can be only the first one\n");
2076                                         goto error;
2077                                 }
2078                                 if (lsa.lsa_comp_end > (1ULL << 30)) { /* 1Gb */
2079                                         result = -EFBIG;
2080                                         fprintf(stderr, "error: 'mdt' layout "
2081                                                 "size is too big\n");
2082                                         goto error;
2083                                 }
2084                                 lsa.lsa_pattern = LLAPI_LAYOUT_MDT;
2085                         } else if (strcmp(argv[optind - 1], "raid0") != 0) {
2086                                 result = -EINVAL;
2087                                 fprintf(stderr, "error: layout '%s' is "
2088                                         "unknown, supported layouts are: "
2089                                         "'mdt', 'raid0'\n", argv[optind]);
2090                                 goto error;
2091                         }
2092                         break;
2093                 case 'm':
2094                         if (!migrate_mode) {
2095                                 fprintf(stderr,
2096                                         "%s %s: -m|--mdt-index valid only for migrate command\n",
2097                                         progname, argv[0]);
2098                                 goto usage_error;
2099                         }
2100                         mdt_idx_arg = optarg;
2101                         break;
2102                 case 'n':
2103                         if (!migrate_mode) {
2104                                 fprintf(stderr,
2105                                         "%s %s: -n|--non-block valid only for migrate command\n",
2106                                         progname, argv[0]);
2107                                 goto usage_error;
2108                         }
2109                         migration_flags |= MIGRATION_NONBLOCK;
2110                         break;
2111                 case 'N':
2112                         if (opc == SO_SETSTRIPE) {
2113                                 opc = SO_MIRROR_CREATE;
2114                                 mirror_mode = true;
2115                         }
2116                         mirror_count = 1;
2117                         if (optarg != NULL) {
2118                                 mirror_count = strtoul(optarg, &end, 0);
2119                                 if (*end != '\0' || mirror_count == 0) {
2120                                         fprintf(stderr,
2121                                                 "error: %s: bad mirror count: %s\n",
2122                                                 progname, optarg);
2123                                         result = -EINVAL;
2124                                         goto error;
2125                                 }
2126                         }
2127
2128                         new_mirror = lfs_mirror_alloc();
2129                         new_mirror->m_count = mirror_count;
2130
2131                         if (mirror_list == NULL)
2132                                 mirror_list = new_mirror;
2133
2134                         if (last_mirror != NULL) {
2135                                 /* wrap up last mirror */
2136                                 if (lsa.lsa_comp_end == 0)
2137                                         lsa.lsa_comp_end = LUSTRE_EOF;
2138
2139                                 result = comp_args_to_layout(lpp, &lsa);
2140                                 if (result) {
2141                                         lfs_mirror_free(new_mirror);
2142                                         goto error;
2143                                 }
2144
2145                                 setstripe_args_init_inherit(&lsa);
2146
2147                                 last_mirror->m_next = new_mirror;
2148                         }
2149
2150                         last_mirror = new_mirror;
2151                         lpp = &last_mirror->m_layout;
2152                         break;
2153                 case 'o':
2154                         lsa.lsa_nr_osts = parse_targets(osts,
2155                                                 sizeof(osts) / sizeof(__u32),
2156                                                 lsa.lsa_nr_osts, optarg);
2157                         if (lsa.lsa_nr_osts < 0) {
2158                                 fprintf(stderr,
2159                                         "%s %s: invalid OST target(s) '%s'\n",
2160                                         progname, argv[0], optarg);
2161                                 goto usage_error;
2162                         }
2163
2164                         lsa.lsa_osts = osts;
2165                         if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
2166                                 lsa.lsa_stripe_off = osts[0];
2167                         break;
2168                 case 'p':
2169                         if (optarg == NULL)
2170                                 goto usage_error;
2171                         lsa.lsa_pool_name = optarg;
2172                         break;
2173                 case 'S':
2174                         result = llapi_parse_size(optarg, &lsa.lsa_stripe_size,
2175                                                   &size_units, 0);
2176                         if (result) {
2177                                 fprintf(stderr,
2178                                         "%s %s: invalid stripe size '%s'\n",
2179                                         progname, argv[0], optarg);
2180                                 goto usage_error;
2181                         }
2182                         break;
2183                 case 'v':
2184                         if (!migrate_mode) {
2185                                 fprintf(stderr,
2186                                         "%s %s: -v|--verbose valid only for migrate command\n",
2187                                         progname, argv[0]);
2188                                 goto usage_error;
2189                         }
2190                         migrate_mdt_param.fp_verbose = VERBOSE_DETAIL;
2191                         break;
2192                 default:
2193                         fprintf(stderr, "%s %s: unrecognized option '%s'\n",
2194                                 progname, argv[0], argv[optind - 1]);
2195                         goto usage_error;
2196                 }
2197         }
2198
2199         fname = argv[optind];
2200
2201         if (optind == argc) {
2202                 fprintf(stderr, "%s %s: FILE must be specified\n",
2203                         progname, argv[0]);
2204                 goto usage_error;
2205         }
2206
2207         if (mirror_mode && mirror_count == 0) {
2208                 fprintf(stderr,
2209                         "error: %s: --mirror-count|-N option is required\n",
2210                         progname);
2211                 result = -EINVAL;
2212                 goto error;
2213         }
2214
2215         if (mirror_mode) {
2216                 if (lsa.lsa_comp_end == 0)
2217                         lsa.lsa_comp_end = LUSTRE_EOF;
2218         }
2219
2220         if (lsa.lsa_comp_end != 0) {
2221                 result = comp_args_to_layout(lpp, &lsa);
2222                 if (result)
2223                         goto error;
2224         }
2225
2226         if (mirror_flags & NO_VERIFY) {
2227                 if (opc != SO_MIRROR_EXTEND) {
2228                         fprintf(stderr,
2229                                 "error: %s: --no-verify is valid only for lfs mirror extend command\n",
2230                                 progname);
2231                         result = -EINVAL;
2232                         goto error;
2233                 } else if (!has_m_file) {
2234                         fprintf(stderr,
2235                                 "error: %s: --no-verify must be specified with -f <victim_file> option\n",
2236                                 progname);
2237                         result = -EINVAL;
2238                         goto error;
2239                 }
2240         }
2241
2242         /* Only LCME_FL_INIT flags is used in PFL, and it shouldn't be
2243          * altered by user space tool, so we don't need to support the
2244          * --component-set for this moment. */
2245         if (comp_set != 0) {
2246                 fprintf(stderr, "%s %s: --component-set not supported\n",
2247                         progname, argv[0]);
2248                 goto usage_error;
2249         }
2250
2251         if ((delete + comp_set + comp_del + comp_add) > 1) {
2252                 fprintf(stderr,
2253                         "%s %s: options --component-set, --component-del, --component-add and -d are mutually exclusive\n",
2254                         progname, argv[0]);
2255                 goto usage_error;
2256         }
2257
2258         if (delete && (setstripe_args_specified(&lsa) || comp_id != 0 ||
2259                        lsa.lsa_comp_flags != 0 || layout != NULL)) {
2260                 fprintf(stderr,
2261                         "%s %s: option -d is mutually exclusive with -s, -c, -o, -p, -I, -F and -E options\n",
2262                         progname, argv[0]);
2263                 goto usage_error;
2264         }
2265
2266         if ((comp_set || comp_del) &&
2267             (setstripe_args_specified(&lsa) || layout != NULL)) {
2268                 fprintf(stderr,
2269                         "%s %s: options --component-del and --component-set are mutually exclusive when used with -c, -E, -o, -p, or -s\n",
2270                         progname, argv[0]);
2271                 goto usage_error;
2272         }
2273
2274         if (comp_del && comp_id != 0 && lsa.lsa_comp_flags != 0) {
2275                 fprintf(stderr,
2276                         "%s %s: options -I and -F are mutually exclusive when used with --component-del\n",
2277                         progname, argv[0]);
2278                 goto usage_error;
2279         }
2280
2281         if (comp_add || comp_del) {
2282                 struct stat st;
2283
2284                 result = lstat(fname, &st);
2285                 if (result == 0 && S_ISDIR(st.st_mode)) {
2286                         fprintf(stderr,
2287                                 "%s setstripe: cannot use --component-add or --component-del for directory\n",
2288                                 progname);
2289                         goto usage_error;
2290                 }
2291
2292                 if (mirror_mode) {
2293                         fprintf(stderr, "error: %s: can't use --component-add "
2294                                 "or --component-del for mirror operation\n",
2295                                 progname);
2296                         goto usage_error;
2297                 }
2298         }
2299
2300         if (comp_add) {
2301                 if (layout == NULL) {
2302                         fprintf(stderr,
2303                                 "%s %s: option -E must be specified with --component-add\n",
2304                                 progname, argv[0]);
2305                         goto usage_error;
2306                 }
2307
2308                 result = adjust_first_extent(fname, layout);
2309                 if (result == -ENODATA)
2310                         comp_add = 0;
2311                 else if (result != 0)
2312                         goto error;
2313         }
2314
2315         if (mdt_idx_arg != NULL && optind > 3) {
2316                 fprintf(stderr,
2317                         "%s %s: option -m cannot be used with other options\n",
2318                         progname, argv[0]);
2319                 goto usage_error;
2320         }
2321
2322         if ((migration_flags & MIGRATION_NONBLOCK) && migration_block) {
2323                 fprintf(stderr,
2324                         "%s %s: options --non-block and --block are mutually exclusive\n",
2325                         progname, argv[0]);
2326                 goto usage_error;
2327         }
2328
2329         if (!comp_del && !comp_set && comp_id != 0) {
2330                 fprintf(stderr,
2331                         "%s %s: option -I can only be used with --component-del\n",
2332                         progname, argv[0]);
2333                 goto usage_error;
2334         }
2335
2336         if (mdt_idx_arg != NULL) {
2337                 /* initialize migrate mdt parameters */
2338                 migrate_mdt_param.fp_mdt_index = strtoul(mdt_idx_arg, &end, 0);
2339                 if (*end != '\0') {
2340                         fprintf(stderr, "%s %s: invalid MDT index '%s'\n",
2341                                 progname, argv[0], mdt_idx_arg);
2342                         goto usage_error;
2343                 }
2344                 migrate_mdt_param.fp_migrate = 1;
2345         } else if (layout == NULL) {
2346                 /* initialize stripe parameters */
2347                 param = calloc(1, offsetof(typeof(*param),
2348                                lsp_osts[lsa.lsa_nr_osts]));
2349                 if (param == NULL) {
2350                         fprintf(stderr,
2351                                 "%s %s: cannot allocate memory for parameters: %s\n",
2352                                 progname, argv[0], strerror(ENOMEM));
2353                         result = -ENOMEM;
2354                         goto error;
2355                 }
2356
2357                 if (lsa.lsa_stripe_size != LLAPI_LAYOUT_DEFAULT)
2358                         param->lsp_stripe_size = lsa.lsa_stripe_size;
2359                 if (lsa.lsa_stripe_count != LLAPI_LAYOUT_DEFAULT) {
2360                         if (lsa.lsa_stripe_count == LLAPI_LAYOUT_WIDE)
2361                                 param->lsp_stripe_count = -1;
2362                         else
2363                                 param->lsp_stripe_count = lsa.lsa_stripe_count;
2364                 }
2365                 if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
2366                         param->lsp_stripe_offset = -1;
2367                 else
2368                         param->lsp_stripe_offset = lsa.lsa_stripe_off;
2369                 param->lsp_pool = lsa.lsa_pool_name;
2370                 param->lsp_is_specific = false;
2371                 if (lsa.lsa_nr_osts > 0) {
2372                         if (lsa.lsa_stripe_count > 0 &&
2373                             lsa.lsa_stripe_count != LLAPI_LAYOUT_DEFAULT &&
2374                             lsa.lsa_stripe_count != LLAPI_LAYOUT_WIDE &&
2375                             lsa.lsa_nr_osts != lsa.lsa_stripe_count) {
2376                                 fprintf(stderr, "error: %s: stripe count %lld "
2377                                         "doesn't match the number of OSTs: %d\n"
2378                                         , argv[0], lsa.lsa_stripe_count,
2379                                         lsa.lsa_nr_osts);
2380                                 free(param);
2381                                 goto usage_error;
2382                         }
2383
2384                         param->lsp_is_specific = true;
2385                         param->lsp_stripe_count = lsa.lsa_nr_osts;
2386                         memcpy(param->lsp_osts, osts,
2387                                sizeof(*osts) * lsa.lsa_nr_osts);
2388                 }
2389         }
2390
2391         for (fname = argv[optind]; fname != NULL; fname = argv[++optind]) {
2392                 if (mdt_idx_arg != NULL) {
2393                         result = llapi_migrate_mdt(fname, &migrate_mdt_param);
2394                 } else if (migrate_mode) {
2395                         result = lfs_migrate(fname, migration_flags, param,
2396                                              layout);
2397                 } else if (comp_set != 0) {
2398                         result = lfs_component_set(fname, comp_id,
2399                                                    lsa.lsa_comp_flags);
2400                 } else if (comp_del != 0) {
2401                         result = lfs_component_del(fname, comp_id,
2402                                                    lsa.lsa_comp_flags);
2403                 } else if (comp_add != 0) {
2404                         result = lfs_component_add(fname, layout);
2405                 } else if (opc == SO_MIRROR_CREATE) {
2406                         result = mirror_create(fname, mirror_list);
2407                 } else if (opc == SO_MIRROR_EXTEND) {
2408                         result = mirror_extend(fname, mirror_list,
2409                                                mirror_flags);
2410                 } else if (layout != NULL) {
2411                         result = lfs_component_create(fname, O_CREAT | O_WRONLY,
2412                                                       0644, layout);
2413                         if (result >= 0) {
2414                                 close(result);
2415                                 result = 0;
2416                         }
2417                 } else {
2418                         result = llapi_file_open_param(fname,
2419                                                        O_CREAT | O_WRONLY,
2420                                                        0644, param);
2421                         if (result >= 0) {
2422                                 close(result);
2423                                 result = 0;
2424                         }
2425                 }
2426                 if (result) {
2427                         /* Save the first error encountered. */
2428                         if (result2 == 0)
2429                                 result2 = result;
2430                         continue;
2431                 }
2432         }
2433
2434         free(param);
2435         llapi_layout_free(layout);
2436         lfs_mirror_list_free(mirror_list);
2437         return result2;
2438 usage_error:
2439         result = CMD_HELP;
2440 error:
2441         llapi_layout_free(layout);
2442         lfs_mirror_list_free(mirror_list);
2443         return result;
2444 }
2445
2446 static int lfs_poollist(int argc, char **argv)
2447 {
2448         if (argc != 2)
2449                 return CMD_HELP;
2450
2451         return llapi_poollist(argv[1]);
2452 }
2453
/**
 * Convert a find(1)-style day count into an absolute timestamp.
 *
 * \param[in]  time  reference time in seconds (lfs_find passes the current
 *                   time)
 * \param[out] set   receives *time minus the requested number of days;
 *                   left untouched on error
 * \param[in]  str   day count in any base accepted by strtol(3) with base 0,
 *                   optionally prefixed by a single '+' or '-'
 *
 * \retval 1        \a str carried a '+' prefix
 * \retval -1       \a str carried a '-' prefix
 * \retval 0        \a str carried no sign prefix
 * \retval INT_MAX  \a str is not a valid day count or reaches back further
 *                  than \a time allows; a message is printed on stderr
 */
static int set_time(time_t *time, time_t *set, char *str)
{
	const time_t secs_per_day = 24 * 60 * 60;
	const char *arg = str;	/* untouched argument for the error message */
	char *end;
	long days;
	int res = 0;

	if (str[0] == '+')
		res = 1;
	else if (str[0] == '-')
		res = -1;

	if (res)
		str++;

	errno = 0;
	days = strtol(str, &end, 0);
	/*
	 * Reject empty input, trailing garbage, out-of-range numbers and a
	 * second sign (e.g. "--5").  The range test divides instead of
	 * multiplying so that a huge day count cannot overflow time_t.
	 */
	if (end == str || *end != '\0' || errno == ERANGE || days < 0 ||
	    *time < 0 || days > *time / secs_per_day) {
		fprintf(stderr, "Wrong time '%s' is specified.\n", arg);
		return INT_MAX;
	}

	*set = *time - days * secs_per_day;
	return res;
}
2478 static int name2uid(unsigned int *id, const char *name)
2479 {
2480         struct passwd *passwd;
2481
2482         passwd = getpwnam(name);
2483         if (passwd == NULL)
2484                 return -ENOENT;
2485         *id = passwd->pw_uid;
2486
2487         return 0;
2488 }
2489
2490 static int name2gid(unsigned int *id, const char *name)
2491 {
2492         struct group *group;
2493
2494         group = getgrnam(name);
2495         if (group == NULL)
2496                 return -ENOENT;
2497         *id = group->gr_gid;
2498
2499         return 0;
2500 }
2501
/*
 * Project names cannot be translated to project IDs here: both arguments
 * are ignored and the call always fails with -ENOTSUP.
 */
static inline int name2projid(unsigned int *id, const char *name)
{
	(void)id;
	(void)name;

	return -ENOTSUP;
}
2506
2507 static int uid2name(char **name, unsigned int id)
2508 {
2509         struct passwd *passwd;
2510
2511         passwd = getpwuid(id);
2512         if (passwd == NULL)
2513                 return -ENOENT;
2514         *name = passwd->pw_name;
2515
2516         return 0;
2517 }
2518
2519 static inline int gid2name(char **name, unsigned int id)
2520 {
2521         struct group *group;
2522
2523         group = getgrgid(id);
2524         if (group == NULL)
2525                 return -ENOENT;
2526         *name = group->gr_name;
2527
2528         return 0;
2529 }
2530
2531 static int name2layout(__u32 *layout, char *name)
2532 {
2533         char *ptr, *layout_name;
2534
2535         *layout = 0;
2536         for (ptr = name; ; ptr = NULL) {
2537                 layout_name = strtok(ptr, ",");
2538                 if (layout_name == NULL)
2539                         break;
2540                 if (strcmp(layout_name, "released") == 0)
2541                         *layout |= LOV_PATTERN_F_RELEASED;
2542                 else if (strcmp(layout_name, "raid0") == 0)
2543                         *layout |= LOV_PATTERN_RAID0;
2544                 else if (strcmp(layout_name, "mdt") == 0)
2545                         *layout |= LOV_PATTERN_MDT;
2546                 else
2547                         return -1;
2548         }
2549         return 0;
2550 }
2551
2552 static int lfs_find(int argc, char **argv)
2553 {
2554         int c, rc;
2555         int ret = 0;
2556         time_t t;
2557         struct find_param param = {
2558                 .fp_max_depth = -1,
2559                 .fp_quiet = 1,
2560         };
2561         struct option long_opts[] = {
2562         { .val = 'A',   .name = "atime",        .has_arg = required_argument },
2563         { .val = LFS_COMP_COUNT_OPT,
2564                         .name = "comp-count",   .has_arg = required_argument },
2565         { .val = LFS_COMP_COUNT_OPT,
2566                         .name = "component-count",
2567                                                 .has_arg = required_argument },
2568         { .val = LFS_COMP_FLAGS_OPT,
2569                         .name = "comp-flags",   .has_arg = required_argument },
2570         { .val = LFS_COMP_FLAGS_OPT,
2571                         .name = "component-flags",
2572                                                 .has_arg = required_argument },
2573         { .val = LFS_COMP_START_OPT,
2574                         .name = "comp-start",   .has_arg = required_argument },
2575         { .val = LFS_COMP_START_OPT,
2576                         .name = "component-start",
2577                                                 .has_arg = required_argument },
2578         { .val = 'c',   .name = "stripe-count", .has_arg = required_argument },
2579         { .val = 'c',   .name = "stripe_count", .has_arg = required_argument },
2580         { .val = 'C',   .name = "ctime",        .has_arg = required_argument },
2581         { .val = 'D',   .name = "maxdepth",     .has_arg = required_argument },
2582         { .val = 'E',   .name = "comp-end",     .has_arg = required_argument },
2583         { .val = 'E',   .name = "component-end",
2584                                                 .has_arg = required_argument },
2585         { .val = 'g',   .name = "gid",          .has_arg = required_argument },
2586         { .val = 'G',   .name = "group",        .has_arg = required_argument },
2587         { .val = 'H',   .name = "mdt-hash",     .has_arg = required_argument },
2588         { .val = 'i',   .name = "stripe-index", .has_arg = required_argument },
2589         { .val = 'i',   .name = "stripe_index", .has_arg = required_argument },
2590         /*{"component-id", required_argument, 0, 'I'},*/
2591         { .val = 'L',   .name = "layout",       .has_arg = required_argument },
2592         { .val = 'm',   .name = "mdt",          .has_arg = required_argument },
2593         { .val = 'm',   .name = "mdt-index",    .has_arg = required_argument },
2594         { .val = 'm',   .name = "mdt_index",    .has_arg = required_argument },
2595         { .val = 'M',   .name = "mtime",        .has_arg = required_argument },
2596         { .val = 'n',   .name = "name",         .has_arg = required_argument },
2597      /* reserve {"or",           no_argument,     , 0, 'o'}, to match find(1) */
2598         { .val = 'O',   .name = "obd",          .has_arg = required_argument },
2599         { .val = 'O',   .name = "ost",          .has_arg = required_argument },
2600         /* no short option for pool, p/P already used */
2601         { .val = LFS_POOL_OPT,
2602                         .name = "pool",         .has_arg = required_argument },
2603         { .val = 'p',   .name = "print0",       .has_arg = no_argument },
2604         { .val = 'P',   .name = "print",        .has_arg = no_argument },
2605         { .val = LFS_PROJID_OPT,
2606                         .name = "projid",       .has_arg = required_argument },
2607         { .val = 's',   .name = "size",         .has_arg = required_argument },
2608         { .val = 'S',   .name = "stripe-size",  .has_arg = required_argument },
2609         { .val = 'S',   .name = "stripe_size",  .has_arg = required_argument },
2610         { .val = 't',   .name = "type",         .has_arg = required_argument },
2611         { .val = 'T',   .name = "mdt-count",    .has_arg = required_argument },
2612         { .val = 'u',   .name = "uid",          .has_arg = required_argument },
2613         { .val = 'U',   .name = "user",         .has_arg = required_argument },
2614         { .name = NULL } };
2615         int pathstart = -1;
2616         int pathend = -1;
2617         int neg_opt = 0;
2618         time_t *xtime;
2619         int *xsign;
2620         int isoption;
2621         char *endptr;
2622
2623         time(&t);
2624
2625         /* when getopt_long_only() hits '!' it returns 1, puts "!" in optarg */
2626         while ((c = getopt_long_only(argc, argv,
2627                         "-A:c:C:D:E:g:G:H:i:L:m:M:n:O:Ppqrs:S:t:T:u:U:v",
2628                         long_opts, NULL)) >= 0) {
2629                 xtime = NULL;
2630                 xsign = NULL;
2631                 if (neg_opt)
2632                         --neg_opt;
2633                 /* '!' is part of option */
2634                 /* when getopt_long_only() finds a string which is not
2635                  * an option nor a known option argument it returns 1
2636                  * in that case if we already have found pathstart and pathend
2637                  * (i.e. we have the list of pathnames),
2638                  * the only supported value is "!"
2639                  */
2640                 isoption = (c != 1) || (strcmp(optarg, "!") == 0);
2641                 if (!isoption && pathend != -1) {
2642                         fprintf(stderr, "err: %s: filename|dirname must either "
2643                                         "precede options or follow options\n",
2644                                         argv[0]);
2645                         ret = CMD_HELP;
2646                         goto err;
2647                 }
2648                 if (!isoption && pathstart == -1)
2649                         pathstart = optind - 1;
2650                 if (isoption && pathstart != -1 && pathend == -1)
2651                         pathend = optind - 2;
2652                 switch (c) {
2653                 case 0:
2654                         /* Long options. */
2655                         break;
2656                 case 1:
2657                         /* unknown; opt is "!" or path component,
2658                          * checking done above.
2659                          */
2660                         if (strcmp(optarg, "!") == 0)
2661                                 neg_opt = 2;
2662                         break;
2663                 case 'A':
2664                         xtime = &param.fp_atime;
2665                         xsign = &param.fp_asign;
2666                         param.fp_exclude_atime = !!neg_opt;
2667                         /* no break, this falls through to 'C' for ctime */
2668                 case 'C':
2669                         if (c == 'C') {
2670                                 xtime = &param.fp_ctime;
2671                                 xsign = &param.fp_csign;
2672                                 param.fp_exclude_ctime = !!neg_opt;
2673                         }
2674                         /* no break, this falls through to 'M' for mtime */
2675                 case 'M':
2676                         if (c == 'M') {
2677                                 xtime = &param.fp_mtime;
2678                                 xsign = &param.fp_msign;
2679                                 param.fp_exclude_mtime = !!neg_opt;
2680                         }
2681                         rc = set_time(&t, xtime, optarg);
2682                         if (rc == INT_MAX) {
2683                                 ret = -1;
2684                                 goto err;
2685                         }
2686                         if (rc)
2687                                 *xsign = rc;
2688                         break;
2689                 case LFS_COMP_COUNT_OPT:
2690                         if (optarg[0] == '+') {
2691                                 param.fp_comp_count_sign = -1;
2692                                 optarg++;
2693                         } else if (optarg[0] == '-') {
2694                                 param.fp_comp_count_sign =  1;
2695                                 optarg++;
2696                         }
2697
2698                         param.fp_comp_count = strtoul(optarg, &endptr, 0);
2699                         if (*endptr != '\0') {
2700                                 fprintf(stderr, "error: bad component count "
2701                                         "'%s'\n", optarg);
2702                                 goto err;
2703                         }
2704                         param.fp_check_comp_count = 1;
2705                         param.fp_exclude_comp_count = !!neg_opt;
2706                         break;
2707                 case LFS_COMP_FLAGS_OPT:
2708                         rc = comp_str2flags(&param.fp_comp_flags, optarg);
2709                         if (rc || comp_flags_is_neg(param.fp_comp_flags)) {
2710                                 fprintf(stderr, "error: bad component flags "
2711                                         "'%s'\n", optarg);
2712                                 goto err;
2713                         }
2714                         param.fp_check_comp_flags = 1;
2715                         param.fp_exclude_comp_flags = !!neg_opt;
2716                         break;
2717                 case LFS_COMP_START_OPT:
2718                         if (optarg[0] == '+') {
2719                                 param.fp_comp_start_sign = -1;
2720                                 optarg++;
2721                         } else if (optarg[0] == '-') {
2722                                 param.fp_comp_start_sign =  1;
2723                                 optarg++;
2724                         }
2725
2726                         rc = llapi_parse_size(optarg, &param.fp_comp_start,
2727                                               &param.fp_comp_start_units, 0);
2728                         if (rc) {
2729                                 fprintf(stderr, "error: bad component start "
2730                                         "'%s'\n", optarg);
2731                                 goto err;
2732                         }
2733                         param.fp_check_comp_start = 1;
2734                         param.fp_exclude_comp_start = !!neg_opt;
2735                         break;
2736                 case 'c':
2737                         if (optarg[0] == '+') {
2738                                 param.fp_stripe_count_sign = -1;
2739                                 optarg++;
2740                         } else if (optarg[0] == '-') {
2741                                 param.fp_stripe_count_sign =  1;
2742                                 optarg++;
2743                         }
2744
2745                         param.fp_stripe_count = strtoul(optarg, &endptr, 0);
2746                         if (*endptr != '\0') {
2747                                 fprintf(stderr,"error: bad stripe_count '%s'\n",
2748                                         optarg);
2749                                 ret = -1;
2750                                 goto err;
2751                         }
2752                         param.fp_check_stripe_count = 1;
2753                         param.fp_exclude_stripe_count = !!neg_opt;
2754                         break;
2755                 case 'D':
2756                         param.fp_max_depth = strtol(optarg, 0, 0);
2757                         break;
2758                 case 'E':
2759                         if (optarg[0] == '+') {
2760                                 param.fp_comp_end_sign = -1;
2761                                 optarg++;
2762                         } else if (optarg[0] == '-') {
2763                                 param.fp_comp_end_sign =  1;
2764                                 optarg++;
2765                         }
2766
2767                         if (arg_is_eof(optarg)) {
2768                                 param.fp_comp_end = LUSTRE_EOF;
2769                                 param.fp_comp_end_units = 1;
2770                                 rc = 0;
2771                         } else {
2772                                 rc = llapi_parse_size(optarg,
2773                                                 &param.fp_comp_end,
2774                                                 &param.fp_comp_end_units, 0);
2775                         }
2776                         if (rc) {
2777                                 fprintf(stderr, "error: bad component end "
2778                                         "'%s'\n", optarg);
2779                                 goto err;
2780                         }
2781                         param.fp_check_comp_end = 1;
2782                         param.fp_exclude_comp_end = !!neg_opt;
2783                         break;
2784                 case 'g':
2785                 case 'G':
2786                         rc = name2gid(&param.fp_gid, optarg);
2787                         if (rc) {
2788                                 param.fp_gid = strtoul(optarg, &endptr, 10);
2789                                 if (*endptr != '\0') {
2790                                         fprintf(stderr, "Group/GID: %s cannot "
2791                                                 "be found.\n", optarg);
2792                                         ret = -1;
2793                                         goto err;
2794                                 }
2795                         }
2796                         param.fp_exclude_gid = !!neg_opt;
2797                         param.fp_check_gid = 1;
2798                         break;
2799                 case 'H':
2800                         param.fp_hash_type = check_hashtype(optarg);
2801                         if (param.fp_hash_type == 0) {
2802                                 fprintf(stderr, "error: bad hash_type '%s'\n",
2803                                         optarg);
2804                                 ret = -1;
2805                                 goto err;
2806                         }
2807                         param.fp_check_hash_type = 1;
2808                         param.fp_exclude_hash_type = !!neg_opt;
2809                         break;
2810                 case 'L':
2811                         ret = name2layout(&param.fp_layout, optarg);
2812                         if (ret)
2813                                 goto err;
2814                         param.fp_exclude_layout = !!neg_opt;
2815                         param.fp_check_layout = 1;
2816                         break;
2817                 case 'u':
2818                 case 'U':
2819                         rc = name2uid(&param.fp_uid, optarg);
2820                         if (rc) {
2821                                 param.fp_uid = strtoul(optarg, &endptr, 10);
2822                                 if (*endptr != '\0') {
2823                                         fprintf(stderr, "User/UID: %s cannot "
2824                                                 "be found.\n", optarg);
2825                                         ret = -1;
2826                                         goto err;
2827                                 }
2828                         }
2829                         param.fp_exclude_uid = !!neg_opt;
2830                         param.fp_check_uid = 1;
2831                         break;
2832                 case LFS_POOL_OPT:
2833                         if (strlen(optarg) > LOV_MAXPOOLNAME) {
2834                                 fprintf(stderr,
2835                                         "Pool name %s is too long"
2836                                         " (max is %d)\n", optarg,
2837                                         LOV_MAXPOOLNAME);
2838                                 ret = -1;
2839                                 goto err;
2840                         }
2841                         /* we do check for empty pool because empty pool
2842                          * is used to find V1 lov attributes */
2843                         strncpy(param.fp_poolname, optarg, LOV_MAXPOOLNAME);
2844                         param.fp_poolname[LOV_MAXPOOLNAME] = '\0';
2845                         param.fp_exclude_pool = !!neg_opt;
2846                         param.fp_check_pool = 1;
2847                         break;
2848                 case 'n':
2849                         param.fp_pattern = (char *)optarg;
2850                         param.fp_exclude_pattern = !!neg_opt;
2851                         break;
2852                 case 'm':
2853                 case 'i':
2854                 case 'O': {
2855                         char *buf, *token, *next, *p;
2856                         int len = 1;
2857                         void *tmp;
2858
2859                         buf = strdup(optarg);
2860                         if (buf == NULL) {
2861                                 ret = -ENOMEM;
2862                                 goto err;
2863                         }
2864
2865                         param.fp_exclude_obd = !!neg_opt;
2866
2867                         token = buf;
2868                         while (token && *token) {
2869                                 token = strchr(token, ',');
2870                                 if (token) {
2871                                         len++;
2872                                         token++;
2873                                 }
2874                         }
2875                         if (c == 'm') {
2876                                 param.fp_exclude_mdt = !!neg_opt;
2877                                 param.fp_num_alloc_mdts += len;
2878                                 tmp = realloc(param.fp_mdt_uuid,
2879                                               param.fp_num_alloc_mdts *
2880                                               sizeof(*param.fp_mdt_uuid));
2881                                 if (tmp == NULL) {
2882                                         ret = -ENOMEM;
2883                                         goto err_free;
2884                                 }
2885
2886                                 param.fp_mdt_uuid = tmp;
2887                         } else {
2888                                 param.fp_exclude_obd = !!neg_opt;
2889                                 param.fp_num_alloc_obds += len;
2890                                 tmp = realloc(param.fp_obd_uuid,
2891                                               param.fp_num_alloc_obds *
2892                                               sizeof(*param.fp_obd_uuid));
2893                                 if (tmp == NULL) {
2894                                         ret = -ENOMEM;
2895                                         goto err_free;
2896                                 }
2897
2898                                 param.fp_obd_uuid = tmp;
2899                         }
2900                         for (token = buf; token && *token; token = next) {
2901                                 struct obd_uuid *puuid;
2902                                 if (c == 'm') {
2903                                         puuid =
2904                                         &param.fp_mdt_uuid[param.fp_num_mdts++];
2905                                 } else {
2906                                         puuid =
2907                                         &param.fp_obd_uuid[param.fp_num_obds++];
2908                                 }
2909                                 p = strchr(token, ',');
2910                                 next = 0;
2911                                 if (p) {
2912                                         *p = 0;
2913                                         next = p+1;
2914                                 }
2915
2916                                 if (strlen(token) > sizeof(puuid->uuid) - 1) {
2917                                         ret = -E2BIG;
2918                                         goto err_free;
2919                                 }
2920
2921                                 strncpy(puuid->uuid, token,
2922                                         sizeof(puuid->uuid));
2923                         }
2924 err_free:
2925                         if (buf)
2926                                 free(buf);
2927                         break;
2928                 }
2929                 case 'p':
2930                         param.fp_zero_end = 1;
2931                         break;
2932                 case 'P':
2933                         break;
2934                 case LFS_PROJID_OPT:
2935                         rc = name2projid(&param.fp_projid, optarg);
2936                         if (rc) {
2937                                 param.fp_projid = strtoul(optarg, &endptr, 10);
2938                                 if (*endptr != '\0') {
2939                                         fprintf(stderr,
2940                                                 "Invalid project ID: %s",
2941                                                 optarg);
2942                                         ret = -1;
2943                                         goto err;
2944                                 }
2945                         }
2946                         param.fp_exclude_projid = !!neg_opt;
2947                         param.fp_check_projid = 1;
2948                         break;
2949                 case 's':
2950                         if (optarg[0] == '+') {
2951                                 param.fp_size_sign = -1;
2952                                 optarg++;
2953                         } else if (optarg[0] == '-') {
2954                                 param.fp_size_sign =  1;
2955                                 optarg++;
2956                         }
2957
2958                         ret = llapi_parse_size(optarg, &param.fp_size,
2959                                                &param.fp_size_units, 0);
2960                         if (ret) {
2961                                 fprintf(stderr, "error: bad file size '%s'\n",
2962                                         optarg);
2963                                 goto err;
2964                         }
2965                         param.fp_check_size = 1;
2966                         param.fp_exclude_size = !!neg_opt;
2967                         break;
2968                 case 'S':
2969                         if (optarg[0] == '+') {
2970                                 param.fp_stripe_size_sign = -1;
2971                                 optarg++;
2972                         } else if (optarg[0] == '-') {
2973                                 param.fp_stripe_size_sign =  1;
2974                                 optarg++;
2975                         }
2976
2977                         ret = llapi_parse_size(optarg, &param.fp_stripe_size,
2978                                                &param.fp_stripe_size_units, 0);
2979                         if (ret) {
2980                                 fprintf(stderr, "error: bad stripe_size '%s'\n",
2981                                         optarg);
2982                                 goto err;
2983                         }
2984                         param.fp_check_stripe_size = 1;
2985                         param.fp_exclude_stripe_size = !!neg_opt;
2986                         break;
2987                 case 't':
2988                         param.fp_exclude_type = !!neg_opt;
2989                         switch (optarg[0]) {
2990                         case 'b':
2991                                 param.fp_type = S_IFBLK;
2992                                 break;
2993                         case 'c':
2994                                 param.fp_type = S_IFCHR;
2995                                 break;
2996                         case 'd':
2997                                 param.fp_type = S_IFDIR;
2998                                 break;
2999                         case 'f':
3000                                 param.fp_type = S_IFREG;
3001                                 break;
3002                         case 'l':
3003                                 param.fp_type = S_IFLNK;
3004                                 break;
3005                         case 'p':
3006                                 param.fp_type = S_IFIFO;
3007                                 break;
3008                         case 's':
3009                                 param.fp_type = S_IFSOCK;
3010                &n