Whamcloud - gitweb
LU-8631 quota: better error message for 'lfs quota'
[fs/lustre-release.git] / lustre / utils / lfs.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/utils/lfs.c
33  *
34  * Author: Peter J. Braam <braam@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  * Author: Robert Read <rread@clusterfs.com>
37  */
38
39 /* for O_DIRECTORY */
40 #ifndef _GNU_SOURCE
41 #define _GNU_SOURCE
42 #endif
43
44 #include <stdlib.h>
45 #include <stdio.h>
46 #include <getopt.h>
47 #include <string.h>
48 #include <mntent.h>
49 #include <unistd.h>
50 #include <errno.h>
51 #include <err.h>
52 #include <pwd.h>
53 #include <grp.h>
54 #include <sys/ioctl.h>
55 #include <sys/quota.h>
56 #include <sys/time.h>
57 #include <sys/types.h>
58 #include <sys/stat.h>
59 #include <fcntl.h>
60 #include <dirent.h>
61 #include <time.h>
62 #include <ctype.h>
63 #ifdef HAVE_SYS_QUOTA_H
64 # include <sys/quota.h>
65 #endif
66
67 #include <libcfs/util/string.h>
68 #include <libcfs/util/ioctl.h>
69 #include <libcfs/util/parser.h>
70 #include <lustre/lustreapi.h>
71 #include <lustre_ver.h>
72 #include <lustre_param.h>
73
/* Element count of a real array object (not a pointer). Only defined here
 * when no previously included header already supplies it. */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif /* !defined(ARRAY_SIZE) */
77
78 /* all functions */
79 static int lfs_setstripe(int argc, char **argv);
80 static int lfs_find(int argc, char **argv);
81 static int lfs_getstripe(int argc, char **argv);
82 static int lfs_getdirstripe(int argc, char **argv);
83 static int lfs_setdirstripe(int argc, char **argv);
84 static int lfs_rmentry(int argc, char **argv);
85 static int lfs_osts(int argc, char **argv);
86 static int lfs_mdts(int argc, char **argv);
87 static int lfs_df(int argc, char **argv);
88 static int lfs_getname(int argc, char **argv);
89 static int lfs_check(int argc, char **argv);
90 #ifdef HAVE_SYS_QUOTA_H
91 static int lfs_setquota(int argc, char **argv);
92 static int lfs_quota(int argc, char **argv);
93 #endif
94 static int lfs_flushctx(int argc, char **argv);
95 static int lfs_join(int argc, char **argv);
96 static int lfs_lsetfacl(int argc, char **argv);
97 static int lfs_lgetfacl(int argc, char **argv);
98 static int lfs_rsetfacl(int argc, char **argv);
99 static int lfs_rgetfacl(int argc, char **argv);
100 static int lfs_cp(int argc, char **argv);
101 static int lfs_ls(int argc, char **argv);
102 static int lfs_poollist(int argc, char **argv);
103 static int lfs_changelog(int argc, char **argv);
104 static int lfs_changelog_clear(int argc, char **argv);
105 static int lfs_fid2path(int argc, char **argv);
106 static int lfs_path2fid(int argc, char **argv);
107 static int lfs_data_version(int argc, char **argv);
108 static int lfs_hsm_state(int argc, char **argv);
109 static int lfs_hsm_set(int argc, char **argv);
110 static int lfs_hsm_clear(int argc, char **argv);
111 static int lfs_hsm_action(int argc, char **argv);
112 static int lfs_hsm_archive(int argc, char **argv);
113 static int lfs_hsm_restore(int argc, char **argv);
114 static int lfs_hsm_release(int argc, char **argv);
115 static int lfs_hsm_remove(int argc, char **argv);
116 static int lfs_hsm_cancel(int argc, char **argv);
117 static int lfs_swap_layouts(int argc, char **argv);
118 static int lfs_mv(int argc, char **argv);
119 static int lfs_ladvise(int argc, char **argv);
120
/* Setstripe and migrate share mostly the same parameters */

/* Option synopsis shared by the setstripe and migrate usage texts; "cmd" is
 * the command name as a string literal. */
#define SSM_CMD_COMMON(cmd) \
        "usage: "cmd" [--stripe-count|-c <stripe_count>]\n"             \
        "                 [--stripe-index|-i <start_ost_idx>]\n"        \
        "                 [--stripe-size|-S <stripe_size>]\n"           \
        "                 [--pool|-p <pool_name>]\n"                    \
        "                 [--ost|-o <ost_indices>]\n"

/* Explanation of the placeholders used in SSM_CMD_COMMON(). */
#define SSM_HELP_COMMON \
        "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n" \
        "\t              Can be specified with k, m or g (in KB, MB and GB\n" \
        "\t              respectively)\n"                               \
        "\tstart_ost_idx: OST index of first stripe (-1 default)\n"     \
        "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n" \
        "\tpool_name:    Name of OST pool to use (default none)\n"      \
        "\tost_indices:  List of OST indices, can be repeated multiple times\n"\
        "\t              Indices can be specified in a format of:\n"    \
        "\t                -o <ost_1>,<ost_i>-<ost_j>,<ost_n>\n"        \
        "\t              Or:\n"                                         \
        "\t                -o <ost_1> -o <ost_i>-<ost_j> -o <ost_n>\n"  \
        "\t              If --pool is set with --ost, then the OSTs\n" \
        "\t              must be the members of the pool."

/* Full usage text of "setstripe". */
#define SETSTRIPE_USAGE                                         \
        SSM_CMD_COMMON("setstripe")                             \
        "                 <directory|filename>\n"               \
        SSM_HELP_COMMON

/* Full usage text of a file "migrate": the common options plus the
 * blocking/non-blocking switches. */
#define MIGRATE_USAGE                                                   \
        SSM_CMD_COMMON("migrate  ")                                     \
        "                 [--block|-b]\n"                               \
        "                 [--non-block|-n]\n"                           \
        "                 <filename>\n"                                 \
        SSM_HELP_COMMON                                                 \
        "\n"                                                            \
        "\tblock:        Block file access during data migration (default)\n" \
        "\tnon-block:    Abort migrations if concurrent access is detected\n"

/* Option synopsis and placeholder help shared by "setdirstripe" and
 * "mkdir"; the "usage:" line itself is supplied by the command entry. */
#define SETDIRSTRIPE_USAGE                                      \
        "               [--mdt-count|-c stripe_count]\n"        \
        "               [--mdt-index|-i mdt_index]\n"           \
        "               [--mdt-hash|-t mdt_hash]\n"             \
        "               [--default_stripe|-D] [--mode|-m mode] <dir>\n" \
        "\tstripe_count: stripe count of the striped directory\n"       \
        "\tmdt_index: MDT index of first stripe\n"                      \
        "\tmdt_hash:  hash type of the striped directory. mdt types:\n" \
        "       fnv_1a_64 FNV-1a hash algorithm (default)\n"            \
        "       all_char  sum of characters % MDT_COUNT (not recommended)\n" \
        "\tdefault_stripe: set default dirstripe of the directory\n"    \
        "\tmode: the mode of the directory\n"
171
/* Program name used as the prefix of the error messages printed to stderr. */
static const char *progname;

/* Whether file leases may be used. lfs_migrate() clears it once
 * llapi_lease_get() reports -EOPNOTSUPP, after which check_lease() becomes
 * a no-op. */
static bool file_lease_supported = true;
174
175 /* all available commands */
176 command_t cmdlist[] = {
177         {"setstripe", lfs_setstripe, 0,
178          "Create a new file with a specific striping pattern or\n"
179          "set the default striping pattern on an existing directory or\n"
180          "delete the default striping pattern from an existing directory\n"
181          "usage: setstripe -d <directory>   (to delete default striping)\n"\
182          " or\n"
183          SETSTRIPE_USAGE},
184         {"getstripe", lfs_getstripe, 0,
185          "To list the striping info for a given file or files in a\n"
186          "directory or recursively for all files in a directory tree.\n"
187          "usage: getstripe [--ost|-O <uuid>] [--quiet|-q] [--verbose|-v]\n"
188          "                 [--stripe-count|-c] [--stripe-index|-i]\n"
189          "                 [--pool|-p] [--stripe-size|-S] [--directory|-d]\n"
190          "                 [--mdt|-m] [--recursive|-r] [--raw|-R]\n"
191          "                 [--layout|-L] [--fid|-F] [--generation|-g]\n"
192          "                 <directory|filename> ..."},
193         {"setdirstripe", lfs_setdirstripe, 0,
194          "To create a striped directory on a specified MDT. This can only\n"
195          "be done on MDT0 with the right of administrator.\n"
196          "usage: setdirstripe [OPTION] <directory>\n"
197          SETDIRSTRIPE_USAGE},
198         {"getdirstripe", lfs_getdirstripe, 0,
199          "To list the striping info for a given directory\n"
200          "or recursively for all directories in a directory tree.\n"
201          "usage: getdirstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v]\n"
202          "               [--count|-c ] [--index|-i ] [--raw|-R]\n"
203          "               [--recursive | -r] [ --default_stripe | -D ] <dir> "},
204         {"mkdir", lfs_setdirstripe, 0,
205          "To create a striped directory on a specified MDT. This can only\n"
206          "be done on MDT0 with the right of administrator.\n"
207          "usage: mkdir [OPTION] <directory>\n"
208          SETDIRSTRIPE_USAGE},
209         {"rm_entry", lfs_rmentry, 0,
210          "To remove the name entry of the remote directory. Note: This\n"
211          "command will only delete the name entry, i.e. the remote directory\n"
212          "will become inaccessable after this command. This can only be done\n"
213          "by the administrator\n"
214          "usage: rm_entry <dir>\n"},
215         {"pool_list", lfs_poollist, 0,
216          "List pools or pool OSTs\n"
217          "usage: pool_list <fsname>[.<pool>] | <pathname>\n"},
218         {"find", lfs_find, 0,
219          "find files matching given attributes recursively in directory tree.\n"
220          "usage: find <directory|filename> ...\n"
221          "     [[!] --atime|-A [+-]N] [[!] --ctime|-C [+-]N]\n"
222          "     [[!] --mtime|-M [+-]N] [[!] --mdt|-m <uuid|index,...>]\n"
223          "     [--maxdepth|-D N] [[!] --name|-n <pattern>]\n"
224          "     [[!] --ost|-O <uuid|index,...>] [--print|-p] [--print0|-P]\n"
225          "     [[!] --size|-s [+-]N[bkMGTPE]]\n"
226          "     [[!] --stripe-count|-c [+-]<stripes>]\n"
227          "     [[!] --stripe-index|-i <index,...>]\n"
228          "     [[!] --stripe-size|-S [+-]N[kMGT]] [[!] --type|-t <filetype>]\n"
229          "     [[!] --gid|-g|--group|-G <gid>|<gname>]\n"
230          "     [[!] --uid|-u|--user|-U <uid>|<uname>] [[!] --pool <pool>]\n"
231          "     [[!] --layout|-L released,raid0]\n"
232          "\t !: used before an option indicates 'NOT' requested attribute\n"
233          "\t -: used before a value indicates 'AT MOST' requested value\n"
234          "\t +: used before a value indicates 'AT LEAST' requested value\n"},
235         {"check", lfs_check, 0,
236          "Display the status of MDS or OSTs (as specified in the command)\n"
237          "or all the servers (MDS and OSTs).\n"
238          "usage: check <osts|mds|servers>"},
239         {"join", lfs_join, 0,
240          "join two lustre files into one.\n"
241          "obsolete, HEAD does not support it anymore.\n"},
242         {"osts", lfs_osts, 0, "list OSTs connected to client "
243          "[for specified path only]\n" "usage: osts [path]"},
244         {"mdts", lfs_mdts, 0, "list MDTs connected to client "
245          "[for specified path only]\n" "usage: mdts [path]"},
246         {"df", lfs_df, 0,
247          "report filesystem disk space usage or inodes usage"
248          "of each MDS and all OSDs or a batch belonging to a specific pool .\n"
249          "Usage: df [-i] [-h] [--lazy|-l] [--pool|-p <fsname>[.<pool>] [path]"},
250         {"getname", lfs_getname, 0, "list instances and specified mount points "
251          "[for specified path only]\n"
252          "Usage: getname [-h]|[path ...] "},
253 #ifdef HAVE_SYS_QUOTA_H
254         {"setquota", lfs_setquota, 0, "Set filesystem quotas.\n"
255          "usage: setquota <-u|-g> <uname>|<uid>|<gname>|<gid>\n"
256          "                -b <block-softlimit> -B <block-hardlimit>\n"
257          "                -i <inode-softlimit> -I <inode-hardlimit> <filesystem>\n"
258          "       setquota <-u|--user|-g|--group> <uname>|<uid>|<gname>|<gid>\n"
259          "                [--block-softlimit <block-softlimit>]\n"
260          "                [--block-hardlimit <block-hardlimit>]\n"
261          "                [--inode-softlimit <inode-softlimit>]\n"
262          "                [--inode-hardlimit <inode-hardlimit>] <filesystem>\n"
263          "       setquota [-t] <-u|--user|-g|--group>\n"
264          "                [--block-grace <block-grace>]\n"
265          "                [--inode-grace <inode-grace>] <filesystem>\n"
266          "       -b can be used instead of --block-softlimit/--block-grace\n"
267          "       -B can be used instead of --block-hardlimit\n"
268          "       -i can be used instead of --inode-softlimit/--inode-grace\n"
269          "       -I can be used instead of --inode-hardlimit\n\n"
270          "Note: The total quota space will be split into many qunits and\n"
271          "      balanced over all server targets, the minimal qunit size is\n"
272          "      1M bytes for block space and 1K inodes for inode space.\n\n"
273          "      Quota space rebalancing process will stop when this mininum\n"
274          "      value is reached. As a result, quota exceeded can be returned\n"
275          "      while many targets still have 1MB or 1K inodes of spare\n"
276          "      quota space."},
277         {"quota", lfs_quota, 0, "Display disk usage and limits.\n"
278          "usage: quota [-q] [-v] [-h] [-o <obd_uuid>|-i <mdt_idx>|-I "
279                        "<ost_idx>]\n"
280          "             [<-u|-g> <uname>|<uid>|<gname>|<gid>] <filesystem>\n"
281          "       quota [-o <obd_uuid>|-i <mdt_idx>|-I <ost_idx>] -t <-u|-g> <filesystem>"},
282 #endif
283         {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n"
284          "usage: flushctx [-k] [mountpoint...]"},
285         {"lsetfacl", lfs_lsetfacl, 0,
286          "Remote user setfacl for user/group on the same remote client.\n"
287          "usage: lsetfacl [-bkndRLPvh] [{-m|-x} acl_spec] [{-M|-X} acl_file] file ..."},
288         {"lgetfacl", lfs_lgetfacl, 0,
289          "Remote user getfacl for user/group on the same remote client.\n"
290          "usage: lgetfacl [-dRLPvh] file ..."},
291         {"rsetfacl", lfs_rsetfacl, 0,
292          "Remote user setfacl for user/group on other clients.\n"
293          "usage: rsetfacl [-bkndRLPvh] [{-m|-x} acl_spec] [{-M|-X} acl_file] file ..."},
294         {"rgetfacl", lfs_rgetfacl, 0,
295          "Remote user getfacl for user/group on other clients.\n"
296          "usage: rgetfacl [-dRLPvh] file ..."},
297         {"cp", lfs_cp, 0,
298          "Remote user copy files and directories.\n"
299          "usage: cp [OPTION]... [-T] SOURCE DEST\n\tcp [OPTION]... SOURCE... DIRECTORY\n\tcp [OPTION]... -t DIRECTORY SOURCE..."},
300         {"ls", lfs_ls, 0,
301          "Remote user list directory contents.\n"
302          "usage: ls [OPTION]... [FILE]..."},
303         {"changelog", lfs_changelog, 0,
304          "Show the metadata changes on an MDT."
305          "\nusage: changelog <mdtname> [startrec [endrec]]"},
306         {"changelog_clear", lfs_changelog_clear, 0,
307          "Indicate that old changelog records up to <endrec> are no longer of "
308          "interest to consumer <id>, allowing the system to free up space.\n"
309          "An <endrec> of 0 means all records.\n"
310          "usage: changelog_clear <mdtname> <id> <endrec>"},
311         {"fid2path", lfs_fid2path, 0,
312          "Resolve the full path(s) for given FID(s). For a specific hardlink "
313          "specify link number <linkno>.\n"
314         /* "For a historical link name, specify changelog record <recno>.\n" */
315          "usage: fid2path [--link <linkno>] <fsname|rootpath> <fid> ..."
316                 /* [ --rec <recno> ] */ },
317         {"path2fid", lfs_path2fid, 0, "Display the fid(s) for a given path(s).\n"
318          "usage: path2fid [--parents] <path> ..."},
319         {"data_version", lfs_data_version, 0, "Display file data version for "
320          "a given path.\n" "usage: data_version -[n|r|w] <path>"},
321         {"hsm_state", lfs_hsm_state, 0, "Display the HSM information (states, "
322          "undergoing actions) for given files.\n usage: hsm_state <file> ..."},
323         {"hsm_set", lfs_hsm_set, 0, "Set HSM user flag on specified files.\n"
324          "usage: hsm_set [--norelease] [--noarchive] [--dirty] [--exists] "
325          "[--archived] [--lost] <file> ..."},
326         {"hsm_clear", lfs_hsm_clear, 0, "Clear HSM user flag on specified "
327          "files.\n"
328          "usage: hsm_clear [--norelease] [--noarchive] [--dirty] [--exists] "
329          "[--archived] [--lost] <file> ..."},
330         {"hsm_action", lfs_hsm_action, 0, "Display current HSM request for "
331          "given files.\n" "usage: hsm_action <file> ..."},
332         {"hsm_archive", lfs_hsm_archive, 0,
333          "Archive file to external storage.\n"
334          "usage: hsm_archive [--filelist FILELIST] [--data DATA] [--archive NUM] "
335          "<file> ..."},
336         {"hsm_restore", lfs_hsm_restore, 0,
337          "Restore file from external storage.\n"
338          "usage: hsm_restore [--filelist FILELIST] [--data DATA] <file> ..."},
339         {"hsm_release", lfs_hsm_release, 0,
340          "Release files from Lustre.\n"
341          "usage: hsm_release [--filelist FILELIST] [--data DATA] <file> ..."},
342         {"hsm_remove", lfs_hsm_remove, 0,
343          "Remove file copy from external storage.\n"
344          "usage: hsm_remove [--filelist FILELIST] [--data DATA]\n"
345          "                  [--mntpath MOUNTPATH] [--archive NUM] <file|FID> ...\n"
346          "\n"
347          "Note: To remove files from the archive that have been deleted on\n"
348          "Lustre, set mntpath and optionally archive. In that case, all the\n"
349          "positional arguments and entries in the file list must be FIDs."
350         },
351         {"hsm_cancel", lfs_hsm_cancel, 0,
352          "Cancel requests related to specified files.\n"
353          "usage: hsm_cancel [--filelist FILELIST] [--data DATA] <file> ..."},
354         {"swap_layouts", lfs_swap_layouts, 0, "Swap layouts between 2 files.\n"
355          "usage: swap_layouts <path1> <path2>"},
356         {"migrate", lfs_setstripe, 0,
357          "migrate a directory between MDTs.\n"
358          "usage: migrate --mdt-index <mdt_idx> [--verbose|-v] "
359          "<directory>\n"
360          "\tmdt_idx:      index of the destination MDT\n"
361          "\n"
362          "migrate file objects from one OST "
363          "layout\nto another (may be not safe with concurent writes).\n"
364          "usage: migrate  "
365          "[--stripe-count|-c] <stripe_count>\n"
366          "              [--stripe-index|-i] <start_ost_index>\n"
367          "              [--stripe-size|-S] <stripe_size>\n"
368          "              [--pool|-p] <pool_name>\n"
369          "              [--ost-list|-o] <ost_indices>\n"
370          "              [--block|-b]\n"
371          "              [--non-block|-n]\n"
372          "              <file|directory>\n"
373          "\tstripe_count:     number of OSTs to stripe a file over\n"
374          "\tstripe_ost_index: index of the first OST to stripe a file over\n"
375          "\tstripe_size:      number of bytes to store before moving to the next OST\n"
376          "\tpool_name:        name of the predefined pool of OSTs\n"
377          "\tost_indices:      OSTs to stripe over, in order\n"
378          "\tblock:            wait for the operation to return before continuing\n"
379          "\tnon-block:        do not wait for the operation to return.\n"},
380         {"mv", lfs_mv, 0,
381          "To move directories between MDTs. This command is deprecated, "
382          "use \"migrate\" instead.\n"
383          "usage: mv <directory|filename> [--mdt-index|-M] <mdt_index> "
384          "[--verbose|-v]\n"},
385         {"ladvise", lfs_ladvise, 0,
386          "Provide servers with advice about access patterns for a file.\n"
387          "usage: ladvise [--advice|-a ADVICE] [--start|-s START[kMGT]]\n"
388          "               [--background|-b]\n"
389          "               {[--end|-e END[kMGT]] | [--length|-l LENGTH[kMGT]]}\n"
390          "               <file> ..."},
391         {"help", Parser_help, 0, "help"},
392         {"exit", Parser_quit, 0, "quit"},
393         {"quit", Parser_quit, 0, "quit"},
394         {"--version", Parser_version, 0,
395          "output build version of the utility and exit"},
396         { 0, 0, 0, NULL }
397 };
398
399
/* Migration flag value. NOTE(review): presumably a bit of the
 * migration_flags argument of lfs_migrate() selecting the non-blocking
 * path; its use is outside this excerpt - confirm. */
#define MIGRATION_NONBLOCK 1
401
402 /**
403  * Internal helper for migrate_copy_data(). Check lease and report error if
404  * need be.
405  *
406  * \param[in]  fd           File descriptor on which to check the lease.
407  * \param[out] lease_broken Set to true if the lease was broken.
408  * \param[in]  group_locked Whether a group lock was taken or not.
409  * \param[in]  path         Name of the file being processed, for error
410  *                          reporting
411  *
412  * \retval 0       Migration can keep on going.
413  * \retval -errno  Error occurred, abort migration.
414  */
415 static int check_lease(int fd, bool *lease_broken, bool group_locked,
416                        const char *path)
417 {
418         int rc;
419
420         if (!file_lease_supported)
421                 return 0;
422
423         rc = llapi_lease_check(fd);
424         if (rc > 0)
425                 return 0; /* llapi_check_lease returns > 0 on success. */
426
427         if (!group_locked) {
428                 fprintf(stderr, "%s: cannot migrate '%s': file busy\n",
429                         progname, path);
430                 rc = rc ? rc : -EAGAIN;
431         } else {
432                 fprintf(stderr, "%s: external attempt to access file '%s' "
433                         "blocked until migration ends.\n", progname, path);
434                 rc = 0;
435         }
436         *lease_broken = true;
437         return rc;
438 }
439
440 static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
441                              bool group_locked, const char *fname)
442 {
443         void    *buf = NULL;
444         ssize_t  rsize = -1;
445         ssize_t  wsize = 0;
446         size_t   rpos = 0;
447         size_t   wpos = 0;
448         off_t    bufoff = 0;
449         int      rc;
450         bool     lease_broken = false;
451
452         /* Use a page-aligned buffer for direct I/O */
453         rc = posix_memalign(&buf, getpagesize(), buf_size);
454         if (rc != 0)
455                 return -rc;
456
457         while (1) {
458                 /* read new data only if we have written all
459                  * previously read data */
460                 if (wpos == rpos) {
461                         if (!lease_broken) {
462                                 rc = check_lease(fd_src, &lease_broken,
463                                                  group_locked, fname);
464                                 if (rc < 0)
465                                         goto out;
466                         }
467                         rsize = read(fd_src, buf, buf_size);
468                         if (rsize < 0) {
469                                 rc = -errno;
470                                 fprintf(stderr, "%s: %s: read failed: %s\n",
471                                         progname, fname, strerror(-rc));
472                                 goto out;
473                         }
474                         rpos += rsize;
475                         bufoff = 0;
476                 }
477                 /* eof ? */
478                 if (rsize == 0)
479                         break;
480
481                 wsize = write(fd_dst, buf + bufoff, rpos - wpos);
482                 if (wsize < 0) {
483                         rc = -errno;
484                         fprintf(stderr,
485                                 "%s: %s: write failed on volatile: %s\n",
486                                 progname, fname, strerror(-rc));
487                         goto out;
488                 }
489                 wpos += wsize;
490                 bufoff += wsize;
491         }
492
493         rc = fsync(fd_dst);
494         if (rc < 0) {
495                 rc = -errno;
496                 fprintf(stderr, "%s: %s: fsync failed: %s\n",
497                         progname, fname, strerror(-rc));
498         }
499
500 out:
501         free(buf);
502         return rc;
503 }
504
/**
 * Apply the access and modification times recorded in \a st to the file
 * open on \a fdv. Only whole seconds are carried over; the microsecond
 * fields are set to zero.
 *
 * \param[in] fdv  Descriptor of the file whose timestamps are set.
 * \param[in] st   Stat data providing st_atime and st_mtime.
 *
 * \retval 0       Timestamps applied.
 * \retval -errno  futimes() failed.
 */
static int migrate_copy_timestamps(int fdv, const struct stat *st)
{
        struct timeval  tv[2] = {
                {.tv_sec = st->st_atime},
                {.tv_sec = st->st_mtime}
        };

        /* futimes() reports failure as -1 with errno set. Callers pass the
         * return value on as a negative errno, so convert it here rather
         * than leaking a bare -1 (which would read as -EPERM). */
        if (futimes(fdv, tv) < 0)
                return -errno;

        return 0;
}
514
515 static int migrate_block(int fd, int fdv, const struct stat *st,
516                          size_t buf_size, const char *name)
517 {
518         __u64   dv1;
519         int     gid;
520         int     rc;
521         int     rc2;
522
523         rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
524         if (rc < 0) {
525                 fprintf(stderr, "%s: %s: cannot get dataversion: %s\n",
526                         progname, name, strerror(-rc));
527                 return rc;
528         }
529
530         do
531                 gid = random();
532         while (gid == 0);
533
534         /* The grouplock blocks all concurrent accesses to the file.
535          * It has to be taken after llapi_get_data_version as it would
536          * block it too. */
537         rc = llapi_group_lock(fd, gid);
538         if (rc < 0) {
539                 fprintf(stderr, "%s: %s: cannot get group lock: %s\n",
540                         progname, name, strerror(-rc));
541                 return rc;
542         }
543
544         rc = migrate_copy_data(fd, fdv, buf_size, true, name);
545         if (rc < 0) {
546                 fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
547                 goto out_unlock;
548         }
549
550         /* Make sure we keep original atime/mtime values */
551         rc = migrate_copy_timestamps(fdv, st);
552         if (rc < 0) {
553                 fprintf(stderr, "%s: %s: timestamp copy failed\n",
554                         progname, name);
555                 goto out_unlock;
556         }
557
558         /* swap layouts
559          * for a migration we need to check data version on file did
560          * not change.
561          *
562          * Pass in gid=0 since we already own grouplock. */
563         rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0,
564                                            SWAP_LAYOUTS_CHECK_DV1);
565         if (rc == -EAGAIN) {
566                 fprintf(stderr, "%s: %s: dataversion changed during copy, "
567                         "migration aborted\n", progname, name);
568                 goto out_unlock;
569         } else if (rc < 0) {
570                 fprintf(stderr, "%s: %s: cannot swap layouts: %s\n", progname,
571                         name, strerror(-rc));
572                 goto out_unlock;
573         }
574
575 out_unlock:
576         rc2 = llapi_group_unlock(fd, gid);
577         if (rc2 < 0 && rc == 0) {
578                 fprintf(stderr, "%s: %s: putting group lock failed: %s\n",
579                         progname, name, strerror(-rc2));
580                 rc = rc2;
581         }
582
583         return rc;
584 }
585
586 static int migrate_nonblock(int fd, int fdv, const struct stat *st,
587                             size_t buf_size, const char *name)
588 {
589         __u64   dv1;
590         __u64   dv2;
591         int     rc;
592
593         rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
594         if (rc < 0) {
595                 fprintf(stderr, "%s: %s: cannot get data version: %s\n",
596                         progname, name, strerror(-rc));
597                 return rc;
598         }
599
600         rc = migrate_copy_data(fd, fdv, buf_size, false, name);
601         if (rc < 0) {
602                 fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
603                 return rc;
604         }
605
606         rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH);
607         if (rc != 0) {
608                 fprintf(stderr, "%s: %s: cannot get data version: %s\n",
609                         progname, name, strerror(-rc));
610                 return rc;
611         }
612
613         if (dv1 != dv2) {
614                 rc = -EAGAIN;
615                 fprintf(stderr, "%s: %s: data version changed during "
616                                 "migration\n",
617                         progname, name);
618                 return rc;
619         }
620
621         /* Make sure we keep original atime/mtime values */
622         rc = migrate_copy_timestamps(fdv, st);
623         if (rc < 0) {
624                 fprintf(stderr, "%s: %s: timestamp copy failed\n",
625                         progname, name);
626                 return rc;
627         }
628
629         /* Atomically put lease, swap layouts and close.
630          * for a migration we need to check data version on file did
631          * not change. */
632         rc = llapi_fswap_layouts(fd, fdv, 0, 0, SWAP_LAYOUTS_CLOSE);
633         if (rc < 0) {
634                 fprintf(stderr, "%s: %s: cannot swap layouts: %s\n",
635                         progname, name, strerror(-rc));
636                 return rc;
637         }
638
639         return 0;
640 }
641
642 static int lfs_migrate(char *name, __u64 migration_flags,
643                        struct llapi_stripe_param *param)
644 {
645         int                      fd = -1;
646         int                      fdv = -1;
647         char                     parent[PATH_MAX];
648         int                      mdt_index;
649         int                      random_value;
650         char                     volatile_file[sizeof(parent) +
651                                                LUSTRE_VOLATILE_HDR_LEN +
652                                                2 * sizeof(mdt_index) +
653                                                2 * sizeof(random_value) + 4];
654         char                    *ptr;
655         int                      rc;
656         struct lov_user_md      *lum = NULL;
657         int                      lum_size;
658         int                      buf_size;
659         bool                     have_lease_rdlck = false;
660         struct stat              st;
661         struct stat              stv;
662
663         /* find the right size for the IO and allocate the buffer */
664         lum_size = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
665         lum = malloc(lum_size);
666         if (lum == NULL) {
667                 rc = -ENOMEM;
668                 goto free;
669         }
670
671         rc = llapi_file_get_stripe(name, lum);
672         /* failure can happen for many reasons and some may be not real errors
673          * (eg: no stripe)
674          * in case of a real error, a later call will fail with better
675          * error management */
676         if (rc < 0)
677                 buf_size = 1024 * 1024;
678         else
679                 buf_size = lum->lmm_stripe_size;
680
681         /* open file, direct io */
682         /* even if the file is only read, WR mode is nedeed to allow
683          * layout swap on fd */
684         fd = open(name, O_RDWR | O_DIRECT);
685         if (fd == -1) {
686                 rc = -errno;
687                 fprintf(stderr, "%s: %s: cannot open: %s\n", progname, name,
688                         strerror(-rc));
689                 goto free;
690         }
691
692         if (file_lease_supported) {
693                 rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
694                 if (rc == -EOPNOTSUPP) {
695                         /* Older servers do not support file lease.
696                          * Disable related checks. This opens race conditions
697                          * as explained in LU-4840 */
698                         file_lease_supported = false;
699                 } else if (rc < 0) {
700                         fprintf(stderr, "%s: %s: cannot get open lease: %s\n",
701                                 progname, name, strerror(-rc));
702                         goto error;
703                 } else {
704                         have_lease_rdlck = true;
705                 }
706         }
707
708         /* search for file directory pathname */
709         if (strlen(name) > sizeof(parent)-1) {
710                 rc = -E2BIG;
711                 goto error;
712         }
713         strncpy(parent, name, sizeof(parent));
714         ptr = strrchr(parent, '/');
715         if (ptr == NULL) {
716                 if (getcwd(parent, sizeof(parent)) == NULL) {
717                         rc = -errno;
718                         goto error;
719                 }
720         } else {
721                 if (ptr == parent)
722                         strcpy(parent, "/");
723                 else
724                         *ptr = '\0';
725         }
726
727         rc = llapi_file_fget_mdtidx(fd, &mdt_index);
728         if (rc < 0) {
729                 fprintf(stderr, "%s: %s: cannot get MDT index: %s\n",
730                         progname, name, strerror(-rc));
731                 goto error;
732         }
733
734         do {
735                 random_value = random();
736                 rc = snprintf(volatile_file, sizeof(volatile_file),
737                               "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
738                               mdt_index, random_value);
739                 if (rc >= sizeof(volatile_file)) {
740                         rc = -E2BIG;
741                         goto error;
742                 }
743
744                 /* create, open a volatile file, use caching (ie no directio) */
745                 fdv = llapi_file_open_param(volatile_file,
746                                 O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW,
747                                             S_IRUSR | S_IWUSR, param);
748         } while (fdv == -EEXIST);
749
750         if (fdv < 0) {
751                 rc = fdv;
752                 fprintf(stderr, "%s: %s: cannot create volatile file in"
753                                 " directory: %s\n",
754                         progname, parent, strerror(-rc));
755                 goto error;
756         }
757
758         /* Not-owner (root?) special case.
759          * Need to set owner/group of volatile file like original.
760          * This will allow to pass related check during layout_swap.
761          */
762         rc = fstat(fd, &st);
763         if (rc != 0) {
764                 rc = -errno;
765                 fprintf(stderr, "%s: %s: cannot stat: %s\n", progname, name,
766                         strerror(errno));
767                 goto error;
768         }
769         rc = fstat(fdv, &stv);
770         if (rc != 0) {
771                 rc = -errno;
772                 fprintf(stderr, "%s: %s: cannot stat: %s\n", progname,
773                         volatile_file, strerror(errno));
774                 goto error;
775         }
776         if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
777                 rc = fchown(fdv, st.st_uid, st.st_gid);
778                 if (rc != 0) {
779                         rc = -errno;
780                         fprintf(stderr, "%s: %s: cannot chown: %s\n", progname,
781                                 name, strerror(errno));
782                         goto error;
783                 }
784         }
785
786         if (migration_flags & MIGRATION_NONBLOCK && file_lease_supported) {
787                 rc = migrate_nonblock(fd, fdv, &st, buf_size, name);
788                 if (rc == 0) {
789                         have_lease_rdlck = false;
790                         fdv = -1; /* The volatile file is closed as we put the
791                                    * lease in non-blocking mode. */
792                 }
793         } else {
794                 /* Blocking mode (forced if servers do not support file lease).
795                  * It is also the default mode, since we cannot distinguish
796                  * between a broken lease and a server that does not support
797                  * atomic swap/close (LU-6785) */
798                 rc = migrate_block(fd, fdv, &st, buf_size, name);
799         }
800
801 error:
802         if (have_lease_rdlck)
803                 llapi_lease_put(fd);
804
805         if (fd >= 0)
806                 close(fd);
807
808         if (fdv >= 0)
809                 close(fdv);
810
811 free:
812         if (lum)
813                 free(lum);
814
815         return rc;
816 }
817
/**
 * Parse a single "N" or "N-M" token of an OST index list.
 *
 * \param[in]  token   NUL-terminated token (must not contain a comma)
 * \param[out] first   first index of the range
 * \param[out] last    last index of the range, equal to \a first for a
 *                     single index
 *
 * \retval 0           token parsed successfully
 * \retval -EINVAL     token is empty, has trailing garbage, is negative,
 *                     does not fit in an int, has an empty range end or
 *                     describes a descending range
 */
static int parse_target_range(const char *token, long *first, long *last)
{
	char *endptr = NULL;
	const char *range_str;

	errno = 0;
	*first = strtol(token, &endptr, 0);
	if (endptr == token) /* no data at all */
		return -EINVAL;
	if (*endptr != '-' && *endptr != '\0') /* has invalid data */
		return -EINVAL;
	if (errno == ERANGE || *first < 0 || *first > INT_MAX)
		return -EINVAL;

	*last = *first;
	if (*endptr == '-') {
		range_str = endptr + 1;
		errno = 0;
		*last = strtol(range_str, &endptr, 0);
		/* "N-" with nothing after the dash is malformed */
		if (endptr == range_str || *endptr != '\0')
			return -EINVAL;
		if (errno == ERANGE || *last > INT_MAX || *last < *first)
			return -EINVAL;
	}

	return 0;
}

/**
 * Parse a string containing an OST index list into an array of integers.
 *
 * The input string contains a comma delimited list of individual
 * indices and ranges, for example "1,2-4,7". Add the indices into the
 * \a osts array and remove duplicates.
 *
 * \a arg is split in place while it is being parsed, but every separator
 * is put back before returning, so the string is unchanged on both success
 * and failure.
 *
 * \param[out] osts    array to store indices in
 * \param[in] size     size of \a osts array
 * \param[in] offset   starting index in \a osts
 * \param[in] arg      string containing OST index list
 *
 * \retval positive    number of indices in \a osts
 * \retval -EINVAL     unable to parse \a arg, or \a osts is too small to
 *                     hold every distinct index
 */
static int parse_targets(__u32 *osts, int size, int offset, char *arg)
{
	int nr = offset;
	int rc = -EINVAL;
	bool end_of_list = false;

	if (arg == NULL)
		return -EINVAL;

	while (!end_of_list) {
		char *sep = strchr(arg, ',');
		const char *token = arg;
		long first;
		long last;
		long idx;

		/* Temporarily terminate the current token at its comma. */
		if (sep == NULL)
			end_of_list = true;
		else
			*sep = '\0';

		rc = parse_target_range(token, &first, &last);

		/* Restore the separator right away; the string terminator
		 * of the last token is never touched. */
		if (sep != NULL) {
			*sep = ',';
			arg = sep + 1;
		}
		if (rc < 0)
			break;

		/* The loop ends on idx == last instead of a "<=" condition
		 * so that idx is never incremented past the range end. */
		for (idx = first; ; idx++) {
			int j;

			/* remove duplicate */
			for (j = 0; j < nr; j++) {
				if (osts[j] == (__u32)idx)
					break;
			}
			if (j == nr) { /* no duplicate */
				if (nr >= size) {
					/* a new index with no slot left */
					rc = -EINVAL;
					break;
				}
				osts[nr++] = idx;
			}
			if (idx == last)
				break;
		}
		if (rc < 0)
			break;
	}

	return rc < 0 ? rc : nr;
}
901
902 /* functions */
903 static int lfs_setstripe(int argc, char **argv)
904 {
905         struct llapi_stripe_param       *param = NULL;
906         struct find_param                migrate_mdt_param = {
907                 .fp_max_depth = -1,
908                 .fp_mdt_index = -1,
909         };
910         char                            *fname;
911         int                              result;
912         int                              result2 = 0;
913         unsigned long long               st_size;
914         int                              st_offset, st_count;
915         char                            *end;
916         int                              c;
917         int                              delete = 0;
918         char                            *stripe_size_arg = NULL;
919         char                            *stripe_off_arg = NULL;
920         char                            *stripe_count_arg = NULL;
921         char                            *pool_name_arg = NULL;
922         char                            *mdt_idx_arg = NULL;
923         unsigned long long               size_units = 1;
924         bool                             migrate_mode = false;
925         bool                             migration_block = false;
926         __u64                            migration_flags = 0;
927         __u32                            osts[LOV_MAX_STRIPE_COUNT] = { 0 };
928         int                              nr_osts = 0;
929
930         struct option            long_opts[] = {
931                 /* --block is only valid in migrate mode */
932                 {"block",        no_argument,       0, 'b'},
933 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
934                 /* This formerly implied "stripe-count", but was explicitly
935                  * made "stripe-count" for consistency with other options,
936                  * and to separate it from "mdt-count" when DNE arrives. */
937                 {"count",        required_argument, 0, 'c'},
938 #endif
939                 {"stripe-count", required_argument, 0, 'c'},
940                 {"stripe_count", required_argument, 0, 'c'},
941                 {"delete",       no_argument,       0, 'd'},
942 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
943                 /* This formerly implied "stripe-index", but was explicitly
944                  * made "stripe-index" for consistency with other options,
945                  * and to separate it from "mdt-index" when DNE arrives. */
946                 {"index",        required_argument, 0, 'i'},
947 #endif
948                 {"stripe-index", required_argument, 0, 'i'},
949                 {"stripe_index", required_argument, 0, 'i'},
950                 {"mdt",          required_argument, 0, 'm'},
951                 {"mdt-index",    required_argument, 0, 'm'},
952                 {"mdt_index",    required_argument, 0, 'm'},
953                 /* --non-block is only valid in migrate mode */
954                 {"non-block",    no_argument,       0, 'n'},
955                 {"ost",          required_argument, 0, 'o'},
956 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
957                 {"ost-list",     required_argument, 0, 'o'},
958                 {"ost_list",     required_argument, 0, 'o'},
959 #endif
960                 {"pool",         required_argument, 0, 'p'},
961 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
962                 /* This formerly implied "--stripe-size", but was confusing
963                  * with "lfs find --size|-s", which means "file size", so use
964                  * the consistent "--stripe-size|-S" for all commands. */
965                 {"size",         required_argument, 0, 's'},
966 #endif
967                 {"stripe-size",  required_argument, 0, 'S'},
968                 {"stripe_size",  required_argument, 0, 'S'},
969                 /* --verbose is only valid in migrate mode */
970                 {"verbose",      no_argument,       0, 'v'},
971                 {0, 0, 0, 0}
972         };
973
974         st_size = 0;
975         st_offset = -1;
976         st_count = 0;
977
978         if (strcmp(argv[0], "migrate") == 0)
979                 migrate_mode = true;
980
981         while ((c = getopt_long(argc, argv, "bc:di:m:no:p:s:S:v",
982                                 long_opts, NULL)) >= 0) {
983                 switch (c) {
984                 case 0:
985                         /* Long options. */
986                         break;
987                 case 'b':
988                         if (!migrate_mode) {
989                                 fprintf(stderr, "--block is valid only for"
990                                                 " migrate mode\n");
991                                 return CMD_HELP;
992                         }
993                         migration_block = true;
994                         break;
995                 case 'c':
996 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
997                         if (strcmp(argv[optind - 1], "--count") == 0)
998                                 fprintf(stderr, "warning: '--count' deprecated"
999                                         ", use '--stripe-count' instead\n");
1000 #endif
1001                         stripe_count_arg = optarg;
1002                         break;
1003                 case 'd':
1004                         /* delete the default striping pattern */
1005                         delete = 1;
1006                         break;
1007                 case 'o':
1008                         nr_osts = parse_targets(osts,
1009                                                 sizeof(osts) / sizeof(__u32),
1010                                                 nr_osts, optarg);
1011                         if (nr_osts < 0) {
1012                                 fprintf(stderr,
1013                                         "error: %s: bad OST indices '%s'\n",
1014                                         argv[0], optarg);
1015                                 return CMD_HELP;
1016                         }
1017
1018                         if (st_offset == -1) /* first in the command line */
1019                                 st_offset = osts[0];
1020                         break;
1021                 case 'i':
1022 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
1023                         if (strcmp(argv[optind - 1], "--index") == 0)
1024                                 fprintf(stderr, "warning: '--index' deprecated"
1025                                         ", use '--stripe-index' instead\n");
1026 #endif
1027                         stripe_off_arg = optarg;
1028                         break;
1029                 case 'm':
1030                         if (!migrate_mode) {
1031                                 fprintf(stderr, "--mdt-index is valid only for"
1032                                                 " migrate mode\n");
1033                                 return CMD_HELP;
1034                         }
1035                         mdt_idx_arg = optarg;
1036                         break;
1037                 case 'n':
1038                         if (!migrate_mode) {
1039                                 fprintf(stderr, "--non-block is valid only for"
1040                                                 " migrate mode\n");
1041                                 return CMD_HELP;
1042                         }
1043                         migration_flags |= MIGRATION_NONBLOCK;
1044                         break;
1045 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1046                 case 's':
1047 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
1048                         fprintf(stderr, "warning: '--size|-s' deprecated, "
1049                                 "use '--stripe-size|-S' instead\n");
1050 #endif
1051 #endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0) */
1052                 case 'S':
1053                         stripe_size_arg = optarg;
1054                         break;
1055                 case 'p':
1056                         pool_name_arg = optarg;
1057                         break;
1058                 case 'v':
1059                         if (!migrate_mode) {
1060                                 fprintf(stderr, "--verbose is valid only for"
1061                                                 " migrate mode\n");
1062                                 return CMD_HELP;
1063                         }
1064                         migrate_mdt_param.fp_verbose = VERBOSE_DETAIL;
1065                         break;
1066                 default:
1067                         return CMD_HELP;
1068                 }
1069         }
1070
1071         fname = argv[optind];
1072
1073         if (delete &&
1074             (stripe_size_arg != NULL || stripe_off_arg != NULL ||
1075              stripe_count_arg != NULL || pool_name_arg != NULL)) {
1076                 fprintf(stderr, "error: %s: cannot specify -d with "
1077                         "-s, -c, -o, or -p options\n",
1078                         argv[0]);
1079                 return CMD_HELP;
1080         }
1081
1082         if (optind == argc) {
1083                 fprintf(stderr, "error: %s: missing filename|dirname\n",
1084                         argv[0]);
1085                 return CMD_HELP;
1086         }
1087
1088         if (mdt_idx_arg != NULL && optind > 3) {
1089                 fprintf(stderr, "error: %s: cannot specify -m with other "
1090                         "options\n", argv[0]);
1091                 return CMD_HELP;
1092         }
1093
1094         if ((migration_flags & MIGRATION_NONBLOCK) && migration_block) {
1095                 fprintf(stderr,
1096                         "error: %s: cannot specify --non-block and --block\n",
1097                         argv[0]);
1098                 return CMD_HELP;
1099         }
1100
1101         if (pool_name_arg != NULL) {
1102                 char    *ptr;
1103                 int     rc;
1104
1105                 ptr = strchr(pool_name_arg, '.');
1106                 if (ptr == NULL) {
1107                         ptr = pool_name_arg;
1108                 } else {
1109                         if ((ptr - pool_name_arg) == 0) {
1110                                 fprintf(stderr, "error: %s: fsname is empty "
1111                                         "in pool name '%s'\n",
1112                                         argv[0], pool_name_arg);
1113                                 return CMD_HELP;
1114                         }
1115
1116                         ++ptr;
1117                 }
1118
1119                 rc = lustre_is_poolname_valid(ptr, 1, LOV_MAXPOOLNAME);
1120                 if (rc == -1) {
1121                         fprintf(stderr, "error: %s: poolname '%s' is "
1122                                 "empty\n",
1123                                 argv[0], pool_name_arg);
1124                         return CMD_HELP;
1125                 } else if (rc == -2) {
1126                         fprintf(stderr, "error: %s: pool name '%s' is too long "
1127                                 "(max is %d characters)\n",
1128                                 argv[0], pool_name_arg, LOV_MAXPOOLNAME);
1129                         return CMD_HELP;
1130                 } else if (rc > 0) {
1131                         fprintf(stderr, "error: %s: char '%c' not allowed in "
1132                                 "pool name '%s'\n",
1133                                 argv[0], rc, pool_name_arg);
1134                         return CMD_HELP;
1135                 }
1136         }
1137
1138         /* get the stripe size */
1139         if (stripe_size_arg != NULL) {
1140                 result = llapi_parse_size(stripe_size_arg, &st_size,
1141                                           &size_units, 0);
1142                 if (result) {
1143                         fprintf(stderr, "error: %s: bad stripe size '%s'\n",
1144                                 argv[0], stripe_size_arg);
1145                         return CMD_HELP;
1146                 }
1147         }
1148         /* get the stripe offset */
1149         if (stripe_off_arg != NULL) {
1150                 st_offset = strtol(stripe_off_arg, &end, 0);
1151                 if (*end != '\0') {
1152                         fprintf(stderr, "error: %s: bad stripe offset '%s'\n",
1153                                 argv[0], stripe_off_arg);
1154                         return CMD_HELP;
1155                 }
1156         }
1157         /* get the stripe count */
1158         if (stripe_count_arg != NULL) {
1159                 st_count = strtoul(stripe_count_arg, &end, 0);
1160                 if (*end != '\0') {
1161                         fprintf(stderr, "error: %s: bad stripe count '%s'\n",
1162                                 argv[0], stripe_count_arg);
1163                         return CMD_HELP;
1164                 }
1165         }
1166
1167         if (mdt_idx_arg != NULL) {
1168                 /* initialize migrate mdt parameters */
1169                 migrate_mdt_param.fp_mdt_index = strtoul(mdt_idx_arg, &end, 0);
1170                 if (*end != '\0') {
1171                         fprintf(stderr, "error: %s: bad MDT index '%s'\n",
1172                                 argv[0], mdt_idx_arg);
1173                         return CMD_HELP;
1174                 }
1175                 migrate_mdt_param.fp_migrate = 1;
1176         } else {
1177                 /* initialize stripe parameters */
1178                 param = calloc(1, offsetof(typeof(*param), lsp_osts[nr_osts]));
1179                 if (param == NULL) {
1180                         fprintf(stderr, "error: %s: run out of memory\n",
1181                                 argv[0]);
1182                         return CMD_HELP;
1183                 }
1184
1185                 param->lsp_stripe_size = st_size;
1186                 param->lsp_stripe_offset = st_offset;
1187                 param->lsp_stripe_count = st_count;
1188                 param->lsp_stripe_pattern = 0;
1189                 param->lsp_pool = pool_name_arg;
1190                 param->lsp_is_specific = false;
1191                 if (nr_osts > 0) {
1192                         if (st_count > 0 && nr_osts != st_count) {
1193                                 fprintf(stderr, "error: %s: stripe count '%d' "
1194                                         "doesn't match the number of OSTs: %d\n"
1195                                         , argv[0], st_count, nr_osts);
1196                                 free(param);
1197                                 return CMD_HELP;
1198                         }
1199
1200                         param->lsp_is_specific = true;
1201                         param->lsp_stripe_count = nr_osts;
1202                         memcpy(param->lsp_osts, osts, sizeof(*osts) * nr_osts);
1203                 }
1204         }
1205
1206         for (fname = argv[optind]; fname != NULL; fname = argv[++optind]) {
1207                 if (!migrate_mode) {
1208                         result = llapi_file_open_param(fname,
1209                                                        O_CREAT | O_WRONLY,
1210                                                        0644, param);
1211                         if (result >= 0) {
1212                                 close(result);
1213                                 result = 0;
1214                         }
1215                 } else if (mdt_idx_arg != NULL) {
1216                         result = llapi_migrate_mdt(fname, &migrate_mdt_param);
1217                 } else {
1218                         result = lfs_migrate(fname, migration_flags, param);
1219                 }
1220                 if (result) {
1221                         /* Save the first error encountered. */
1222                         if (result2 == 0)
1223                                 result2 = result;
1224                         fprintf(stderr, "error: %s: %s file '%s' failed: %s\n",
1225                                 argv[0], migrate_mode ? "migrate" : "create",
1226                                 fname,
1227                                 pool_name_arg != NULL && result == EINVAL ?
1228                                 "OST not in pool?" : strerror(errno));
1229                         continue;
1230                 }
1231         }
1232
1233         free(param);
1234         return result2;
1235 }
1236
1237 static int lfs_poollist(int argc, char **argv)
1238 {
1239         if (argc != 2)
1240                 return CMD_HELP;
1241
1242         return llapi_poollist(argv[1]);
1243 }
1244
/**
 * Convert a day count such as "7", "+7" or "-7" into an absolute time.
 *
 * The number is interpreted as days and subtracted from \a *time.
 *
 * \param[in]  time    reference time
 * \param[out] set     receives \a *time minus the requested number of days;
 *                     left untouched on error
 * \param[in]  str     day count with an optional leading '+' or '-'
 *
 * \retval 1           \a str starts with '+'
 * \retval -1          \a str starts with '-'
 * \retval 0           \a str has no sign
 * \retval INT_MAX     the requested span is larger than \a *time
 */
static int set_time(time_t *time, time_t *set, char *str)
{
	const char *digits = str;
	time_t days;
	time_t span;
	int sign;

	switch (str[0]) {
	case '+':
		sign = 1;
		digits++;
		break;
	case '-':
		sign = -1;
		digits++;
		break;
	default:
		sign = 0;
		break;
	}

	days = strtol(digits, NULL, 0);
	span = days * 24 * 60 * 60;
	if (*time < span) {
		fprintf(stderr, "Wrong time '%s' is specified.\n", str);
		return INT_MAX;
	}

	*set = *time - span;
	return sign;
}
1269
1270 #define USER 0
1271 #define GROUP 1
1272
1273 static int name2id(unsigned int *id, char *name, int type)
1274 {
1275         if (type == USER) {
1276                 struct passwd *entry;
1277
1278                 if (!(entry = getpwnam(name))) {
1279                         if (!errno)
1280                                 errno = ENOENT;
1281                         return -1;
1282                 }
1283
1284                 *id = entry->pw_uid;
1285         } else {
1286                 struct group *entry;
1287
1288                 if (!(entry = getgrnam(name))) {
1289                         if (!errno)
1290                                 errno = ENOENT;
1291                         return -1;
1292                 }
1293
1294                 *id = entry->gr_gid;
1295         }
1296
1297         return 0;
1298 }
1299
1300 static int id2name(char **name, unsigned int id, int type)
1301 {
1302         if (type == USER) {
1303                 struct passwd *entry;
1304
1305                 if (!(entry = getpwuid(id))) {
1306                         if (!errno)
1307                                 errno = ENOENT;
1308                         return -1;
1309                 }
1310
1311                 *name = entry->pw_name;
1312         } else {
1313                 struct group *entry;
1314
1315                 if (!(entry = getgrgid(id))) {
1316                         if (!errno)
1317                                 errno = ENOENT;
1318                         return -1;
1319                 }
1320
1321                 *name = entry->gr_name;
1322         }
1323
1324         return 0;
1325 }
1326
1327 static int name2layout(__u32 *layout, char *name)
1328 {
1329         char *ptr, *lyt;
1330
1331         *layout = 0;
1332         for (ptr = name; ; ptr = NULL) {
1333                 lyt = strtok(ptr, ",");
1334                 if (lyt == NULL)
1335                         break;
1336                 if (strcmp(lyt, "released") == 0)
1337                         *layout |= LOV_PATTERN_F_RELEASED;
1338                 else if (strcmp(lyt, "raid0") == 0)
1339                         *layout |= LOV_PATTERN_RAID0;
1340                 else
1341                         return -1;
1342         }
1343         return 0;
1344 }
1345
1346 #define FIND_POOL_OPT 3
1347 static int lfs_find(int argc, char **argv)
1348 {
1349         int c, rc;
1350         int ret = 0;
1351         time_t t;
1352         struct find_param param = {
1353                 .fp_max_depth = -1,
1354                 .fp_quiet = 1,
1355         };
1356         struct option long_opts[] = {
1357                 {"atime",        required_argument, 0, 'A'},
1358                 {"stripe-count", required_argument, 0, 'c'},
1359                 {"stripe_count", required_argument, 0, 'c'},
1360                 {"ctime",        required_argument, 0, 'C'},
1361                 {"maxdepth",     required_argument, 0, 'D'},
1362                 {"gid",          required_argument, 0, 'g'},
1363                 {"group",        required_argument, 0, 'G'},
1364                 {"stripe-index", required_argument, 0, 'i'},
1365                 {"stripe_index", required_argument, 0, 'i'},
1366                 {"layout",       required_argument, 0, 'L'},
1367                 {"mdt",          required_argument, 0, 'm'},
1368                 {"mdt-index",    required_argument, 0, 'm'},
1369                 {"mdt_index",    required_argument, 0, 'm'},
1370                 {"mtime",        required_argument, 0, 'M'},
1371                 {"name",         required_argument, 0, 'n'},
1372      /* reserve {"or",           no_argument,     , 0, 'o'}, to match find(1) */
1373                 {"obd",          required_argument, 0, 'O'},
1374                 {"ost",          required_argument, 0, 'O'},
1375                 /* no short option for pool, p/P already used */
1376                 {"pool",         required_argument, 0, FIND_POOL_OPT},
1377                 {"print0",       no_argument,       0, 'p'},
1378                 {"print",        no_argument,       0, 'P'},
1379                 {"size",         required_argument, 0, 's'},
1380                 {"stripe-size",  required_argument, 0, 'S'},
1381                 {"stripe_size",  required_argument, 0, 'S'},
1382                 {"type",         required_argument, 0, 't'},
1383                 {"uid",          required_argument, 0, 'u'},
1384                 {"user",         required_argument, 0, 'U'},
1385                 {0, 0, 0, 0}
1386         };
1387         int pathstart = -1;
1388         int pathend = -1;
1389         int neg_opt = 0;
1390         time_t *xtime;
1391         int *xsign;
1392         int isoption;
1393         char *endptr;
1394
1395         time(&t);
1396
1397         /* when getopt_long_only() hits '!' it returns 1, puts "!" in optarg */
1398         while ((c = getopt_long_only(argc, argv,
1399                                      "-A:c:C:D:g:G:i:L:m:M:n:O:Ppqrs:S:t:u:U:v",
1400                                      long_opts, NULL)) >= 0) {
1401                 xtime = NULL;
1402                 xsign = NULL;
1403                 if (neg_opt)
1404                         --neg_opt;
1405                 /* '!' is part of option */
1406                 /* when getopt_long_only() finds a string which is not
1407                  * an option nor a known option argument it returns 1
1408                  * in that case if we already have found pathstart and pathend
1409                  * (i.e. we have the list of pathnames),
1410                  * the only supported value is "!"
1411                  */
1412                 isoption = (c != 1) || (strcmp(optarg, "!") == 0);
1413                 if (!isoption && pathend != -1) {
1414                         fprintf(stderr, "err: %s: filename|dirname must either "
1415                                         "precede options or follow options\n",
1416                                         argv[0]);
1417                         ret = CMD_HELP;
1418                         goto err;
1419                 }
1420                 if (!isoption && pathstart == -1)
1421                         pathstart = optind - 1;
1422                 if (isoption && pathstart != -1 && pathend == -1)
1423                         pathend = optind - 2;
1424                 switch (c) {
1425                 case 0:
1426                         /* Long options. */
1427                         break;
1428                 case 1:
1429                         /* unknown; opt is "!" or path component,
1430                          * checking done above.
1431                          */
1432                         if (strcmp(optarg, "!") == 0)
1433                                 neg_opt = 2;
1434                         break;
1435                 case 'A':
1436                         xtime = &param.fp_atime;
1437                         xsign = &param.fp_asign;
1438                         param.fp_exclude_atime = !!neg_opt;
1439                         /* no break, this falls through to 'C' for ctime */
1440                 case 'C':
1441                         if (c == 'C') {
1442                                 xtime = &param.fp_ctime;
1443                                 xsign = &param.fp_csign;
1444                                 param.fp_exclude_ctime = !!neg_opt;
1445                         }
1446                         /* no break, this falls through to 'M' for mtime */
1447                 case 'M':
1448                         if (c == 'M') {
1449                                 xtime = &param.fp_mtime;
1450                                 xsign = &param.fp_msign;
1451                                 param.fp_exclude_mtime = !!neg_opt;
1452                         }
1453                         rc = set_time(&t, xtime, optarg);
1454                         if (rc == INT_MAX) {
1455                                 ret = -1;
1456                                 goto err;
1457                         }
1458                         if (rc)
1459                                 *xsign = rc;
1460                         break;
1461                 case 'c':
1462                         if (optarg[0] == '+') {
1463                                 param.fp_stripe_count_sign = -1;
1464                                 optarg++;
1465                         } else if (optarg[0] == '-') {
1466                                 param.fp_stripe_count_sign =  1;
1467                                 optarg++;
1468                         }
1469
1470                         param.fp_stripe_count = strtoul(optarg, &endptr, 0);
1471                         if (*endptr != '\0') {
1472                                 fprintf(stderr,"error: bad stripe_count '%s'\n",
1473                                         optarg);
1474                                 ret = -1;
1475                                 goto err;
1476                         }
1477                         param.fp_check_stripe_count = 1;
1478                         param.fp_exclude_stripe_count = !!neg_opt;
1479                         break;
1480                 case 'D':
1481                         param.fp_max_depth = strtol(optarg, 0, 0);
1482                         break;
1483                 case 'g':
1484                 case 'G':
1485                         rc = name2id(&param.fp_gid, optarg, GROUP);
1486                         if (rc) {
1487                                 param.fp_gid = strtoul(optarg, &endptr, 10);
1488                                 if (*endptr != '\0') {
1489                                         fprintf(stderr, "Group/GID: %s cannot "
1490                                                 "be found.\n", optarg);
1491                                         ret = -1;
1492                                         goto err;
1493                                 }
1494                         }
1495                         param.fp_exclude_gid = !!neg_opt;
1496                         param.fp_check_gid = 1;
1497                         break;
1498                 case 'L':
1499                         ret = name2layout(&param.fp_layout, optarg);
1500                         if (ret)
1501                                 goto err;
1502                         param.fp_exclude_layout = !!neg_opt;
1503                         param.fp_check_layout = 1;
1504                         break;
1505                 case 'u':
1506                 case 'U':
1507                         rc = name2id(&param.fp_uid, optarg, USER);
1508                         if (rc) {
1509                                 param.fp_uid = strtoul(optarg, &endptr, 10);
1510                                 if (*endptr != '\0') {
1511                                         fprintf(stderr, "User/UID: %s cannot "
1512                                                 "be found.\n", optarg);
1513                                         ret = -1;
1514                                         goto err;
1515                                 }
1516                         }
1517                         param.fp_exclude_uid = !!neg_opt;
1518                         param.fp_check_uid = 1;
1519                         break;
1520                 case FIND_POOL_OPT:
1521                         if (strlen(optarg) > LOV_MAXPOOLNAME) {
1522                                 fprintf(stderr,
1523                                         "Pool name %s is too long"
1524                                         " (max is %d)\n", optarg,
1525                                         LOV_MAXPOOLNAME);
1526                                 ret = -1;
1527                                 goto err;
1528                         }
1529                         /* we do check for empty pool because empty pool
1530                          * is used to find V1 lov attributes */
1531                         strncpy(param.fp_poolname, optarg, LOV_MAXPOOLNAME);
1532                         param.fp_poolname[LOV_MAXPOOLNAME] = '\0';
1533                         param.fp_exclude_pool = !!neg_opt;
1534                         param.fp_check_pool = 1;
1535                         break;
1536                 case 'n':
1537                         param.fp_pattern = (char *)optarg;
1538                         param.fp_exclude_pattern = !!neg_opt;
1539                         break;
1540                 case 'm':
1541                 case 'i':
1542                 case 'O': {
1543                         char *buf, *token, *next, *p;
1544                         int len = 1;
1545                         void *tmp;
1546
1547                         buf = strdup(optarg);
1548                         if (buf == NULL) {
1549                                 ret = -ENOMEM;
1550                                 goto err;
1551                         }
1552
1553                         param.fp_exclude_obd = !!neg_opt;
1554
1555                         token = buf;
1556                         while (token && *token) {
1557                                 token = strchr(token, ',');
1558                                 if (token) {
1559                                         len++;
1560                                         token++;
1561                                 }
1562                         }
1563                         if (c == 'm') {
1564                                 param.fp_exclude_mdt = !!neg_opt;
1565                                 param.fp_num_alloc_mdts += len;
1566                                 tmp = realloc(param.fp_mdt_uuid,
1567                                               param.fp_num_alloc_mdts *
1568                                               sizeof(*param.fp_mdt_uuid));
1569                                 if (tmp == NULL) {
1570                                         ret = -ENOMEM;
1571                                         goto err_free;
1572                                 }
1573
1574                                 param.fp_mdt_uuid = tmp;
1575                         } else {
1576                                 param.fp_exclude_obd = !!neg_opt;
1577                                 param.fp_num_alloc_obds += len;
1578                                 tmp = realloc(param.fp_obd_uuid,
1579                                               param.fp_num_alloc_obds *
1580                                               sizeof(*param.fp_obd_uuid));
1581                                 if (tmp == NULL) {
1582                                         ret = -ENOMEM;
1583                                         goto err_free;
1584                                 }
1585
1586                                 param.fp_obd_uuid = tmp;
1587                         }
1588                         for (token = buf; token && *token; token = next) {
1589                                 struct obd_uuid *puuid;
1590                                 if (c == 'm') {
1591                                         puuid =
1592                                         &param.fp_mdt_uuid[param.fp_num_mdts++];
1593                                 } else {
1594                                         puuid =
1595                                         &param.fp_obd_uuid[param.fp_num_obds++];
1596                                 }
1597                                 p = strchr(token, ',');
1598                                 next = 0;
1599                                 if (p) {
1600                                         *p = 0;
1601                                         next = p+1;
1602                                 }
1603
1604                                 if (strlen(token) > sizeof(puuid->uuid) - 1) {
1605                                         ret = -E2BIG;
1606                                         goto err_free;
1607                                 }
1608
1609                                 strncpy(puuid->uuid, token,
1610                                         sizeof(puuid->uuid));
1611                         }
1612 err_free:
1613                         if (buf)
1614                                 free(buf);
1615                         break;
1616                 }
1617                 case 'p':
1618                         param.fp_zero_end = 1;
1619                         break;
1620                 case 'P':
1621                         break;
1622                 case 's':
1623                         if (optarg[0] == '+') {
1624                                 param.fp_size_sign = -1;
1625                                 optarg++;
1626                         } else if (optarg[0] == '-') {
1627                                 param.fp_size_sign =  1;
1628                                 optarg++;
1629                         }
1630
1631                         ret = llapi_parse_size(optarg, &param.fp_size,
1632                                                &param.fp_size_units, 0);
1633                         if (ret) {
1634                                 fprintf(stderr, "error: bad file size '%s'\n",
1635                                         optarg);
1636                                 goto err;
1637                         }
1638                         param.fp_check_size = 1;
1639                         param.fp_exclude_size = !!neg_opt;
1640                         break;
1641                 case 'S':
1642                         if (optarg[0] == '+') {
1643                                 param.fp_stripe_size_sign = -1;
1644                                 optarg++;
1645                         } else if (optarg[0] == '-') {
1646                                 param.fp_stripe_size_sign =  1;
1647                                 optarg++;
1648                         }
1649
1650                         ret = llapi_parse_size(optarg, &param.fp_stripe_size,
1651                                                &param.fp_stripe_size_units, 0);
1652                         if (ret) {
1653                                 fprintf(stderr, "error: bad stripe_size '%s'\n",
1654                                         optarg);
1655                                 goto err;
1656                         }
1657                         param.fp_check_stripe_size = 1;
1658                         param.fp_exclude_stripe_size = !!neg_opt;
1659                         break;
1660                 case 't':
1661                         param.fp_exclude_type = !!neg_opt;
1662                         switch (optarg[0]) {
1663                         case 'b':
1664                                 param.fp_type = S_IFBLK;
1665                                 break;
1666                         case 'c':
1667                                 param.fp_type = S_IFCHR;
1668                                 break;
1669                         case 'd':
1670                                 param.fp_type = S_IFDIR;
1671                                 break;
1672                         case 'f':
1673                                 param.fp_type = S_IFREG;
1674                                 break;
1675                         case 'l':
1676                                 param.fp_type = S_IFLNK;
1677                                 break;
1678                         case 'p':
1679                                 param.fp_type = S_IFIFO;
1680                                 break;
1681                         case 's':
1682                                 param.fp_type = S_IFSOCK;
1683                                 break;
1684                         default:
1685                                 fprintf(stderr, "error: %s: bad type '%s'\n",
1686                                         argv[0], optarg);
1687                                 ret = CMD_HELP;
1688                                 goto err;
1689                         };
1690                         break;
1691                 default:
1692                         ret = CMD_HELP;
1693                         goto err;
1694                 };
1695         }
1696
1697         if (pathstart == -1) {
1698                 fprintf(stderr, "error: %s: no filename|pathname\n",
1699                         argv[0]);
1700                 ret = CMD_HELP;
1701                 goto err;
1702         } else if (pathend == -1) {
1703                 /* no options */
1704                 pathend = argc;
1705         }
1706
1707         do {
1708                 rc = llapi_find(argv[pathstart], &param);
1709                 if (rc != 0 && ret == 0)
1710                         ret = rc;
1711         } while (++pathstart < pathend);
1712
1713         if (ret)
1714                 fprintf(stderr, "error: %s failed for %s.\n",
1715                         argv[0], argv[optind - 1]);
1716 err:
1717         if (param.fp_obd_uuid && param.fp_num_alloc_obds)
1718                 free(param.fp_obd_uuid);
1719
1720         if (param.fp_mdt_uuid && param.fp_num_alloc_mdts)
1721                 free(param.fp_mdt_uuid);
1722
1723         return ret;
1724 }
1725
1726 static int lfs_getstripe_internal(int argc, char **argv,
1727                                   struct find_param *param)
1728 {
1729         struct option long_opts[] = {
1730 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1731                 /* This formerly implied "stripe-count", but was explicitly
1732                  * made "stripe-count" for consistency with other options,
1733                  * and to separate it from "mdt-count" when DNE arrives. */
1734                 {"count",               no_argument,            0, 'c'},
1735 #endif
1736                 {"stripe-count",        no_argument,            0, 'c'},
1737                 {"stripe_count",        no_argument,            0, 'c'},
1738                 {"directory",           no_argument,            0, 'd'},
1739                 {"default",             no_argument,            0, 'D'},
1740                 {"fid",                 no_argument,            0, 'F'},
1741                 {"generation",          no_argument,            0, 'g'},
1742 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1743                 /* This formerly implied "stripe-index", but was explicitly
1744                  * made "stripe-index" for consistency with other options,
1745                  * and to separate it from "mdt-index" when DNE arrives. */
1746                 {"index",               no_argument,            0, 'i'},
1747 #endif
1748                 {"stripe-index",        no_argument,            0, 'i'},
1749                 {"stripe_index",        no_argument,            0, 'i'},
1750                 {"layout",              no_argument,            0, 'L'},
1751                 {"mdt",                 no_argument,            0, 'm'},
1752                 {"mdt-index",           no_argument,            0, 'm'},
1753                 {"mdt_index",           no_argument,            0, 'm'},
1754 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
1755                 {"mdt-index",           no_argument,            0, 'M'},
1756                 {"mdt_index",           no_argument,            0, 'M'},
1757 #endif
1758 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1759                 /* This formerly implied "stripe-index", but was confusing
1760                  * with "file offset" (which will eventually be needed for
1761                  * with different layouts by offset), so deprecate it. */
1762                 {"offset",              no_argument,            0, 'o'},
1763 #endif
1764                 {"obd",                 required_argument,      0, 'O'},
1765                 {"ost",                 required_argument,      0, 'O'},
1766                 {"pool",                no_argument,            0, 'p'},
1767                 {"quiet",               no_argument,            0, 'q'},
1768                 {"recursive",           no_argument,            0, 'r'},
1769                 {"raw",                 no_argument,            0, 'R'},
1770 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1771                 /* This formerly implied "--stripe-size", but was confusing
1772                  * with "lfs find --size|-s", which means "file size", so use
1773                  * the consistent "--stripe-size|-S" for all commands. */
1774                 {"size",                no_argument,            0, 's'},
1775 #endif
1776                 {"stripe-size",         no_argument,            0, 'S'},
1777                 {"stripe_size",         no_argument,            0, 'S'},
1778                 {"verbose",             no_argument,            0, 'v'},
1779                 {0, 0, 0, 0}
1780         };
1781         int c, rc;
1782
1783         while ((c = getopt_long(argc, argv, "cdDFghiLmMoO:pqrRsSv",
1784                                 long_opts, NULL)) != -1) {
1785                 switch (c) {
1786                 case 'O':
1787                         if (param->fp_obd_uuid) {
1788                                 fprintf(stderr,
1789                                         "error: %s: only one obduuid allowed",
1790                                         argv[0]);
1791                                 return CMD_HELP;
1792                         }
1793                         param->fp_obd_uuid = (struct obd_uuid *)optarg;
1794                         break;
1795                 case 'q':
1796                         param->fp_quiet++;
1797                         break;
1798                 case 'd':
1799                         param->fp_max_depth = 0;
1800                         break;
1801                 case 'D':
1802                         param->fp_get_default_lmv = 1;
1803                         break;
1804                 case 'F':
1805                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1806                                 param->fp_verbose |= VERBOSE_DFID;
1807                                 param->fp_max_depth = 0;
1808                         }
1809                         break;
1810                 case 'r':
1811                         param->fp_recursive = 1;
1812                         break;
1813                 case 'v':
1814                         param->fp_verbose = VERBOSE_DEFAULT | VERBOSE_DETAIL;
1815                         break;
1816                 case 'c':
1817 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
1818                         if (strcmp(argv[optind - 1], "--count") == 0)
1819                                 fprintf(stderr, "warning: '--count' deprecated,"
1820                                         " use '--stripe-count' instead\n");
1821 #endif
1822                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1823                                 param->fp_verbose |= VERBOSE_COUNT;
1824                                 param->fp_max_depth = 0;
1825                         }
1826                         break;
1827 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1828                 case 's':
1829 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
1830                         fprintf(stderr, "warning: '--size|-s' deprecated, "
1831                                 "use '--stripe-size|-S' instead\n");
1832 #endif
1833 #endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0) */
1834                 case 'S':
1835                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1836                                 param->fp_verbose |= VERBOSE_SIZE;
1837                                 param->fp_max_depth = 0;
1838                         }
1839                         break;
1840 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
1841                 case 'o':
1842                         fprintf(stderr, "warning: '--offset|-o' deprecated, "
1843                                 "use '--stripe-index|-i' instead\n");
1844 #endif
1845                 case 'i':
1846 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
1847                         if (strcmp(argv[optind - 1], "--index") == 0)
1848                                 fprintf(stderr, "warning: '--index' deprecated"
1849                                         ", use '--stripe-index' instead\n");
1850 #endif
1851                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1852                                 param->fp_verbose |= VERBOSE_OFFSET;
1853                                 param->fp_max_depth = 0;
1854                         }
1855                         break;
1856                 case 'p':
1857                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1858                                 param->fp_verbose |= VERBOSE_POOL;
1859                                 param->fp_max_depth = 0;
1860                         }
1861                         break;
1862                 case 'g':
1863                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1864                                 param->fp_verbose |= VERBOSE_GENERATION;
1865                                 param->fp_max_depth = 0;
1866                         }
1867                         break;
1868                 case 'L':
1869                         if (!(param->fp_verbose & VERBOSE_DETAIL)) {
1870                                 param->fp_verbose |= VERBOSE_LAYOUT;
1871                                 param->fp_max_depth = 0;
1872                         }
1873                         break;
1874 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
1875                 case 'M':
1876 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 11, 53, 0)
1877                         fprintf(stderr, "warning: '-M' deprecated"
1878                                 ", use '-m' instead\n");
1879 #endif
1880 #endif
1881                 case 'm':
1882                         if (!(param->fp_verbose & VERBOSE_DETAIL))
1883                                 param->fp_max_depth = 0;
1884                         param->fp_verbose |= VERBOSE_MDTINDEX;
1885                         break;
1886                 case 'R':
1887                         param->fp_raw = 1;
1888                         break;
1889                 default:
1890                         return CMD_HELP;
1891                 }
1892         }
1893
1894         if (optind >= argc)
1895                 return CMD_HELP;
1896
1897         if (param->fp_recursive)
1898                 param->fp_max_depth = -1;
1899
1900         if (!param->fp_verbose)
1901                 param->fp_verbose = VERBOSE_DEFAULT;
1902         if (param->fp_quiet)
1903                 param->fp_verbose = VERBOSE_OBJID;
1904
1905         do {
1906                 rc = llapi_getstripe(argv[optind], param);
1907         } while (++optind < argc && !rc);
1908
1909         if (rc)
1910                 fprintf(stderr, "error: %s failed for %s.\n",
1911                         argv[0], argv[optind - 1]);
1912         return rc;
1913 }
1914
1915 static int lfs_tgts(int argc, char **argv)
1916 {
1917         char mntdir[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
1918         struct find_param param;
1919         int index = 0, rc=0;
1920
1921         if (argc > 2)
1922                 return CMD_HELP;
1923
1924         if (argc == 2 && !realpath(argv[1], path)) {
1925                 rc = -errno;
1926                 fprintf(stderr, "error: invalid path '%s': %s\n",
1927                         argv[1], strerror(-rc));
1928                 return rc;
1929         }
1930
1931         while (!llapi_search_mounts(path, index++, mntdir, NULL)) {
1932                 /* Check if we have a mount point */
1933                 if (mntdir[0] == '\0')
1934                         continue;
1935
1936                 memset(&param, 0, sizeof(param));
1937                 if (!strcmp(argv[0], "mdts"))
1938                         param.fp_get_lmv = 1;
1939
1940                 rc = llapi_ostlist(mntdir, &param);
1941                 if (rc) {
1942                         fprintf(stderr, "error: %s: failed on %s\n",
1943                                 argv[0], mntdir);
1944                 }
1945                 if (path[0] != '\0')
1946                         break;
1947                 memset(mntdir, 0, PATH_MAX);
1948         }
1949
1950         return rc;
1951 }
1952
1953 static int lfs_getstripe(int argc, char **argv)
1954 {
1955         struct find_param param = { 0 };
1956
1957         param.fp_max_depth = 1;
1958         return lfs_getstripe_internal(argc, argv, &param);
1959 }
1960
1961 /* functions */
1962 static int lfs_getdirstripe(int argc, char **argv)
1963 {
1964         struct find_param param = { 0 };
1965
1966         param.fp_get_lmv = 1;
1967         return lfs_getstripe_internal(argc, argv, &param);
1968 }
1969
1970 /* functions */
1971 static int lfs_setdirstripe(int argc, char **argv)
1972 {
1973         char                    *dname;
1974         int                     result;
1975         unsigned int            stripe_offset = -1;
1976         unsigned int            stripe_count = 1;
1977         enum lmv_hash_type      hash_type;
1978         char                    *end;
1979         int                     c;
1980         char                    *stripe_offset_opt = NULL;
1981         char                    *stripe_count_opt = NULL;
1982         char                    *stripe_hash_opt = NULL;
1983         char                    *mode_opt = NULL;
1984         bool                    default_stripe = false;
1985         mode_t                  mode = S_IRWXU | S_IRWXG | S_IRWXO;
1986         mode_t                  previous_mode = 0;
1987         bool                    delete = false;
1988
1989         struct option long_opts[] = {
1990 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
1991                 {"count",       required_argument, 0, 'c'},
1992 #endif
1993                 {"mdt-count",   required_argument, 0, 'c'},
1994                 {"delete",      no_argument, 0, 'd'},
1995 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
1996                 {"index",       required_argument, 0, 'i'},
1997 #endif
1998                 {"mdt-index",   required_argument, 0, 'i'},
1999                 {"mode",        required_argument, 0, 'm'},
2000 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
2001                 {"hash-type",   required_argument, 0, 't'},
2002 #endif
2003                 {"mdt-hash",    required_argument, 0, 't'},
2004                 {"default_stripe", no_argument, 0, 'D'},
2005                 {0, 0, 0, 0}
2006         };
2007
2008         while ((c = getopt_long(argc, argv, "c:dDi:m:t:", long_opts,
2009                                 NULL)) >= 0) {
2010                 switch (c) {
2011                 case 0:
2012                         /* Long options. */
2013                         break;
2014                 case 'c':
2015 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 11, 53, 0)
2016                         if (strcmp(argv[optind - 1], "--count") == 0)
2017                                 fprintf(stderr, "warning: '--count' deprecated"
2018                                         ", use '--mdt-count' instead\n");
2019 #endif
2020                         stripe_count_opt = optarg;
2021                         break;
2022                 case 'd':
2023                         delete = true;
2024                         default_stripe = true;
2025                         break;
2026                 case 'D':
2027                         default_stripe = true;
2028                         break;
2029                 case 'i':
2030 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 11, 53, 0)
2031                         if (strcmp(argv[optind - 1], "--index") == 0)
2032                                 fprintf(stderr, "warning: '--index' deprecated"
2033                                         ", use '--mdt-index' instead\n");
2034 #endif
2035                         stripe_offset_opt = optarg;
2036                         break;
2037                 case 'm':
2038                         mode_opt = optarg;
2039                         break;
2040                 case 't':
2041 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 11, 53, 0)
2042                         if (strcmp(argv[optind - 1], "--hash-type") == 0)
2043                                 fprintf(stderr, "warning: '--hash-type' "
2044                                         "deprecated, use '--mdt-hash' "
2045                                         "instead\n");
2046 #endif
2047                         stripe_hash_opt = optarg;
2048                         break;
2049                 default:
2050                         fprintf(stderr, "error: %s: option '%s' "
2051                                         "unrecognized\n",
2052                                         argv[0], argv[optind - 1]);
2053                         return CMD_HELP;
2054                 }
2055         }
2056
2057         if (optind == argc) {
2058                 fprintf(stderr, "error: %s: missing dirname\n",
2059                         argv[0]);
2060                 return CMD_HELP;
2061         }
2062
2063         if (!delete && stripe_offset_opt == NULL && stripe_count_opt == NULL) {
2064                 fprintf(stderr, "error: %s: missing stripe offset and count.\n",
2065                         argv[0]);
2066                 return CMD_HELP;
2067         }
2068
2069         if (stripe_offset_opt != NULL) {
2070                 /* get the stripe offset */
2071                 stripe_offset = strtoul(stripe_offset_opt, &end, 0);
2072                 if (*end != '\0') {
2073                         fprintf(stderr, "error: %s: bad stripe offset '%s'\n",
2074                                 argv[0], stripe_offset_opt);
2075                         return CMD_HELP;
2076                 }
2077         }
2078
2079         if (delete) {
2080                 if (stripe_offset_opt != NULL || stripe_count_opt != NULL) {
2081                         fprintf(stderr, "error: %s: cannot specify -d with -s,"
2082                                 " or -i options.\n", argv[0]);
2083                         return CMD_HELP;
2084                 } else {
2085                         stripe_count = 0;
2086                 }
2087         }
2088
2089
2090         if (mode_opt != NULL) {
2091                 mode = strtoul(mode_opt, &end, 8);
2092                 if (*end != '\0') {
2093                         fprintf(stderr, "error: %s: bad mode '%s'\n",
2094                                 argv[0], mode_opt);
2095                         return CMD_HELP;
2096                 }
2097                 previous_mode = umask(0);
2098         }
2099
2100         if (stripe_hash_opt == NULL ||
2101             strcmp(stripe_hash_opt, LMV_HASH_NAME_FNV_1A_64) == 0) {
2102                 hash_type = LMV_HASH_TYPE_FNV_1A_64;
2103         } else if (strcmp(stripe_hash_opt, LMV_HASH_NAME_ALL_CHARS) == 0) {
2104                 hash_type = LMV_HASH_TYPE_ALL_CHARS;
2105         } else {
2106                 fprintf(stderr, "error: %s: bad stripe hash type '%s'\n",
2107                         argv[0], stripe_hash_opt);
2108                 return CMD_HELP;
2109         }
2110
2111         /* get the stripe count */
2112         if (stripe_count_opt != NULL) {
2113                 stripe_count = strtoul(stripe_count_opt, &end, 0);
2114                 if (*end != '\0') {
2115                         fprintf(stderr, "error: %s: bad stripe count '%s'\n",
2116                                 argv[0], stripe_count_opt);
2117                         return CMD_HELP;
2118                 }
2119         }
2120
2121         dname = argv[optind];
2122         do {
2123                 if (default_stripe) {
2124                         result = llapi_dir_set_default_lmv_stripe(dname,
2125                                                     stripe_offset, stripe_count,
2126                                                     hash_type, NULL);
2127                 } else {
2128                         result = llapi_dir_create_pool(dname, mode,
2129                                                        stripe_offset,
2130                                                        stripe_count, hash_type,
2131                                                        NULL);
2132                 }
2133
2134                 if (result) {
2135                         fprintf(stderr, "error: %s: create stripe dir '%s' "
2136                                 "failed\n", argv[0], dname);
2137                         break;
2138                 }
2139                 dname = argv[++optind];
2140         } while (dname != NULL);
2141
2142         if (mode_opt != NULL)
2143                 umask(previous_mode);
2144
2145         return result;
2146 }
2147
2148 /* functions */
2149 static int lfs_rmentry(int argc, char **argv)
2150 {
2151         char *dname;
2152         int   index;
2153         int   result = 0;
2154
2155         if (argc <= 1) {
2156                 fprintf(stderr, "error: %s: missing dirname\n",
2157                         argv[0]);
2158                 return CMD_HELP;
2159         }
2160
2161         index = 1;
2162         dname = argv[index];
2163         while (dname != NULL) {
2164                 result = llapi_direntry_remove(dname);
2165                 if (result) {
2166                         fprintf(stderr, "error: %s: remove dir entry '%s' "
2167                                 "failed\n", argv[0], dname);
2168                         break;
2169                 }
2170                 dname = argv[++index];
2171         }
2172         return result;
2173 }
2174
2175 static int lfs_mv(int argc, char **argv)
2176 {
2177         struct  find_param param = {
2178                 .fp_max_depth = -1,
2179                 .fp_mdt_index = -1,
2180         };
2181         char   *end;
2182         int     c;
2183         int     rc = 0;
2184         struct option long_opts[] = {
2185                 {"mdt-index", required_argument, 0, 'M'},
2186                 {"verbose",     no_argument,       0, 'v'},
2187                 {0, 0, 0, 0}
2188         };
2189
2190         while ((c = getopt_long(argc, argv, "M:v", long_opts, NULL)) != -1) {
2191                 switch (c) {
2192                 case 'M': {
2193                         param.fp_mdt_index = strtoul(optarg, &end, 0);
2194                         if (*end != '\0') {
2195                                 fprintf(stderr, "%s: invalid MDT index'%s'\n",
2196                                         argv[0], optarg);
2197                                 return CMD_HELP;
2198                         }
2199                         break;
2200                 }
2201                 case 'v': {
2202                         param.fp_verbose = VERBOSE_DETAIL;
2203                         break;
2204                 }
2205                 default:
2206                         fprintf(stderr, "error: %s: unrecognized option '%s'\n",
2207                                 argv[0], argv[optind - 1]);
2208                         return CMD_HELP;
2209                 }
2210         }
2211
2212         if (param.fp_mdt_index == -1) {
2213                 fprintf(stderr, "%s: MDT index must be specified\n", argv[0]);
2214                 return CMD_HELP;
2215         }
2216
2217         if (optind >= argc) {
2218                 fprintf(stderr, "%s: missing operand path\n", argv[0]);
2219                 return CMD_HELP;
2220         }
2221
2222         param.fp_migrate = 1;
2223         rc = llapi_migrate_mdt(argv[optind], &param);
2224         if (rc != 0)
2225                 fprintf(stderr, "%s: cannot migrate '%s' to MDT%04x: %s\n",
2226                         argv[0], argv[optind], param.fp_mdt_index,
2227                         strerror(-rc));
2228         return rc;
2229 }
2230
/* Forward the argument vector unchanged to lfs_tgts() and return its
 * result. */
static int lfs_osts(int argc, char **argv)
{
        int rc = lfs_tgts(argc, argv);

        return rc;
}
2235
/* Forward the argument vector unchanged to lfs_tgts() and return its
 * result. */
static int lfs_mdts(int argc, char **argv)
{
        int rc = lfs_tgts(argc, argv);

        return rc;
}
2240
/* Repeatedly divide \a value by 1024 while it is greater than 1024 and
 * evaluate to the number of divisions performed (0 if none).
 *
 * \a value is modified in place, so it must be a modifiable lvalue, and it
 * is evaluated several times, so it must have no side effects.  The
 * counter uses a name callers are unlikely to pick, so that an argument
 * such as COOK(radix) still refers to the caller's variable.
 *
 * Uses a GNU statement expression. */
#define COOK(value)                                                     \
({                                                                      \
        int cook_radix_ = 0;                                            \
        while ((value) > 1024) {                                        \
                (value) /= 1024;                                        \
                cook_radix_++;                                          \
        }                                                               \
        cook_radix_;                                                    \
})
/* printf() conversions for the columns of the table printed by showdf()
 * and mntdf() */
#define UUF     "%-20s"         /* UUID column, left-justified */
#define CSF     "%11s"          /* size/count column given as a string */
#define CDF     "%11llu"        /* size/count column given as a raw number */
#define HDF     "%8.1f%c"       /* scaled value followed by a unit letter */
#define RSF     "%4s"           /* usage-ratio column given as a string */
#define RDF     "%3d%%"         /* usage ratio as an integer percentage */
2256
2257 static int showdf(char *mntdir, struct obd_statfs *stat,
2258                   char *uuid, int ishow, int cooked,
2259                   char *type, int index, int rc)
2260 {
2261         long long avail, used, total;
2262         double ratio = 0;
2263         char *suffix = "KMGTPEZY";
2264         /* Note if we have >2^64 bytes/fs these buffers will need to be grown */
2265         char tbuf[3 * sizeof(__u64)];
2266         char ubuf[3 * sizeof(__u64)];
2267         char abuf[3 * sizeof(__u64)];
2268         char rbuf[3 * sizeof(__u64)];
2269
2270         if (!uuid || !stat)
2271                 return -EINVAL;
2272
2273         switch (rc) {
2274         case 0:
2275                 if (ishow) {
2276                         avail = stat->os_ffree;
2277                         used = stat->os_files - stat->os_ffree;
2278                         total = stat->os_files;
2279                 } else {
2280                         int shift = cooked ? 0 : 10;
2281
2282                         avail = (stat->os_bavail * stat->os_bsize) >> shift;
2283                         used  = ((stat->os_blocks - stat->os_bfree) *
2284                                  stat->os_bsize) >> shift;
2285                         total = (stat->os_blocks * stat->os_bsize) >> shift;
2286                 }
2287
2288                 if ((used + avail) > 0)
2289                         ratio = (double)used / (double)(used + avail);
2290
2291                 if (cooked) {
2292                         int i;
2293                         double cook_val;
2294
2295                         cook_val = (double)total;
2296                         i = COOK(cook_val);
2297                         if (i > 0)
2298                                 sprintf(tbuf, HDF, cook_val, suffix[i - 1]);
2299                         else
2300                                 sprintf(tbuf, CDF, total);
2301
2302                         cook_val = (double)used;
2303                         i = COOK(cook_val);
2304                         if (i > 0)
2305                                 sprintf(ubuf, HDF, cook_val, suffix[i - 1]);
2306                         else
2307                                 sprintf(ubuf, CDF, used);
2308
2309                         cook_val = (double)avail;
2310                         i = COOK(cook_val);
2311                         if (i > 0)
2312                                 sprintf(abuf, HDF, cook_val, suffix[i - 1]);
2313                         else
2314                                 sprintf(abuf, CDF, avail);
2315                 } else {
2316                         sprintf(tbuf, CDF, total);
2317                         sprintf(ubuf, CDF, used);
2318                         sprintf(abuf, CDF, avail);
2319                 }
2320
2321                 sprintf(rbuf, RDF, (int)(ratio * 100 + 0.5));
2322                 printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s",
2323                        uuid, tbuf, ubuf, abuf, rbuf, mntdir);
2324                 if (type)
2325                         printf("[%s:%d]\n", type, index);
2326                 else
2327                         printf("\n");
2328
2329                 break;
2330         case -ENODATA:
2331                 printf(UUF": inactive device\n", uuid);
2332                 break;
2333         default:
2334                 printf(UUF": %s\n", uuid, strerror(-rc));
2335                 break;
2336         }
2337
2338         return 0;
2339 }
2340
/* Pairs a statfs selector with the label printed for it; mntdf() builds a
 * table of these initialized positionally, so the field order matters. */
struct ll_stat_type {
        int   st_op;    /* LL_STATFS_* value handed to llapi_obd_statfs() */
        char *st_name;  /* label such as "MDT" or "OST"; NULL ends a table */
};
2345
2346 static int mntdf(char *mntdir, char *fsname, char *pool, int ishow,
2347                 int cooked, int lazy)
2348 {
2349         struct obd_statfs stat_buf, sum = { .os_bsize = 1 };
2350         struct obd_uuid uuid_buf;
2351         char *poolname = NULL;
2352         struct ll_stat_type types[] = { { LL_STATFS_LMV, "MDT" },
2353                                         { LL_STATFS_LOV, "OST" },
2354                                         { 0, NULL } };
2355         struct ll_stat_type *tp;
2356         __u64 ost_ffree = 0;
2357         __u32 index;
2358         __u32 type;
2359         int rc;
2360
2361         if (pool) {
2362                 poolname = strchr(pool, '.');
2363                 if (poolname != NULL) {
2364                         if (strncmp(fsname, pool, strlen(fsname))) {
2365                                 fprintf(stderr, "filesystem name incorrect\n");
2366                                 return -ENODEV;
2367                         }
2368                         poolname++;
2369                 } else
2370                         poolname = pool;
2371         }
2372
2373         if (ishow)
2374                 printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
2375                        "UUID", "Inodes", "IUsed", "IFree",
2376                        "IUse%", "Mounted on");
2377         else
2378                 printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
2379                        "UUID", cooked ? "bytes" : "1K-blocks",
2380                        "Used", "Available", "Use%", "Mounted on");
2381
2382         for (tp = types; tp->st_name != NULL; tp++) {
2383                 for (index = 0; ; index++) {
2384                         memset(&stat_buf, 0, sizeof(struct obd_statfs));
2385                         memset(&uuid_buf, 0, sizeof(struct obd_uuid));
2386                         type = lazy ? tp->st_op | LL_STATFS_NODELAY : tp->st_op;
2387                         rc = llapi_obd_statfs(mntdir, type, index,
2388                                               &stat_buf, &uuid_buf);
2389                         if (rc == -ENODEV)
2390                                 break;
2391
2392                         if (rc == -EAGAIN)
2393                                 continue;
2394
2395                         if (poolname && tp->st_op == LL_STATFS_LOV &&
2396                             llapi_search_ost(fsname, poolname,
2397                                              obd_uuid2str(&uuid_buf)) != 1)
2398                                 continue;
2399
2400                         /* the llapi_obd_statfs() call may have returned with
2401                          * an error, but if it filled in uuid_buf we will at
2402                          * lease use that to print out a message for that OBD.
2403                          * If we didn't get anything in the uuid_buf, then fill
2404                          * it in so that we can print an error message. */
2405                         if (uuid_buf.uuid[0] == '\0')
2406                                 sprintf(uuid_buf.uuid, "%s%04x",
2407                                         tp->st_name, index);
2408                         showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf),
2409                                ishow, cooked, tp->st_name, index, rc);
2410
2411                         if (rc == 0) {
2412                                 if (tp->st_op == LL_STATFS_LMV) {
2413                                         sum.os_ffree += stat_buf.os_ffree;
2414                                         sum.os_files += stat_buf.os_files;
2415                                 } else /* if (tp->st_op == LL_STATFS_LOV) */ {
2416                                         sum.os_blocks += stat_buf.os_blocks *
2417                                                 stat_buf.os_bsize;
2418                                         sum.os_bfree  += stat_buf.os_bfree *
2419                                                 stat_buf.os_bsize;
2420                                         sum.os_bavail += stat_buf.os_bavail *
2421                                                 stat_buf.os_bsize;
2422                                         ost_ffree += stat_buf.os_ffree;
2423                                 }
2424                         } else if (rc == -EINVAL || rc == -EFAULT) {
2425                                 break;
2426                         }
2427                 }
2428         }
2429
2430         /* If we don't have as many objects free on the OST as inodes
2431          * on the MDS, we reduce the total number of inodes to
2432          * compensate, so that the "inodes in use" number is correct.
2433          * Matches ll_statfs_internal() so the results are consistent. */
2434         if (ost_ffree < sum.os_ffree) {
2435                 sum.os_files = (sum.os_files - sum.os_ffree) + ost_ffree;
2436                 sum.os_ffree = ost_ffree;
2437         }
2438         printf("\n");
2439         showdf(mntdir, &sum, "filesystem summary:", ishow, cooked, NULL, 0, 0);
2440         printf("\n");
2441         return 0;
2442 }
2443
2444 static int lfs_df(int argc, char **argv)
2445 {
2446         char mntdir[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
2447         int ishow = 0, cooked = 0;
2448         int lazy = 0;
2449         int c, rc = 0, index = 0;
2450         char fsname[PATH_MAX] = "", *pool_name = NULL;
2451         struct option long_opts[] = {
2452                 {"pool", required_argument, 0, 'p'},
2453                 {"lazy", 0, 0, 'l'},
2454                 {0, 0, 0, 0}
2455         };
2456
2457         while ((c = getopt_long(argc, argv, "hilp:", long_opts, NULL)) != -1) {
2458                 switch (c) {
2459                 case 'i':
2460                         ishow = 1;
2461                         break;
2462                 case 'h':
2463                         cooked = 1;
2464                         break;
2465                 case 'l':
2466                         lazy = 1;
2467                         break;
2468                 case 'p':
2469                         pool_name = optarg;
2470                         break;
2471                 default:
2472                         return CMD_HELP;
2473                 }
2474         }
2475         if (optind < argc && !realpath(argv[optind], path)) {
2476                 rc = -errno;
2477                 fprintf(stderr, "error: invalid path '%s': %s\n",
2478                         argv[optind], strerror(-rc));
2479                 return rc;
2480         }
2481
2482         while (!llapi_search_mounts(path, index++, mntdir, fsname)) {
2483                 /* Check if we have a mount point */
2484                 if (mntdir[0] == '\0')
2485                         continue;
2486
2487                 rc = mntdf(mntdir, fsname, pool_name, ishow, cooked, lazy);
2488                 if (rc || path[0] != '\0')
2489                         break;
2490                 fsname[0] = '\0'; /* avoid matching in next loop */
2491                 mntdir[0] = '\0'; /* avoid matching in next loop */
2492         }
2493
2494         return rc;
2495 }
2496
2497 static int lfs_getname(int argc, char **argv)
2498 {
2499         char mntdir[PATH_MAX] = "", path[PATH_MAX] = "", fsname[PATH_MAX] = "";
2500         int rc = 0, index = 0, c;
2501         char buf[sizeof(struct obd_uuid)];
2502
2503         while ((c = getopt(argc, argv, "h")) != -1)
2504                 return CMD_HELP;
2505
2506         if (optind == argc) { /* no paths specified, get all paths. */
2507                 while (!llapi_search_mounts(path, index++, mntdir, fsname)) {
2508                         rc = llapi_getname(mntdir, buf, sizeof(buf));
2509                         if (rc < 0) {
2510                                 fprintf(stderr,
2511                                         "cannot get name for `%s': %s\n",
2512                                         mntdir, strerror(-rc));
2513                                 break;
2514                         }
2515
2516                         printf("%s %s\n", buf, mntdir);
2517
2518                         path[0] = fsname[0] = mntdir[0] = 0;
2519                 }
2520         } else { /* paths specified, only attempt to search these. */
2521                 for (; optind < argc; optind++) {
2522                         rc = llapi_getname(argv[optind], buf, sizeof(buf));
2523                         if (rc < 0) {
2524                                 fprintf(stderr,
2525                                         "cannot get name for `%s': %s\n",
2526                                         argv[optind], strerror(-rc));
2527                                 break;
2528                         }
2529
2530                         printf("%s %s\n", buf, argv[optind]);
2531                 }
2532         }
2533         return rc;
2534 }
2535
2536 static int lfs_check(int argc, char **argv)
2537 {
2538         int rc;
2539         char mntdir[PATH_MAX] = {'\0'};
2540         int num_types = 1;
2541         char *obd_types[2];
2542         char obd_type1[4];
2543         char obd_type2[4];
2544
2545         if (argc != 2)
2546                 return CMD_HELP;
2547
2548         obd_types[0] = obd_type1;
2549         obd_types[1] = obd_type2;
2550
2551         if (strcmp(argv[1], "osts") == 0) {
2552                 strcpy(obd_types[0], "osc");
2553         } else if (strcmp(argv[1], "mds") == 0) {
2554                 strcpy(obd_types[0], "mdc");
2555         } else if (strcmp(argv[1], "servers") == 0) {
2556                 num_types = 2;
2557                 strcpy(obd_types[0], "osc");
2558                 strcpy(obd_types[1], "mdc");
2559         } else {
2560                 fprintf(stderr, "error: %s: option '%s' unrecognized\n",
2561                                 argv[0], argv[1]);
2562                         return CMD_HELP;
2563         }
2564
2565         rc = llapi_search_mounts(NULL, 0, mntdir, NULL);
2566         if (rc < 0 || mntdir[0] == '\0') {
2567                 fprintf(stderr, "No suitable Lustre mount found\n");
2568                 return rc;
2569         }
2570
2571         rc = llapi_target_check(num_types, obd_types, mntdir);
2572         if (rc)
2573                 fprintf(stderr, "error: %s: %s status failed\n",
2574                                 argv[0],argv[1]);
2575
2576         return rc;
2577
2578 }
2579
/* Obsolete command: only writes a notice to stderr and returns 0.  Both
 * arguments are ignored. */
static int lfs_join(int argc, char **argv)
{
        fputs("join two lustre files into one.\n"
              "obsolete, HEAD does not support it anymore.\n", stderr);
        return 0;
}
2586
2587 #ifdef HAVE_SYS_QUOTA_H
/* Parse \a str with strtol() (base auto-detected) into \a nr.  If anything
 * is left unparsed, print "error: bad <msg>: <str>" and make the
 * *enclosing function* return CMD_HELP.  \a nr is assigned even when the
 * parse is rejected. */
#define ARG2INT(nr, str, msg)                                           \
do {                                                                    \
        char *arg_end_;                                                 \
                                                                        \
        nr = strtol(str, &arg_end_, 0);                                 \
        if (*arg_end_ != '\0') {                                        \
                fprintf(stderr, "error: bad %s: %s\n", msg, str);       \
                return CMD_HELP;                                        \
        }                                                               \
} while (0)
2597
/* Saturating add: a += b, except that \a a becomes ULONG_MAX when the sum
 * wraps around.  Both arguments are evaluated more than once, so they must
 * have no side effects; \a a must be a modifiable lvalue. */
#define ADD_OVERFLOW(a, b) \
        ((((a) + (b)) < (a)) ? ((a) = ULONG_MAX) : ((a) = (a) + (b)))
2599
/* Convert format time string "XXwXXdXXhXXmXXs" into seconds value
 * returns the value or ULONG_MAX on integer overflow or incorrect format
 * Notes:
 *        1. the order of specifiers is arbitrary (may be: 5w3s or 3s5w)
 *        2. specifiers may be encountered multiple times (2s3s is 5 seconds)
 *        3. empty integer value is interpreted as 0
 *        4. a string without any specifier is taken as a plain number of
 *           seconds
 *        5. any '-' is rejected: strtoul() would otherwise negate the
 *           number and turn e.g. "-5" into a huge positive value
 */
static unsigned long str2sec(const char *timestr)
{
        static const char spec[] = "smhdw";
        static const unsigned long mult[] = {1, 60, 60*60, 24*60*60,
                                             7*24*60*60};
        unsigned long val = 0;
        char *tail;

        /* no valid time string contains a minus sign */
        if (strchr(timestr, '-') != NULL)
                return ULONG_MAX;

        if (strpbrk(timestr, spec) == NULL) {
                /* no specifiers inside the time string,
                   should treat it as an integer value */
                val = strtoul(timestr, &tail, 10);
                return *tail ? ULONG_MAX : val;
        }

        /* format string is XXwXXdXXhXXmXXs */
        while (*timestr) {
                unsigned long v, unit, term;
                const char *ptr;

                v = strtoul(timestr, &tail, 10);
                if (v == ULONG_MAX || *tail == '\0')
                        /* value too large (ULONG_MAX or more)
                           or missing specifier */
                        return ULONG_MAX;

                ptr = strchr(spec, *tail);
                if (ptr == NULL)
                        /* unknown specifier */
                        return ULONG_MAX;

                unit = mult[ptr - spec];

                /* check if product will overflow the type */
                if (!(v < ULONG_MAX / unit))
                        return ULONG_MAX;

                term = unit * v;

                /* reject a sum that wraps or lands exactly on ULONG_MAX,
                 * since ULONG_MAX is the error value */
                if (term >= ULONG_MAX - val)
                        return ULONG_MAX;
                val += term;

                /* continue after the specifier character */
                timestr = tail + 1;
        }

        return val;
}
2656
/* Parse the size string \a str with llapi_parse_size(), seeding its units
 * argument with \a def_units, and store the parsed value in \a nr.  On a
 * negative result print "error: bad limit value <str>" and make the
 * *enclosing function* return CMD_HELP; \a nr is left untouched then. */
#define ARG2ULL(nr, str, def_units)                                     \
do {                                                                    \
        unsigned long long parsed_, units_ = def_units;                 \
                                                                        \
        if (llapi_parse_size(str, &parsed_, &units_, 1) < 0) {          \
                fprintf(stderr, "error: bad limit value %s\n", str);    \
                return CMD_HELP;                                        \
        }                                                               \
        nr = parsed_;                                                   \
} while (0)
2669
/* Return 1 when the argument vector asks for grace-time mode, i.e. one of
 * argv[1..argc-1] is exactly "-t" or "--times", otherwise 0.
 *
 * "--times" is accepted because lfs_setquota_times() declares it as the
 * long form of -t; without it here that spelling would never reach the
 * parser that understands it. */
static inline int has_times_option(int argc, char **argv)
{
        int i;

        for (i = 1; i < argc; i++) {
                if (strcmp(argv[i], "-t") == 0 ||
                    strcmp(argv[i], "--times") == 0)
                        return 1;
        }

        return 0;
}
2680
2681 int lfs_setquota_times(int argc, char **argv)
2682 {
2683         int c, rc;
2684         struct if_quotactl qctl;
2685         char *mnt, *obd_type = (char *)qctl.obd_type;
2686         struct obd_dqblk *dqb = &qctl.qc_dqblk;
2687         struct obd_dqinfo *dqi = &qctl.qc_dqinfo;
2688         struct option long_opts[] = {
2689                 {"block-grace",     required_argument, 0, 'b'},
2690                 {"group",           no_argument,       0, 'g'},
2691                 {"inode-grace",     required_argument, 0, 'i'},
2692                 {"times",           no_argument,       0, 't'},
2693                 {"user",            no_argument,       0, 'u'},
2694                 {0, 0, 0, 0}
2695         };
2696
2697         memset(&qctl, 0, sizeof(qctl));
2698         qctl.qc_cmd  = LUSTRE_Q_SETINFO;
2699         qctl.qc_type = UGQUOTA;
2700
2701         while ((c = getopt_long(argc, argv, "b:gi:tu", long_opts, NULL)) != -1) {
2702                 switch (c) {
2703                 case 'u':
2704                 case 'g':
2705                         if (qctl.qc_type != UGQUOTA) {
2706                                 fprintf(stderr, "error: -u and -g can't be used "
2707                                                 "more than once\n");
2708                                 return CMD_HELP;
2709                         }
2710                         qctl.qc_type = (c == 'u') ? USRQUOTA : GRPQUOTA;
2711                         break;
2712                 case 'b':
2713                         if ((dqi->dqi_bgrace = str2sec(optarg)) == ULONG_MAX) {
2714                                 fprintf(stderr, "error: bad block-grace: %s\n",
2715                                         optarg);
2716                                 return CMD_HELP;
2717                         }
2718                         dqb->dqb_valid |= QIF_BTIME;
2719                         break;
2720                 case 'i':
2721                         if ((dqi->dqi_igrace = str2sec(optarg)) == ULONG_MAX) {
2722                                 fprintf(stderr, "error: bad inode-grace: %s\n",
2723                                         optarg);
2724                                 return CMD_HELP;
2725                         }
2726                         dqb->dqb_valid |= QIF_ITIME;
2727                         break;
2728                 case 't': /* Yes, of course! */
2729                         break;
2730                 default: /* getopt prints error message for us when opterr != 0 */
2731                         return CMD_HELP;
2732                 }
2733         }
2734
2735         if (qctl.qc_type == UGQUOTA) {
2736                 fprintf(stderr, "error: neither -u nor -g specified\n");
2737                 return CMD_HELP;
2738         }
2739
2740         if (optind != argc - 1) {
2741                 fprintf(stderr, "error: unexpected parameters encountered\n");
2742                 return CMD_HELP;
2743         }
2744
2745         mnt = argv[optind];
2746         rc = llapi_quotactl(mnt, &qctl);
2747         if (rc) {
2748                 if (*obd_type)
2749                         fprintf(stderr, "%s %s ", obd_type,
2750                                 obd_uuid2str(&qctl.obd_uuid));
2751                 fprintf(stderr, "setquota failed: %s\n", strerror(-rc));
2752                 return rc;
2753         }
2754
2755         return 0;
2756 }
2757
/* Bits OR-ed into lfs_setquota()'s limit_mask as each limit option is
 * parsed */
#define BSLIMIT (1 << 0)        /* block soft limit (-b) */
#define BHLIMIT (1 << 1)        /* block hard limit (-B) */
#define ISLIMIT (1 << 2)        /* inode soft limit (-i) */
#define IHLIMIT (1 << 3)        /* inode hard limit (-I) */
2762
2763 int lfs_setquota(int argc, char **argv)
2764 {
2765         int c, rc;
2766         struct if_quotactl qctl;
2767         char *mnt, *obd_type = (char *)qctl.obd_type;
2768         struct obd_dqblk *dqb = &qctl.qc_dqblk;
2769         struct option long_opts[] = {
2770                 {"block-softlimit", required_argument, 0, 'b'},
2771                 {"block-hardlimit", required_argument, 0, 'B'},
2772                 {"group",           required_argument, 0, 'g'},
2773                 {"inode-softlimit", required_argument, 0, 'i'},
2774                 {"inode-hardlimit", required_argument, 0, 'I'},
2775                 {"user",            required_argument, 0, 'u'},
2776                 {0, 0, 0, 0}
2777         };
2778         unsigned limit_mask = 0;
2779         char *endptr;
2780
2781         if (has_times_option(argc, argv))
2782                 return lfs_setquota_times(argc, argv);
2783
2784         memset(&qctl, 0, sizeof(qctl));
2785         qctl.qc_cmd  = LUSTRE_Q_SETQUOTA;
2786         qctl.qc_type = UGQUOTA; /* UGQUOTA makes no sense for setquota,
2787                                  * so it can be used as a marker that qc_type
2788                                  * isn't reinitialized from command line */
2789
2790         while ((c = getopt_long(argc, argv, "b:B:g:i:I:u:", long_opts, NULL)) != -1) {
2791                 switch (c) {
2792                 case 'u':
2793                 case 'g':
2794                         if (qctl.qc_type != UGQUOTA) {
2795                                 fprintf(stderr, "error: -u and -g can't be used"
2796                                                 " more than once\n");
2797                                 return CMD_HELP;
2798                         }
2799                         qctl.qc_type = (c == 'u') ? USRQUOTA : GRPQUOTA;
2800                         rc = name2id(&qctl.qc_id, optarg,
2801                                      (qctl.qc_type == USRQUOTA) ? USER : GROUP);
2802                         if (rc) {
2803                                 qctl.qc_id = strtoul(optarg, &endptr, 10);
2804                                 if (*endptr != '\0') {
2805                                         fprintf(stderr, "error: can't find id "
2806                                                 "for name %s\n", optarg);
2807                                         return CMD_HELP;
2808                                 }
2809                         }
2810                         break;
2811                 case 'b':
2812                         ARG2ULL(dqb->dqb_bsoftlimit, optarg, 1024);
2813                         dqb->dqb_bsoftlimit >>= 10;
2814                         limit_mask |= BSLIMIT;
2815                         if (dqb->dqb_bsoftlimit &&
2816                             dqb->dqb_bsoftlimit <= 1024) /* <= 1M? */
2817                                 fprintf(stderr, "warning: block softlimit is "
2818                                         "smaller than the miminal qunit size, "
2819                                         "please see the help of setquota or "
2820                                         "Lustre manual for details.\n");
2821                         break;
2822                 case 'B':
2823                         ARG2ULL(dqb->dqb_bhardlimit, optarg, 1024);
2824                         dqb->dqb_bhardlimit >>= 10;
2825                         limit_mask |= BHLIMIT;
2826                         if (dqb->dqb_bhardlimit &&
2827                             dqb->dqb_bhardlimit <= 1024) /* <= 1M? */
2828                                 fprintf(stderr, "warning: block hardlimit is "
2829                                         "smaller than the miminal qunit size, "
2830                                         "please see the help of setquota or "
2831                                         "Lustre manual for details.\n");
2832                         break;
2833                 case 'i':
2834                         ARG2ULL(dqb->dqb_isoftlimit, optarg, 1);
2835                         limit_mask |= ISLIMIT;
2836                         if (dqb->dqb_isoftlimit &&
2837                             dqb->dqb_isoftlimit <= 1024) /* <= 1K inodes? */
2838                                 fprintf(stderr, "warning: inode softlimit is "
2839                                         "smaller than the miminal qunit size, "
2840                                         "please see the help of setquota or "
2841                                         "Lustre manual for details.\n");
2842                         break;
2843                 case 'I':
2844                         ARG2ULL(dqb->dqb_ihardlimit, optarg, 1);
2845                         limit_mask |= IHLIMIT;
2846                         if (dqb->dqb_ihardlimit &&
2847                             dqb->dqb_ihardlimit <= 1024) /* <= 1K inodes? */
2848                                 fprintf(stderr, "warning: inode hardlimit is "
2849                                         "smaller than the miminal qunit size, "
2850                                         "please see the help of setquota or "
2851                                         "Lustre manual for details.\n");
2852                         break;
2853                 default: /* getopt prints error message for us when opterr != 0 */
2854                         return CMD_HELP;
2855                 }
2856         }
2857
2858         if (qctl.qc_type == UGQUOTA) {
2859                 fprintf(stderr, "error: neither -u nor -g was specified\n");
2860                 return CMD_HELP;
2861         }
2862
2863         if (limit_mask == 0) {
2864                 fprintf(stderr, "error: at least one limit must be specified\n");
2865                 return CMD_HELP;
2866         }
2867
2868         if (optind != argc - 1) {
2869                 fprintf(stderr, "error: unexpected parameters encountered\n");
2870                 return CMD_HELP;
2871         }
2872
2873         mnt = argv[optind];
2874
2875         if ((!(limit_mask & BHLIMIT) ^ !(limit_mask & BSLIMIT)) ||
2876             (!(limit_mask & IHLIMIT) ^ !(limit_mask & ISLIMIT))) {
2877                 /* sigh, we can't just set blimits/ilimits */
2878                 struct if_quotactl tmp_qctl = {.qc_cmd  = LUSTRE_Q_GETQUOTA,
2879                                                .qc_type = qctl.qc_type,
2880                                                .qc_id   = qctl.qc_id};
2881
2882                 rc = llapi_quotactl(mnt, &tmp_qctl);
2883                 if (rc < 0) {
2884                         fprintf(stderr, "error: setquota failed while retrieving"
2885                                         " current quota settings (%s)\n",
2886                                         strerror(-rc));
2887                         return rc;
2888                 }
2889
2890                 if (!(limit_mask & BHLIMIT))
2891                         dqb->dqb_bhardlimit = tmp_qctl.qc_dqblk.dqb_bhardlimit;
2892                 if (!(limit_mask & BSLIMIT))
2893                         dqb->dqb_bsoftlimit = tmp_qctl.qc_dqblk.dqb_bsoftlimit;
2894                 if (!(limit_mask & IHLIMIT))
2895                         dqb->dqb_ihardlimit = tmp_qctl.qc_dqblk.dqb_ihardlimit;
2896                 if (!(limit_mask & ISLIMIT))
2897                         dqb->dqb_isoftlimit = tmp_qctl.qc_dqblk.dqb_isoftlimit;
2898
2899                 /* Keep grace times if we have got no softlimit arguments */
2900                 if ((limit_mask & BHLIMIT) && !(limit_mask & BSLIMIT)) {
2901                         dqb->dqb_valid |= QIF_BTIME;
2902                         dqb->dqb_btime = tmp_qctl.qc_dqblk.dqb_btime;
2903                 }
2904
2905                 if ((limit_mask & IHLIMIT) && !(limit_mask & ISLIMIT)) {
2906                         dqb->dqb_valid |= QIF_ITIME;
2907                         dqb->dqb_itime = tmp_qctl.qc_dqblk.dqb_itime;
2908                 }
2909         }
2910
2911         dqb->dqb_valid |= (limit_mask & (BHLIMIT | BSLIMIT)) ? QIF_BLIMITS : 0;
2912         dqb->dqb_valid |= (limit_mask & (IHLIMIT | ISLIMIT)) ? QIF_ILIMITS : 0;
2913
2914         rc = llapi_quotactl(mnt, &qctl);
2915         if (rc) {
2916                 if (*obd_type)
2917                         fprintf(stderr, "%s %s ", obd_type,
2918                                 obd_uuid2str(&qctl.obd_uuid));
2919                 fprintf(stderr, "setquota failed: %s\n", strerror(-rc));
2920                 return rc;
2921         }
2922
2923         return 0;
2924 }
2925
/* Map a quota type constant to a printable label.
 *
 * \param check_type  quota type to describe
 *
 * \retval "user"     when check_type is USRQUOTA
 * \retval "group"    when check_type is GRPQUOTA
 * \retval "unknown"  for every other value
 *
 * The returned pointer refers to a string literal; callers must neither
 * modify nor free it.
 */
static inline char *type2name(int check_type)
{
	switch (check_type) {
	case USRQUOTA:
		return "user";
	case GRPQUOTA:
		return "group";
	default:
		return "unknown";
	}
}
2935
/* Format a number of seconds as a compact duration string written to buf.
 *
 * Rules:
 *        1. units are emitted from largest to smallest: 1w2d3h4m5s
 *        2. a unit whose count is zero is omitted: 5d1s
 *        3. if nothing else was emitted, the seconds field is still
 *           written, so a zero value is rendered as "0s"
 *
 * Each field is written with snprintf() bounded by 40 bytes counted from
 * buf.
 *
 * Returns a pointer just past the last character written (i.e. to the
 * terminating NUL), so callers can keep appending.
 */
static char * __sec2str(time_t seconds, char *buf)
{
	/* unit table, largest first; span is the unit length in seconds */
	static const struct {
		unsigned long	span;
		char		tag;
	} units[] = {
		{ 7 * 24 * 60 * 60, 'w' },
		{ 24 * 60 * 60,     'd' },
		{ 60 * 60,          'h' },
		{ 60,               'm' },
		{ 1,                's' },
	};
	const size_t nunits = sizeof(units) / sizeof(units[0]);
	char *out = buf;
	size_t k;

	for (k = 0; k < nunits; k++) {
		unsigned long count = seconds / units[k].span;
		int is_seconds_field = (k == nunits - 1);

		/* skip empty fields, except a lone trailing seconds field */
		if (count != 0 || (is_seconds_field && out == buf))
			out += snprintf(out, 40 - (out - buf), "%lu%c",
					count, units[k].tag);

		/* carry the remainder down to the next smaller unit */
		seconds %= units[k].span;
	}

	return out;
}
2962
2963 static void sec2str(time_t seconds, char *buf, int rc)
2964 {
2965         char *tail = buf;
2966
2967         if (rc)
2968                 *tail++ = '[';
2969
2970         tail = __sec2str(seconds, tail);
2971
2972         if (rc && tail - buf < 39) {
2973            &nb