From 9f34d10016fea2eeef6a3286f3eaebc85904f86f Mon Sep 17 00:00:00 2001 From: adilger Date: Sat, 15 Mar 2008 12:27:55 +0000 Subject: [PATCH] Branch b1_6 Add a proper man page for llverdev.8 and llbackup.8. b=14711 --- lustre/doc/Makefile.am | 7 +- lustre/doc/llbackup.8 | 212 ++++++++++++++++++++++++++++++++++++++++++++++++ lustre/doc/llverdev.8 | 103 +++++++++++++++++++++++ lustre/doc/llverdev.txt | 48 ----------- 4 files changed, 319 insertions(+), 51 deletions(-) create mode 100644 lustre/doc/llbackup.8 create mode 100644 lustre/doc/llverdev.8 delete mode 100644 lustre/doc/llverdev.txt diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 1d02c60..dd2cdb9 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -14,8 +14,10 @@ PS2PDF = ps2pdf TEXEXPAND = texexpand SUFFIXES = .lin .lyx .pdf .ps .sgml .html .txt .tex .fig .eps .dvi +MANFILES = lustre.7 lfs.1 mount.lustre.8 mkfs.lustre.8 tunefs.lustre.8 lctl.8 \ + llverdev.8 if UTILS -man_MANS = lustre.7 lfs.1 mount.lustre.8 mkfs.lustre.8 tunefs.lustre.8 lctl.8 +man_MANS = $(MANFILES) endif LYXFILES= $(filter-out $(patsubst %.lin,%.lyx,$(wildcard *.lin)),\ @@ -23,8 +25,7 @@ LYXFILES= $(filter-out $(patsubst %.lin,%.lyx,$(wildcard *.lin)),\ CLEANFILES = *.aux *.tex *.log *.pdf -EXTRA_DIST = tex2pdf lustre.7 mount.lustre.8 mkfs.lustre.8 tunefs.lustre.8 \ - $(LYXFILES) lfs.1 lctl.8 +EXTRA_DIST = tex2pdf $(MANFILES) $(LYXFILES) all: diff --git a/lustre/doc/llbackup.8 b/lustre/doc/llbackup.8 new file mode 100644 index 0000000..7e99fb6 --- /dev/null +++ b/lustre/doc/llbackup.8 @@ -0,0 +1,212 @@ +.\" -*- nroff -*- +.\" Copyright 2008 by Sun Microsystems. All Rights Reserved. +.\" This file may be copied under the terms of the GNU Public License, v2. 
+.\" +.TH llbackup 8 "2008 Mar 15" Lustre "System management commands" +.SH NAME +llbackup \- back up a list of files, maybe running on multiple nodes +.SH SYNOPSIS +.B llbackup +.RB [ -chjtvVxz "] [" -C +.IR directory ] +.RB [ -e +.IR rsh ] +.RB [ -i +.IR inputlist ] +.RB [ -l +.IR logdir ] +.RB [ -n +.IR nodes ] +.RB [ -s +.IR splitmb ] +.RB [ -T +.IR tar ] +.BI -f outputbase +.SH DESCRIPTION +.B llbackup +takes a list of files to back up or restore, either from standard input, or +from a list of files supplied as a parameter and produces one or more archive +files containing the files. +.B llbackup +uses the +.BR tar (1) +or +.BR htar (1) +programs to produce archive files, and has many of the same options (though +not identical in some cases). The reason for using llbackup instead of those +programs directly is that llbackup can run in parallel on multiple +nodes to do high-speed backups on clustered or distributed filesystems and +will split the work among multiple client nodes to increase performance. +The archive files produced are identical to normal tar files (having been +created by the local tar command) and can be extracted or listed by tar +directly. +.br +The clients will pause and print status messages if they think there is not +enough free space in the output directory to avoid running out of space. +Backup tarfiles should be moved to offline storage or otherwise purged to +allow the backups to continue. If the client is doing a restore it will +similarly pause and print status messages if the tarfile it was asked to +restore is not available on that node. +.SH OPTIONS +.TP +.B -c +create archive. This is the default operation if no other is given. +.TP +.BI -C directory +change the backup command to run in +.I directory +instead of the current working directory so filenames given in the input +file are backed up or restored relative to +.IR directory . 
+Note that this affects all pathname components used during backup and restore +so the +.I inputlist +and +.I outputbase +should be specified by absolute pathname if they are not also in +.IR directory . +.TP +.BI -e rsh +specify the passwordless remote shell command (default +.BR ssh (1)). +.TP +.BI -f outputbase +specify base output filename for backup tar archives, to which a suffix will +be appended to ensure that multiple clients doing backup into the same +directory do not have conflicting filenames. For restore or list operations +this is a file containing a list of tarfiles, one per line, to restore or +list the table of contents of. The +.I outputbase +file can be a tape or disk device, but must be the same device on each client +running the backup, in which case only a single process can run on each client. +.TP +.B -h +print this help message and exit. Use +.B -x +.B -h +or +.B -t +.B -h +to get the help options for restore or list operations. +.TP +.BI -i inputlist +list of files to back up when creating an archive (default stdin). The input +file list is just a list of pathnames, one file per line, as generated by +.BR find (1), +.BR "lfs find" (1), +or +.BR e2scan (8) +for example. For restore this option is currently unused. +.TP +.B -j +use +.BR bzip2 (1) +compression on input or output tar file(s). This option is not +currently supported by the underlying +.BR htar (8) +implementation. +.TP +.BI -l logdir +directory for output logs. +.TP +.BI -n nodes +comma-separated list of client nodes to run backups. It is possible to +specify the same client node multiple times in order to have multiple +instances of the backup or restore running in parallel on that node. +This is useful to maximize the bandwidth of the backup or restore process +and avoid client nodes underutilizing their bandwidth while generating +file lists or compressing files. +.TP +.BI -s splitmb +target size for backup chunks in megabytes (default 8192MiB). 
Increasing this +size can improve performance and compression ratios, but may cause +underutilization of clients if the files are not evenly distributed. +This option has no meaning for restore or list operations. +.TP +.BI -S splitcount +number of files sent to each client before sending files to the next client +(default 200). Increasing the +.I splitcount +keeps more related files together in individual tar files (meaning fewer +archives need to be processed when restoring files for a particular +subdirectory), but can lead to performance imbalance if files vary greatly +in size and one client has to back up many of them due to proximity of +filenames in the input list, while other clients back up smaller files. +Decreasing the +.I splitcount +means a more uniform distribution of files and usage among clients, but +means nearly every archive will have to be extracted to get only a subset +of files needed. +This option has no meaning for restore or list operations. +.TP +.B -t +list table of contents of the list of tarfiles given by +.BR -f . +.TP +.BI -T tar +specify the backup command (default tar). Optionally the +.BR htar (8) +command can be used to back up directly to an HPSS archive, if available. +.TP +.B -v +be verbose - list all files being processed. By default, individual files +being backed up or restored are only reported in the log files because they +would all be intermixed coming from multiple clients in parallel and the +overall progress would be hard to determine. +.TP +.B -V +print version number and exit. +.TP +.B -x +extract tarfiles from the list of tarfiles given by +.B -f +instead of backing them up. +.TP +.B -z +use gzip compression on input or output tar file(s). This option is not +currently supported by the underlying +.BR htar (8) +implementation. 
+.SH EXAMPLES +.TP +.B lfs find /home -mtime -7 | llbackup -c -z -n c1,c2 -f /bkup/bk2008-03-15 +Create a backup of the +.B /home +filesystem finding files modified in the last 7 days, compress the output +using +.B gzip +and run on client nodes +.BR c1 " and " c2 , +putting the output tar files into the +.B /bkup +directory using filenames starting with bk2008-03-15. +.TP +.B find /bkup -name bk2007-11-28* | llbackup -x -z -n c1,c1,c2,c2 -C /tmp -f- +Restore +.B gzip +compressed files from the +.B bk2007-11-28 +backup from the +.B /bkup +directory, using two restore processes on each of the client nodes +.B c1 +and +.BR c2 , +reading the list of tar files to restore from standard input and restoring +into the +.B /tmp +directory. +.SH BUGS +It is currently not possible to extract only a subset of files from the +tarfiles. If that is necessary, then tar should be run directly on the +individual tar files after checking in the backup logs which tarfiles +have the relevant files. +.SH SEE ALSO +.BR lustre (7), +.BR tar (1), +.BR htar (8), +.BR ssh (1), +.BR rsh (1), +.BR find (1), +.BR lfs "(1) " find , +.BR e2scan (8). diff --git a/lustre/doc/llverdev.8 b/lustre/doc/llverdev.8 new file mode 100644 index 0000000..fb41540 --- /dev/null +++ b/lustre/doc/llverdev.8 @@ -0,0 +1,103 @@ +.\" -*- nroff -*- +.\" Copyright 2006 by Cluster FileSystems. All Rights Reserved. +.\" Copyright 2008 by Sun Microsystems. All Rights Reserved. +.\" This file may be copied under the terms of the GNU Public License, v2. 
+.\" +.TH llverdev 8 "2008 Mar 15" Lustre "configuration utilities" +.SH NAME +llverdev \- verify a block device is functioning properly over its full size +.SH SYNOPSIS +.B llverdev +.RB [ -c +.IR chunksize ] +.RB [ -f "] [" -h ] +.RB [ -o +.IR offset ] +.RB [ -l "] [" -p "] [" -r ] +.RB [ -t +.IR timestamp ] +.RB [ -v "] [" -w ] +.I device +.SH DESCRIPTION +Sometimes kernel drivers or hardware devices have bugs that prevent them from +accessing the full device size correctly, or possibly have bad sectors on disk +or other problems which prevent proper data storage. There are often defects +associated with major system boundaries such as 2^32 bytes, 2^31 sectors, +2^31 blocks, 2^32 blocks, etc. +.PP +The +.B llverdev +tool will write and verify a unique test pattern across the entire device in +order to ensure that not only is data accessible after it was written, but +also that data written to one part of the disk is not overwriting data on +another part of the disk. +.PP +It is expected that the llverdev tool will be run on large devices (TB), +so it is always better to run the llverdev tool in verbose mode, so that one +can easily restart device testing from the point at which it had stopped. +.PP +Running a full verification can be time consuming for very large devices, +so it is advisable to start with a partial verification to ensure the +device is minimally sane before investing the time in a full verification. +.SH OPTIONS +.TP +.BR -c | --chunksize +IO chunk size in bytes (default=1048576). +.TP +.BR -f | --force +force test to run without confirmation that the device will be overwritten +and all data therein will be permanently destroyed. +.TP +.BR -h | --help +display a brief help message. +.TP +.BI -o offset +offset in kilobytes of start of test (default=0). +.TP +.BR -l | --long +Run a full check, writing and then reading and verifying every block on the +disk. 
+.TP +.BR -p | --partial +Run a partial check, only doing periodic checks across the device (1GB steps). +.TP +.BR -r | --read +Run test in read (verify) mode only, after having run the test in +.B -w +mode previously. +.TP +.BI -t timestamp +Set test start time as printed at the start of a previously interrupted test +to ensure that the validation data is the same across the whole device +(default=current time()). +.TP +.BR -v | --verbose +Run test in verbose mode, listing each read and write operation. +.TP +.BR -w | --write +Run test in write (test-pattern) mode (default run both read and write). +.SH EXAMPLES +.TP +Run a partial device verification on \fB/dev/sda\fR: +.B llverdev -v -p /dev/sda +.br +llverdev: permanently overwrite all data on /dev/sda (yes/no)? y +.br +llverdev: /dev/sda is 4398046511104 bytes (4096.0 GB) in size +.br +Timestamp: 1009839028 +.br +Current write offset: 4096 kB +.TP +Continue an interrupted verification at offset \fB4096\fRkB from the start of the device, using the same timestamp as the previous run: +.B llverdev -f -v -p --offset=4096 --timestamp=1009839028 /dev/sda +.br +llverdev: /dev/sda is 4398046511104 bytes (4096.0 GB) in size +.br +Timestamp: 1009839028 +.br +write complete +.br +read complete +.SH SEE ALSO +.BR llverfs (8) diff --git a/lustre/doc/llverdev.txt b/lustre/doc/llverdev.txt deleted file mode 100644 index dd0e150..0000000 --- a/lustre/doc/llverdev.txt +++ /dev/null @@ -1,48 +0,0 @@ -BLOCK DEVICE VERIFICATION TOOL. ( bdevt ) -========================================== - -Building tool: - To build this tool you just need to invoke make at command prompt. - e.g. $ make - - this will compile the sources and build bdevt in this directory. - -Usage: -Syntax: - -./bdevt [OPTION]... ... 
- -[OPTION] - -t {seconds} for --timestamp, set test time (default=current time()) - -o {offset} for --offset, offset in kB of start of test (default=0) - -r run test in read (verify) mode - -w run test in write (test-pattern) mode (default=r&w) - -v for verbose - -p for --partial, for partial check (1GB steps) - -l for --long, full check (default 4k) - -c for --chunksize, IO chunk size (default=1048576) - -f for --force, force test to run without confirmation - --help to display help. - -Guide lines for using this tool: - It is expected that bdevt tool will be run on large size devices (TB), -So it is always better to run bdevt tool in verbose mode, So that one can easily -restart device testing from the point at which it had stoped. -for example: - - [root@tucker bdevt]# ./bdevt -v -f -w --timestamp=1009839028 /dev/hda5 - Number of sectors: 49158837, this makes 23.441 GB - Timestamp: 1009839028 - Current write offset: 5078016 kB - -If due to some reason sombody breaks execution at this point then one can -easily restart device from the same point by picking the same offset -displayed in by verbose as explained below. - - [root@tucker bdevt]# ./bdevt -v -f -w --offset=5078016 --timestamp=1009839028 /dev/hda5 - Number of sectors: 49158837, this makes 23.441 GB - Timestamp: 1009839028 - Current write offset: 9726208 kB - -One can use similar things for read only and read write modes also. - -- 1.8.3.1