From 7e84018b0acf33b5f0375801d7763011511e53a7 Mon Sep 17 00:00:00 2001 From: manoj Date: Fri, 29 May 2009 19:56:08 +0000 Subject: [PATCH] b=16855 r=nathan.rutman r=vladimir.saveliev lreplicate is a tool that uses metadata server changelogs to replicate a lustre filesystem --- lustre/doc/lreplicate.8 | 173 ++++ lustre/include/lustre/Makefile.am | 6 +- lustre/include/lustre/liblustreapi.h | 1 + lustre/include/lustre/lreplicate.h | 68 ++ lustre/tests/Makefile.am | 1 + lustre/tests/acceptance-small.sh | 13 +- lustre/tests/lreplicate-test.sh | 563 ++++++++++++ lustre/utils/Makefile.am | 6 +- lustre/utils/liblustreapi.c | 6 +- lustre/utils/lreplicate.c | 1644 ++++++++++++++++++++++++++++++++++ 10 files changed, 2476 insertions(+), 5 deletions(-) create mode 100644 lustre/doc/lreplicate.8 create mode 100644 lustre/include/lustre/lreplicate.h create mode 100644 lustre/tests/lreplicate-test.sh create mode 100644 lustre/utils/lreplicate.c diff --git a/lustre/doc/lreplicate.8 b/lustre/doc/lreplicate.8 new file mode 100644 index 0000000..7639f59 --- /dev/null +++ b/lustre/doc/lreplicate.8 @@ -0,0 +1,173 @@ +.TH lreplicate 8 "2009 Apr 08" Lustre "Lustre Filesystem replication utility" +.SH NAME +lreplicate \- Utility to replicate a Lustre Filesystem +.SH SYNOPSIS +.br +.B lreplicate --source|-s --target|-t +.br +.B\t\t\t --mdt|-m --user|-u --xattr|-x +.br +.B\t\t\t --verbose|-v --statuslog|-l --dry-run +.br + +.br +.B lreplicate --statuslog|-l +.br + +.br +.B lreplicate --statuslog|-l --source|-s +.br +.br +.B\t\t\t --target|-t --mdt|-m +.SH DESCRIPTION +.B lreplicate +can be used to replicate a lustre filesystem (source filesystem) to +another target filesystem (any filesystem type). It is required that +changelogs be enabled on the source filesystem (see lctl (8)). + +The source and the target filesystems must be identical before +changelogs are enabled. 
If the source filesystem has been populated +before turning on changelogs, a utility like rsync may be used to make +them identical. + +.SH OPTIONS +.B --source= +.br +The source filesystem which will be replicated. Mandatory if a valid +statuslog created during a previous replication operation +(--statuslog) is not specified. + +.B --target= +.br +The filesystem to which the source filesystem is replicated. Mandatory +if a valid statuslog created during a previous replication operation +(--statuslog) is not specified. This option can be repeated if +multiple replication targets are desired. + +.B --mdt= +.br +The metadata device which is to be replicated. Changelogs must be +turned on on this device. Mandatory if a valid statuslog created +during a previous replication operation (--statuslog) is not +specified. + +.B --user= +.br +The changelog user id. See lctl(8) changelog_register. Mandatory if a +valid statuslog created during a previous replication operation +(--statuslog) is not specified. + +.B --statuslog= +.br +A status log file to which the status of replication is saved. At the +time of initialization, the state from a previous replication +operation which was saved, can be read and reused. + +If a statuslog from a previous replication operation is specified, the +otherwise mandatory options like --source, --target and --mdt may be +skipped. + +By specifying the options like --source, --target and --mdt in +addition to the --statuslog option, the parameters in the statuslog +can be overridden. The command line options take precedence over the +ones from the statuslog. + +.B --xattr +.br +Specify whether extended attributes are replicated or not. The default +is to replicate extended attributes. Disabling xattrs will mean that +striping information will not be replicated. + +.B --verbose +.br +Produce a verbose output. + +.B --dry-run +.br +Shows what the program would do without actually replicating data. 
+ +.SH EXAMPLES + +.TP +Register a changelog consumer for MDT lustre-MDT0000 +$ ssh $MDS lctl changelog_register --device lustre-MDT0000 -n +.br +1 + +.TP +Replicate the lustre filesystem /mnt/lustre to /mnt/target. +$ lreplicate --source=/mnt/lustre --target=/mnt/target \\ +.br + --mdt=lustre-MDT0000 --user=1 \\ +.br + --statuslog replicate.log --verbose +.br +Lustre filesystem: lustre +.br +MDT device: lustre-MDT0000 +.br +Source: /mnt/lustre +.br +Target: /mnt/target +.br +Statuslog: replicate.log +.br +Changelog registration: cl1 +.br +Starting changelog record: 0 +.br +Errors: 0 +.br +lreplicate took 1 seconds +.br +Changelog records consumed: 22 +.br + + +.TP +After the filesystem undergoes some changes, replicate the \ +changes. Only the statuslog needs to be specified as it has all the \ +parameters passed earlier. +.br +$ lreplicate --statuslog replicate.log --verbose +.br +Replicating Lustre filesystem: lustre +.br +MDT device: lustre-MDT0000 +.br +Source: /mnt/lustre +.br +Target: /mnt/target +.br +Statuslog: replicate.log +.br +Changelog registration: cl1 +.br +Starting changelog record: 22 +.br +Errors: 0 +.br +lreplicate took 2 seconds +.br +Changelog records consumed: 42 +.br + +.TP +To replicate the lustre filesystem /mnt/lustre to /mnt/target1 and /mnt/target2. +$ lreplicate --source=/mnt/lustre \\ +.br + --target=/mnt/target1 --target=/mnt/target2 \\ +.br + --mdt=lustre-MDT0000 --user=cl1 \\ +.br + --statuslog replicate.log +.br + + +.SH AUTHOR +The lreplicate command is part of the Lustre filesystem. 
Contact +http://www.lustre.org/ + +.SH SEE ALSO +.BR lctl (8), +.BR lfs (1) diff --git a/lustre/include/lustre/Makefile.am b/lustre/include/lustre/Makefile.am index 77e9d86..8947cf4 100644 --- a/lustre/include/lustre/Makefile.am +++ b/lustre/include/lustre/Makefile.am @@ -35,7 +35,9 @@ # if UTILS -pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h libiam.h ll_fiemap.h +pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h libiam.h \ + ll_fiemap.h lreplicate.h endif -EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h libiam.h ll_fiemap.h +EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h libiam.h ll_fiemap.h \ + lreplicate.h diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index f667c00..23f8e3a 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -156,6 +156,7 @@ extern int parse_size(char *optarg, unsigned long long *size, unsigned long long *size_units, int bytes_spec); extern int llapi_path2fid(const char *path, unsigned long long *seq, unsigned long *oid, unsigned long *ver); +extern int llapi_search_fsname(const char *pathname, char *fsname); extern void llapi_ping_target(char *obd_type, char *obd_name, char *obd_uuid, void *args); diff --git a/lustre/include/lustre/lreplicate.h b/lustre/include/lustre/lreplicate.h new file mode 100644 index 0000000..c2623c6 --- /dev/null +++ b/lustre/include/lustre/lreplicate.h @@ -0,0 +1,68 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/include/lustre/lreplicate.h + * + */ + +#ifndef _LREPLICATE_H_ +#define _LREPLICATE_H_ + +#define LR_NAME_MAXLEN 64 +#define LR_FID_STR_LEN 128 + +/* Structure used by lreplicate. On-disk structures stored in a log + * file. This is used to determine the next start record and other + * parameters. */ + +struct lreplicate_status { + __u32 ls_version; /* Version of the log entry */ + __u32 ls_size; /* Size of the log entry */ + __u64 ls_last_recno; /* Last replicated record no. 
*/ + char ls_registration[LR_NAME_MAXLEN + 1]; /* Changelog registration*/ + char ls_mdt_device[LR_NAME_MAXLEN + 1]; /* MDT device */ + char ls_source_fs[LR_NAME_MAXLEN + 1]; /* Source Lustre FS */ + char ls_source[PATH_MAX + 1];/* Source FS path */ + __u32 ls_num_targets; /* No of replication targets */ + char ls_targets[0][PATH_MAX + 1]; /* Target FS path */ +}; + +struct lr_parent_child_log { + char pcl_pfid[LR_FID_STR_LEN]; + char pcl_tfid[LR_FID_STR_LEN]; + char pcl_name[PATH_MAX]; +}; + +#endif /* _LREPLICATE_H_ */ diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 237c259..cb76ebc 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -23,6 +23,7 @@ noinst_SCRIPTS += sanity-sec.sh sanity-gss.sh krb5_login.sh setup_kerberos.sh noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh noinst_SCRIPTS += run_dbench.sh recovery-double-scale.sh noinst_SCRIPTS += recovery-random-scale.sh parallel-scale.sh +noinst_SCRIPTS += lreplicate-test.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 855ba29..4bb68b7 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -23,7 +23,7 @@ fi [ "$DEBUG_OFF" ] || DEBUG_OFF="eval lctl set_param debug=\"$DEBUG_LVL\"" [ "$DEBUG_ON" ] || DEBUG_ON="eval lctl set_param debug=0x33f0484" -export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL REPLAY_VBR INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY LARGE_SCALE RECOVERY_MDS_SCALE RECOVERY_DOUBLE_SCALE RECOVERY_RANDOM_SCALE PARALLEL_SCALE" +export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY 
RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL REPLAY_VBR INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY LARGE_SCALE RECOVERY_MDS_SCALE RECOVERY_DOUBLE_SCALE RECOVERY_RANDOM_SCALE PARALLEL_SCALE LREPLICATE_TEST" if [ "$ACC_SM_ONLY" ]; then for O in $TESTSUITE_LIST; do @@ -435,6 +435,17 @@ if [ "$SANITY_GSS" != "no" ]; then SANITY_GSS="done" fi + +echo replication sanity: $LREPLICATE_TEST +[ "$LREPLICATE_TEST" != "no" ] && skip_remmds lreplicate-test && LREPLICATE_TEST=no && MSKIPPED=1 +[ "$LREPLICATE_TEST" != "no" ] && skip_remost lreplicate-test && LREPLICATE_TEST=no && OSKIPPED=1 +if [ "$LREPLICATE_TEST" != "no" ]; then + title lreplicate-test + bash lreplicate-test.sh + LREPLICATE_TEST="done" +fi + + [ "$SLOW" = no ] && PERFORMANCE_SANITY="no" [ -x "$MDSRATE" ] || PERFORMANCE_SANITY="no" which mpirun > /dev/null 2>&1 || PERFORMANCE_SANITY="no" diff --git a/lustre/tests/lreplicate-test.sh b/lustre/tests/lreplicate-test.sh new file mode 100644 index 0000000..f3918a1 --- /dev/null +++ b/lustre/tests/lreplicate-test.sh @@ -0,0 +1,563 @@ +#!/bin/bash +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# +# Run test by setting NOSETUP=true when ltest has setup env for us +set -e + +SRCDIR=`dirname $0` +export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin + +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT="$LREPLICATE_EXCEPT" +# bug number for skipped test: - +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +[ "$ALWAYS_EXCEPT$EXCEPT" ] && \ + echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`" + +KILL=/bin/kill + +TMP=${TMP:-/tmp} +LREPL_LOG=$TMP/lreplicate.log +ORIG_PWD=${PWD} + +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. 
${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} + + +REPLLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} + +[ "$REPLLOG" ] && rm -f $REPLLOG || true + +check_and_setup_lustre + +DIR=${DIR:-$MOUNT} +assert_DIR + + +build_test_filter + +export LREPLICATE=${LREPLICATE:-"$LUSTRE/utils/lreplicate"} +[ ! -f "$LREPLICATE" ] && export LREPLICATE=$(which lreplicate) + +# control the time of tests +DBENCH_TIME=${DBENCH_TIME:-60} # No of seconds to run dbench +TGT=/tmp/target +TGT2=/tmp/target2 +MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | \ + awk '{gsub(/_UUID/,""); print $1}' | head -1) + +init_changelog() { + CL_USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n) + echo $MDT0: Registered changelog user $CL_USER + CL_USERS=$(( $(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | wc -l) - 2 )) + [ $CL_USERS -ne 1 ] && \ + echo "Other changelog users present ($CL_USERS)" +} + +init_src() { + rm -rf $TGT/$tdir $TGT/d*.lreplicate-test 2> /dev/null + rm -rf $TGT2/$tdir $TGT2/d*.lreplicate-test 2> /dev/null + rm -rf ${DIR}/$tdir $DIR/d*.lreplicate-test ${DIR}/tgt 2> /dev/null + rm -f $LREPL_LOG + mkdir -p $TGT + mkdir -p $TGT2 + if [ $? -ne 0 ]; then + error "Failed to create target: " $TGT + fi +} + +cleanup_src_tgt() { + rm -rf $TGT/$tdir + rm -rf $DIR/$tdir + rm -rf $DIR/tgt +} + +fini_changelog() { + $LFS changelog_clear $MDT0 $CL_USER 0 + do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER +} + +check_xattr() { + local tgt=$1 + local xattr="yes" + touch $tgt + setfattr -n user.foo -v 'bar' $tgt 2> /dev/null + if [ $? -ne 0 ]; then + xattr="no" + fi + rm -f $tgt + echo $xattr +} + +check_diff() { + if [ -e $1 -o -e $2 ]; then + diff -rq -x "dev1" $1 $2 + local RC=$? + if [ $RC -ne 0 ]; then + error "Failure in replication; differences found." 
+ fi + fi +} + +# Test 1 - test basic operations +test_1() { + init_src + init_changelog + local xattr=`check_xattr $TGT/foo` + + # Directory create + mkdir -p ${DIR}/$tdir + mkdir $DIR/$tdir/d1 + mkdir $DIR/$tdir/d2 + + # File create + touch $DIR/$tdir/file1 + cp /etc/hosts $DIR/$tdir/d1/ + touch $DIR/$tdir/d1/"space in filename" + touch $DIR/$tdir/d1/file2 + + # File rename + mv $DIR/$tdir/d1/file2 $DIR/$tdir/d2/file3 + + # File and directory delete + touch $DIR/$tdir/d1/file4 + mkdir $DIR/$tdir/d1/del + touch $DIR/$tdir/d1/del/del1 + touch $DIR/$tdir/d1/del/del2 + rm -rf $DIR/$tdir/d1/del + rm $DIR/$tdir/d1/file4 + + #hard and soft links + cat /etc/hosts > $DIR/$tdir/d1/link1 + ln $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link2 + ln -s $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link3 + + # Device files + mknod $DIR/$tdir/dev1 b 8 1 + + # Replicate + echo "Replication #1" + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + + # Set attributes + chmod 000 $DIR/$tdir/d2/file3 + chown nobody:nobody $DIR/$tdir/d2/file3 + + # Set xattrs + if [ "$xattr" == "yes" ]; then + touch $DIR/$tdir/file5 + setfattr -n user.foo -v 'bar' $DIR/$tdir/file5 + fi + + echo "Replication #2" + $LREPLICATE -l $LREPL_LOG -v + + if [ "$xattr" == "yes" ]; then + local xval1=$(getfattr -n user.foo --absolute-names --only-values \ + $TGT/$tdir/file5) + local xval2=$(getfattr -n user.foo --absolute-names --only-values \ + $TGT2/$tdir/file5) + fi + + RC=0 + if [[ ! -b $TGT/$tdir/dev1 ]] || [[ ! -b $TGT2/$tdir/dev1 ]]; then + ls -l $DIR/$tdir/dev1 $TGT/$tdir/dev1 $TGT2/$tdir/dev1 + error "Error replicating block devices" + RC=1 + elif [[ "$xattr" == "yes" ]] && + [[ "$xval1" != "bar" || "$xval2" != "bar" ]]; then + error "Error in replicating xattrs. 
$xval1, $xval2" + RC=1 + fi + + # Use diff to compare the source and the destination + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return $RC + +} +run_test 1 "Simple Replication" + +# Test 2a - Replicate files created by dbench +test_2a() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + init_src + init_changelog + + # Run dbench + sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME || error "dbench failed!" + + # Replicate the changes to $TGT + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + + # Use diff to compare the source and the destination + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 2a "Replicate files created by dbench." + + +# Test 2b - Replicate files changed by dbench. +test_2b() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + init_src + init_changelog + + # Run dbench + sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME & + sleep 20 + + local child_pid=$(pgrep dbench) + echo PIDs: $child_pid + echo Stopping dbench + $KILL -SIGSTOP $child_pid + + echo Starting replication + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + + echo Resuming dbench + $KILL -SIGCONT $child_pid + sleep 10 + + echo Stopping dbench + $KILL -SIGSTOP $child_pid + + echo Starting replication + $LREPLICATE -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + + echo "Wait for dbench to finish" + $KILL -SIGCONT $child_pid + wait + + # Replicate the changes to $TGT + echo Starting replication + $LREPLICATE -l $LREPL_LOG -v + + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 2b "Replicate files changed by dbench." 
+ +# Test 2c - Replicate files while dbench is running +test_2c() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + init_src + init_changelog + + # Run dbench + sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME & + + # Replicate the changes to $TGT + sleep 10 # give dbench a headstart + local quit=0 + while [ $quit -le 1 ]; + do + echo "Running lreplicate" + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m ${mds1_svc} -u $CL_USER -l $LREPL_LOG -v + sleep 5 + pgrep dbench + if [ $? -ne 0 ]; then + quit=$(expr $quit + 1) + fi + done + + # Use diff to compare the source and the destination + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 2c "Replicate files while dbench is running." + +# Test 3a - Replicate files created by createmany +test_3a() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + init_src + init_changelog + + local numfiles=1000 + mkdir -p ${DIR}/$tdir + createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed!" + + # Replicate the changes to $TGT + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 3a "Replicate files created by createmany" + + +# Test 3b - Replicate files created by writemany +test_3b() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + init_src + init_changelog + + local time=60 + local threads=5 + mkdir -p ${DIR}/$tdir + writemany -q -a $DIR/$tdir/$tfile $time $threads || error "writemany failed!" 
+ + # Replicate the changes to $TGT + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 3b "Replicate files created by writemany" + +# Test 3c - Replicate files created by createmany/unlinkmany +test_3c() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + init_src + init_changelog + + local numfiles=1000 + mkdir -p ${DIR}/$tdir + createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed!" + unlinkmany $DIR/$tdir/$tfile $numfiles || error "unlinkmany failed!" + + # Replicate the changes to $TGT + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 3c "Replicate files created by createmany/unlinkmany" + +# Test 4 - Replicate files created by iozone +test_4() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + which iozone > /dev/null 2>&1 + if [ $? -ne 0 ]; then + skip "iozone not found. 
Skipping test" + return + fi + + init_src + init_changelog + + mkdir -p ${DIR}/$tdir + END_RUN_FILE=${DIR}/$tdir/run LOAD_PID_FILE=${DIR}/$tdir/pid \ + MOUNT=${DIR}/$tdir run_iozone.sh & + sleep 30 + child_pid=$(pgrep iozone) + $KILL -SIGSTOP $child_pid + + # Replicate the changes to $TGT + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + $KILL -SIGCONT $child_pid + sleep 60 + $KILL -SIGKILL $child_pid + + $LREPLICATE -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 4 "Replicate files created by iozone" + +# Test 5a - Stop / start lreplicate +test_5a() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + init_src + init_changelog + + NUMTEST=2000 + mkdir -p ${DIR}/$tdir + createmany -o $DIR/$tdir/$tfile $NUMTEST + + # Replicate the changes to $TGT + + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v & + local child_pid=$! + sleep 30 + $KILL -SIGHUP $child_pid + wait + $LREPLICATE -l $LREPL_LOG -v + + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 5a "Stop / start lreplicate" + +# Test 5b - Kill / restart lreplicate +test_5b() { + [ "$SLOW" = "no" ] && skip "Skipping slow test" && return + + init_src + init_changelog + + NUMTEST=2000 + mkdir -p ${DIR}/$tdir + createmany -o $DIR/$tdir/$tfile $NUMTEST + + # Replicate the changes to $TGT + + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v & + local child_pid=$! 
+ sleep 30 + $KILL -SIGKILL $child_pid + wait + $LREPLICATE -l $LREPL_LOG -v + + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 5b "Kill / restart lreplicate" + +# Test 6 - lreplicate large no of hard links +test_6() { + init_src + init_changelog + + local NUMLINKS=128 + mkdir -p ${DIR}/$tdir + touch $DIR/$tdir/link0 + local i=1 + while [ $i -lt $NUMLINKS ]; + do + ln $DIR/$tdir/link0 $DIR/$tdir/link${i} + i=$(expr $i + 1) + done + + # Replicate the changes to $TGT + $LREPLICATE -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + check_diff $DIR/$tdir $TGT/$tdir + check_diff $DIR/$tdir $TGT2/$tdir + + local count1=$(ls -l $TGT/$tdir/link0 | sed -r 's/ +/ /g' | cut -f 2 -d ' ') + local count2=$(ls -l $TGT/$tdir/link0 | sed -r 's/ +/ /g' | cut -f 2 -d ' ') + if [[ $count1 -ne $NUMLINKS ]] || [[ $count2 -ne $NUMLINKS ]]; then + ls -l $TGT/$tdir/link0 $TGT2/$tdir/link0 + error "Incorrect no of hard links found $count1, $count2" + fi + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 6 "lreplicate large no of hard links" + +# Test 7 - lreplicate stripesize +test_7() { + init_src + init_changelog + + local NUMFILES=100 + mkdir -p ${DIR}/$tdir + lfs setstripe -c 2 ${DIR}/$tdir + createmany -o $DIR/$tdir/$tfile $NUMFILES + + # To simulate replication to another lustre filesystem, replicate + # the changes to $DIR/tgt. Disable changelogs before replication + # so that the files created as part of replication are not logged. 
+ do_facet $SINGLEMDS lctl set_param -n mdd.$MDT0.changelog off + mkdir $DIR/tgt + + $LREPLICATE -s $DIR -t $DIR/tgt -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + check_diff ${DIR}/$tdir $DIR/tgt/$tdir + + local i=0 + while [ $i -lt $NUMFILES ]; + do + local count=$(( $(lfs getstripe -q $DIR/tgt/$tdir/${tfile}$i | wc -l) - 1)) + if [ $count -ne 2 ]; then + error "Stripe size not replicated" + fi + i=$(expr $i + 1) + done + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 7 "lreplicate stripesize" + +# Test 8 - Replicate multiple file/directory moves +test_8() { + init_src + init_changelog + + mkdir -p ${DIR}/$tdir + + for i in 1 2 3 4 5 6 7 8 9; do + mkdir $DIR/$tdir/d$i + for j in 1 2 3 4 5 6 7 8 9; do + mkdir $DIR/$tdir/d$i/d$i$j + createmany -o $DIR/$tdir/d$i/d$i$j/a 10 \ + > /dev/null + mv $DIR/$tdir/d$i/d$i$j $DIR/$tdir/d$i/d0$i$j + createmany -o $DIR/$tdir/d$i/d0$i$j/b 10 \ + > /dev/null + mv $DIR/$tdir/d$i/d0$i$j/a0 $DIR/$tdir/d$i/d0$i$j/c0 + done + mv $DIR/$tdir/d$i $DIR/$tdir/d0$i + done + + $LREPLICATE -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG -v + + check_diff ${DIR}/$tdir $TGT/$tdir + + fini_changelog + cleanup_src_tgt + return 0 +} +run_test 8 "Replicate multiple file/directory moves" + +log "cleanup: ======================================================" +cd $ORIG_PWD +check_and_cleanup_lustre +echo '=========================== finished ===============================' +[ -f "$REPLOG" ] && cat $REPLLOG && grep -q FAIL $REPLLOG && exit 1 || true +echo "$0: completed" diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 6b4f57c..c24b253 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -21,7 +21,7 @@ EXTRA_PROGRAMS = wirecheck rootsbin_PROGRAMS = mount.lustre sbin_PROGRAMS = mkfs.lustre tunefs.lustre lctl wiretest \ l_getidentity llverfs llverdev \ - llog_reader lr_reader lshowmount + llog_reader lr_reader lshowmount lreplicate if LIBPTHREAD sbin_PROGRAMS += loadgen endif @@ -44,6 +44,10 @@ 
loadgen_SOURCES = loadgen.c lustre_cfg.c obd.c loadgen_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) $(PTHREAD_LIBS) loadgen_DEPENDENCIES := $(LIBPTLCTL) +lreplicate_SOURCES = lreplicate.c obd.c lustre_cfg.c +lreplicate_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) +lreplicate_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a + lshowmount_SOURCES = lshowmount.c nidlist.c nidlist.h if EXT2FS_DEVEL diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 800bba5..c5c80ee 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -397,7 +397,7 @@ static int print_pool_members(char *fs, char *pool_dir, char *pool_file) /* * Resolve lustre fsname from pathname */ -static int search_fsname(char *pathname, char *fsname) +static int search_fsname(const char *pathname, char *fsname) { char *ptr; FILE *fp; @@ -2574,6 +2574,10 @@ static int dev_name2dev(char *name) return data.ioc_dev; } +int llapi_search_fsname(const char *pathname, char *fsname) +{ + return search_fsname(pathname, fsname); +} static void do_get_mdcname(char *obd_type_name, char *obd_name, char *obd_uuid, void *name) diff --git a/lustre/utils/lreplicate.c b/lustre/utils/lreplicate.c new file mode 100644 index 0000000..28b025e --- /dev/null +++ b/lustre/utils/lreplicate.c @@ -0,0 +1,1644 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/utils/lreplicate.c + * + * Author: Kalpak Shah + * Author: Manoj Joseph + */ + +/* + * - lreplicate is a tool for replicating a lustre filesystem. + * + * - The source-fs is a live lustre filesystem. It is not a + * snapshot. It is mounted and undergoing changes + * + * - The target-fs is a copy of the source-fs from the past. Let's + * call this point, the 'sync point'. + * + * - There is a changelog of all metadata operations that happened on + * the filesystem since the 'sync point'. + * + * - lreplicate replicates all the operations saved in the changelog + * on to the target filesystem to make it identical to the source. + * + * To facilitate replication, the lustre filesystem provides + * a) a way to get the current filesystem path of a given FID + * b) a way to open files by specifying its FID + * + * The changelog only has a limited amount of information. + * tfid - The FID of the target file + * pfid - The FID of the parent of the target file (at the time of + * the operation) + * name - The name of the target file (at the time of the operation) + * + * With just this information, it is not always possible to determine + * the file paths for each operation. 
For instance, if pfid does not + * exist on the source-fs (due to a subsequent deletion), its path + * cannot be queried. In such cases, lreplicate keeps the files in a + * special directory ("/.lustrerepl"). Once all the operations in a + * changelog are replayed, all the files in this special directory + * will get moved to the location as in the source-fs. + * + * Shorthand used: f2p(tfid) = fid2path(tfid) + * + * The following are the metadata operations of interest. + * 1. creat + * If tfid is absent on the source-fs, ignore this operation + * If pfid is absent on the source-fs [or] + * if f2p(pfid) is not present on target-fs [or] + * if f2p(pfid)+name != f2p(tfid) + * creat .lustrerepl/tfid + * track [pfid,tfid,name] + * Else + * creat f2p[tfid] + * + * 2. remove + * If .lustrerepl/[tfid] is present on the target + * rm .lustrerepl/[tfid] + * Else if pfid is present on the source-fs, + * if f2p(pfid)+name is present, + * rm f2p(pfid)+name(pfid,name) + * + * 3. move (pfid1,name1) to (pfid2,name2) + * If pfid2 is present + * if pfid1 is also present, mv (pfid1,name1) to (pfid2,name2) + * else mv .lustrerepl/[tfid] to (pfid2,name2) + * If pfid2 is not present, + * if pfid1 is present, mv (pfid1,name1) .lustrerepl/[tfid] + * If moving out of .lustrerepl + * move out all its children in .lustrerepl. + * [pfid,tfid,name] tracked from (1) is used for this. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define REPLICATE_STATUS_VER 1 +#define CLEAR_INTERVAL 100 +#define DEFAULT_RSYNC_THRESHOLD 0xA00000 /* 10 MB */ + +#define TYPE_STR_LEN 16 + +#define DEFAULT_MDT "-MDT0000" +#define SPECIAL_DIR ".lustrerepl" +#define RSYNC "rsync" +#define TYPE "type" + +/* Debug flags */ +#define DINFO 1 +#define DTRACE 2 + +/* Not used; declared for fulfilling obd.c's dependency. 
*/ +command_t cmdlist[0]; +extern int obd_initialize(int argc, char **argv); + +/* Information for processing a changelog record. This structure is + allocated on the heap instead of allocating large variables on the + stack. */ +struct lr_info { + long long recno; + int target_no; + enum changelog_rec_type type; + char pfid[LR_FID_STR_LEN]; + char tfid[LR_FID_STR_LEN]; + char name[PATH_MAX + 1]; + char src[PATH_MAX + 1]; + char dest[PATH_MAX + 1]; + char path[PATH_MAX + 1]; + char savedpath[PATH_MAX + 1]; + char link[PATH_MAX + 1]; + char linktmp[PATH_MAX + 1]; + char cmd[PATH_MAX]; + int bufsize; + char *buf; + + /* Variables for querying the xattributes */ + char *xlist; + size_t xsize; + char *xvalue; + size_t xvsize; +}; + +struct lr_parent_child_list { + struct lr_parent_child_log pc_log; + struct lr_parent_child_list *pc_next; +}; + +struct lreplicate_status *status; +char *statuslog; /* Name of the status log file */ +int logbackedup; +int noxattr; /* Flag to turn off replicating xattrs */ +int noclear; /* Flag to turn off clearing changelogs */ +int debug; /* Flag to turn debugging information on and off */ +int verbose; /* Verbose output */ +long long rec_count; /* No of changelog records that were processed */ +int errors; +int dryrun; +int use_rsync; /* Flag to turn on use of rsync to copy data */ +long long rsync_threshold = DEFAULT_RSYNC_THRESHOLD; +int quit; /* Flag to stop processing the changelog; set on the + receipt of a signal */ +char rsync[PATH_MAX]; +char rsync_ver[PATH_MAX]; +struct lr_parent_child_list *parents; + +/* Command line options */ +struct option long_opts[] = { + {"source", required_argument, 0, 's'}, + {"target", required_argument, 0, 't'}, + {"mdt", required_argument, 0, 'm'}, + {"user", required_argument, 0, 'u'}, + {"statuslog", required_argument, 0, 'l'}, + {"verbose", no_argument, 0, 'v'}, + {"xattr", required_argument, 0, 'x'}, + {"dry-run", no_argument, 0, 'z'}, + /* Undocumented options follow */ + {"cl-clear", 
required_argument, 0, 'c'}, + {"use-rsync", no_argument, 0, 'r'}, + {"rsync-threshold", required_argument, 0, 'y'}, + {"start-recno", required_argument, 0, 'n'}, + {"debug", required_argument, 0, 'd'}, + {0, 0, 0, 0} +}; + +/* Command line usage */ +void lr_usage() +{ + fprintf(stderr, "\tlreplicate -s -t " + "-m -r -l \n" + "lreplicate can also pick up parameters from a " + "status log created earlier.\n" + "\tlreplicate -l \n"); +} + +/* Print debug information. This is controlled by the value of the + global variable 'debug' */ +void lr_debug(int level, const char *fmt, ...) +{ + va_list ap; + + if (level > debug) + return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + + +void * lr_grow_buf(void *buf, int size) +{ + void *ptr; + + ptr = realloc(buf, size); + if (ptr == NULL) + free(buf); + return ptr; +} + + +/* Use rsync to replicate file data */ +int lr_rsync_data(struct lr_info *info) +{ + int rc; + struct stat st_src, st_dest; + char cmd[PATH_MAX]; + + lr_debug(DTRACE, "Syncing data%s\n", info->tfid); + + rc = stat(info->src, &st_src); + if (rc == -1) { + fprintf(stderr, "Error: Unable to stat src=%s %s\n", + info->src, info->name); + if (errno == ENOENT) + return 0; + else + return -errno; + } + rc = stat(info->dest, &st_dest); + if (rc == -1) { + fprintf(stderr, "Error: Unable to stat dest=%s\n", + info->dest); + return -errno; + } + + if (st_src.st_mtime != st_dest.st_mtime || + st_src.st_size != st_dest.st_size) { + /* XXX spawning off an rsync for every data sync and + * waiting synchronously is bad for performance. + * librsync could possibly used here. But it does not + * seem to be of production grade. Multi-threaded + * replication is also to be considered. 
+ */ + int status; + snprintf(cmd, PATH_MAX, "%s --inplace %s %s", rsync, info->src, + info->dest); + lr_debug(DTRACE, "\t%s %s\n", cmd, info->tfid); + status = system(cmd); + if (status == -1) { + rc = -errno; + } else if (WIFEXITED(status)) { + status = WEXITSTATUS(status); + if (!status) + rc = 0; + else if (status == 23 || status == 24) + /* Error due to vanished source files; + Ignore this error*/ + rc = 0; + else + rc = -EINVAL; + if (status) + lr_debug(DINFO, "rsync %s exited with %d %d\n", + info->src, status, rc); + } else { + rc = -EINTR; + } + } else { + lr_debug(DTRACE, "Not syncing %s and %s %s\n", info->src, + info->dest, info->tfid); + } + + return rc; +} + +int lr_copy_data(struct lr_info *info) +{ + int fd_src = -1; + int fd_dest = -1; + int bufsize; + int rsize; + int rc = 0; + struct stat st_src; + struct stat st_dest; + + fd_src = open(info->src, O_RDONLY); + if (fd_src == -1) + return -errno; + if (fstat(fd_src, &st_src) == -1 || + stat(info->dest, &st_dest) == -1) + goto out; + + if (st_src.st_mtime == st_dest.st_mtime && + st_src.st_size == st_dest.st_size) + goto out; + + if (st_src.st_size > rsync_threshold && rsync[0] != '\0') { + /* It is more efficient to use rsync to replicate + large files. Any file larger than rsync_threshold + is handed off to rsync. 
*/ + lr_debug(DTRACE, "Using rsync to replicate %s\n", info->tfid); + rc = lr_rsync_data(info); + goto out; + } + + fd_dest = open(info->dest, O_WRONLY | O_TRUNC, st_src.st_mode); + if (fd_dest == -1) { + rc = -errno; + goto out; + } + bufsize = st_dest.st_blksize; + + if (info->bufsize < bufsize) { + /* Grow buffer */ + info->buf = lr_grow_buf(info->buf, bufsize); + if (info->buf == NULL) { + rc = -ENOMEM; + goto out; + } + info->bufsize = bufsize; + } + + while (1) { + rsize = read(fd_src, info->buf, bufsize); + if (rsize == 0) { + break; + } else if (rsize < 0) { + rc = -errno; + goto out; + } + errno = 0; + if (write(fd_dest, info->buf, rsize) != rsize) { + if (errno != 0) + rc = -errno; + else + rc = -EINTR; + } + } + fsync(fd_dest); + +out: + if (fd_src != -1) + close(fd_src); + if (fd_dest != -1) + close(fd_dest); + + return rc; +} + +/* Copy data from source to destination */ +int lr_sync_data(struct lr_info *info) +{ + if (use_rsync) + return lr_rsync_data(info); + else + return lr_copy_data(info); +} + +/* Copy all attributes from file src to file dest */ +int lr_copy_attr(char *src, char *dest) +{ + struct stat st; + struct utimbuf time; + + if (stat(src, &st) == -1 || + chmod(dest, st.st_mode) == -1 || + chown(dest, st.st_uid, st.st_gid) == -1) + return -errno; + + time.actime = st.st_atime; + time.modtime = st.st_mtime; + if (utime(dest, &time) == -1) + return -errno; + return 0; +} + +/* Copy all xattrs from file info->src to info->dest */ +int lr_copy_xattr(struct lr_info *info) +{ + size_t size = info->xsize; + int start; + int len; + int rc; + + if (noxattr) + return 0; + + errno = 0; + rc = llistxattr(info->src, info->xlist, size); + lr_debug(DTRACE, "llistxattr(%s,%p) returned %d, errno=%d\n", + info->src, info->xlist, rc, errno); + if ((rc > 0 && info->xlist == NULL) || errno == ERANGE) { + size = rc > PATH_MAX ? 
rc : PATH_MAX; + info->xlist = lr_grow_buf(info->xlist, size); + if (info->xlist == NULL) + return -ENOMEM; + info->xsize = size; + rc = llistxattr(info->src, info->xlist, size); + lr_debug(DTRACE, "llistxattr %s returned %d, errno=%d\n", + info->src, rc, errno); + } + if (rc < 0) + return rc; + + len = rc; + start = 0; + while (start < len) { + size = info->xvsize; + rc = lgetxattr(info->src, info->xlist + start, + info->xvalue, size); + if (info->xvalue == NULL || errno == ERANGE) { + size = rc > PATH_MAX ? rc : PATH_MAX; + info->xvalue = lr_grow_buf(info->xvalue, size); + if (info->xvalue == NULL) + return -ENOMEM; + info->xvsize = size; + rc = lgetxattr(info->src, info->xlist + start, + info->xvalue, size); + } + lr_debug(DTRACE, "\t(%s,%d) rc=%p\n", info->xlist + start, + info->xvalue, rc); + if (rc > 0) { + size = rc; + rc = lsetxattr(info->dest, info->xlist + start, + info->xvalue, size, 0); + lr_debug(DTRACE, "\tlsetxattr(), rc=%d, errno=%d\n", + rc, errno); + if (rc == -1) { + if (errno != ENOTSUP) { + fprintf(stderr, "Error replicating " + " xattr for %s: %d\n", + info->dest, errno); + errors++; + } + rc = 0; + } + } + start += strlen(info->xlist + start) + 1; + } + + lr_debug(DINFO, "setxattr: %s %s\n", info->src, info->dest); + + return rc; +} + +/* Retrieve the filesystem path for a given FID and a given + linkno. The path is returned in info->path */ +int lr_get_path_ln(struct lr_info *info, char *fidstr, int linkno) +{ + long long recno = -1; + int rc; + + rc = llapi_fid2path(status->ls_mdt_device, fidstr, info->path, + PATH_MAX, &recno, &linkno); + if (rc < 0 && rc != -ENOENT) { + fprintf(stderr, "fid2path error: (%s, %s) %d %s\n", + status->ls_mdt_device, fidstr, + -rc, strerror(errno = -rc)); + } + + return rc; +} + +/* Retrieve the filesystem path for a given FID. 
The path is returned + in info->path */ +int lr_get_path(struct lr_info *info, char *fidstr) +{ + return lr_get_path_ln(info, fidstr, 0); +} + +/* Generate the path for opening by FID */ +void lr_get_FID_PATH(char *mntpt, char *fidstr, char *buf, int bufsize) +{ + /* Open-by-FID path is /.lustre/fid/[SEQ:OID:VER] */ + snprintf(buf, bufsize, "%s/%s/fid/%s", mntpt, mdd_dot_lustre_name, + fidstr + 2); + return; +} + +/* Read the symlink information into 'info->link' */ +int lr_get_symlink(struct lr_info *info) +{ + int rc; + char *link; + + lr_get_FID_PATH(status->ls_source, info->tfid, info->src, PATH_MAX); + rc = readlink(info->src, info->linktmp, PATH_MAX); + if (rc > 0) + info->linktmp[rc] = '\0'; + else + return rc; + lr_debug(DTRACE, "symlink: readlink returned %s\n", info->linktmp); + + if (strncmp(info->linktmp, status->ls_source, + strlen(status->ls_source)) == 0) { + /* Strip source fs path and replace with target fs path. */ + link = info->linktmp + strlen(status->ls_source); + snprintf(info->src, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], link); + link = info->src; + } else { + link = info->linktmp; + } + strncpy(info->link, link, PATH_MAX); + info->link[PATH_MAX] = '\0'; + + return rc; +} + +/* Create file/directory/device file/symlink. 
*/ +int lr_mkfile(struct lr_info *info) +{ + struct stat st; + int rc = 0; + + errno = 0; + lr_debug(DINFO, "mkfile(%d) %s \n", info->type, info->dest); + if (info->type == CL_MKDIR) { + rc = mkdir(info->dest, 0777); + } else if (info->type == CL_SOFTLINK) { + lr_get_symlink(info); + rc = symlink(info->link, info->dest); + } else if (info->type == CL_MKNOD) { + lr_get_FID_PATH(status->ls_source, info->tfid, + info->src, PATH_MAX); + rc = stat(info->src, &st); + if (rc == -1) { + if (errno == ENOENT) + return 0; + else + return -errno; + } + rc = mknod(info->dest, st.st_mode, st.st_rdev); + } else { + rc = mknod(info->dest, S_IFREG | 0777, 0); + } + if (rc) + return -errno; + + /* Sync data and attributes */ + if (info->type == CL_CREATE || info->type == CL_MKDIR) { + lr_debug(DTRACE, "Syncing data and attributes %s\n", + info->tfid); + (void) lr_copy_xattr(info); + if (info->type == CL_CREATE) + rc = lr_sync_data(info); + if (!rc) + rc = lr_copy_attr(info->src, info->dest); + + if (rc == -ENOENT) + /* Source file has disappeared. Not an error. 
*/ + rc = 0; + } else { + lr_debug(DTRACE, "Not syncing data and attributes %s\n", + info->tfid); + } + + return rc; +} + +int lr_add_pc(const char *pfid, const char *tfid, const char *name) +{ + struct lr_parent_child_list *p; + + p = calloc(1, sizeof(*p)); + if (!p) + return -ENOMEM; + strcpy(p->pc_log.pcl_pfid, pfid + 2); + strcpy(p->pc_log.pcl_tfid, tfid + 2); + strcpy(p->pc_log.pcl_name, name); + + p->pc_next = parents; + parents = p; + return 0; +} + +void lr_cascade_move(const char *fid, const char *dest, struct lr_info *info) +{ + struct lr_parent_child_list *curr, *prev; + char *d; + int rc; + + d = calloc(1, PATH_MAX + 1); + prev = curr = parents; + while (curr) { + if (strcmp(curr->pc_log.pcl_pfid, fid) == 0) { + snprintf(d, PATH_MAX, "%s/%s", dest, + curr->pc_log.pcl_name); + snprintf(info->src, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], + SPECIAL_DIR, curr->pc_log.pcl_tfid); + rc = rename(info->src, d); + if (rc == -1) { + fprintf(stderr, "Error renaming file " + " %s to %s: %d\n", + info->src, d, errno); + errors++; + } + lr_cascade_move(curr->pc_log.pcl_tfid, d, info); + if (curr == parents) + parents = curr->pc_next; + else + prev->pc_next = curr->pc_next; + free(curr); + prev = curr = parents; + + } else { + prev = curr; + curr = curr->pc_next; + } + } + + free(d); +} + +/* remove [info->pfid, ext->tfid] from parents */ +int lr_remove_pc(const char *pfid, const char *tfid) +{ + struct lr_parent_child_list *curr, *prev; + + for (prev = curr = parents; curr; prev = curr, curr = curr->pc_next) { + if (strcmp(curr->pc_log.pcl_pfid, pfid + 2) == 0 && + strcmp(curr->pc_log.pcl_tfid, tfid + 2) == 0) { + if (curr == parents) + parents = curr->pc_next; + else + prev->pc_next = curr->pc_next; + free(curr); + break; + } + } + return 0; +} + +/* Create file under SPECIAL_DIR with its tfid as its name. 
*/ +int lr_mk_special(struct lr_info *info) +{ + int rc; + + snprintf(info->dest, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], SPECIAL_DIR, + info->tfid + 2); + + rc = lr_mkfile(info); + if (rc) + return rc; + + rc = lr_add_pc(info->pfid, info->tfid, info->name); + return rc; +} + +/* Remove a file or directory */ +int lr_rmfile(struct lr_info *info) +{ + int rc; + + if (info->type == CL_RMDIR) + rc = rmdir(info->dest); + else + rc = unlink(info->dest); + if (rc == -1) + rc = -errno; + return rc; +} + +/* Remove a file under SPECIAL_DIR with its tfid as its name. */ +int lr_rm_special(struct lr_info *info) +{ + int rc; + + snprintf(info->dest, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], SPECIAL_DIR, + info->tfid + 2); + rc = lr_rmfile(info); + + if (rc) + lr_debug(DINFO, "remove: %s; rc=%d, errno=%d\n", + info->dest, rc, errno); + return rc; +} + +/* Replicate file and directory create events */ +int lr_create(struct lr_info *info) +{ + int len; + int rc1 = 0; + int rc; + int mkspecial = 0; + + /* Is target FID present on the source? */ + rc = lr_get_path(info, info->tfid + 3); + if (rc == -ENOENT) { + /* Source file has disappeared. Not an error. */ + lr_debug(DINFO, "create: tfid %s not found on" + "source-fs\n", info->tfid); + return 0; + } else if (rc) { + return rc; + } + strcpy(info->savedpath, info->path); + + /* Is parent FID present on the source */ + rc = lr_get_path(info, info->pfid + 3); + if (rc == -ENOENT) { + lr_debug(DINFO, "create: pfid %s not found on source-fs\n", + info->tfid); + mkspecial = 1; + } else if (rc < 0) { + return rc; + } + + /* Is f2p(pfid)+name != f2p(tfid)? If not the file has moved. 
*/ + len = strlen(info->path); + if (len - 1 >= 0 && info->path[len - 1] == '/') + snprintf(info->dest, PATH_MAX, "%s%s", info->path, info->name); + else + snprintf(info->dest, PATH_MAX, "%s/%s", info->path, info->name); + + lr_debug(DTRACE, "dest = %s; savedpath = %s\n", info->dest, + info->savedpath); + if (strncmp(info->dest, info->savedpath, PATH_MAX) != 0) { + lr_debug(DTRACE, "create: file moved (%s). %s != %s\n", + info->tfid, info->dest, info->savedpath); + mkspecial = 1; + } + + /* Is f2p(pfid) present on the target? If not, the parent has + moved */ + if (!mkspecial) { + snprintf(info->dest, PATH_MAX, "%s%s", status->ls_targets[0], + info->path); + if (access(info->dest, F_OK) != 0) + mkspecial = 1; + } + for (info->target_no = 0; info->target_no < status->ls_num_targets; + info->target_no++) { + snprintf(info->dest, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], info->savedpath); + lr_get_FID_PATH(status->ls_source, info->tfid, info->src, + PATH_MAX); + + if (!mkspecial) + rc1 = lr_mkfile(info); + if (mkspecial || rc1 == -ENOENT) { + rc1 = lr_mk_special(info); + } + if (rc1) + rc = rc1; + } + return rc; +} + +/* Replicate a file remove (rmdir/unlink) operation */ +int lr_remove(struct lr_info *info) +{ + int rc = 0; + int rc1; + + for (info->target_no = 0; info->target_no < status->ls_num_targets; + info->target_no++) { + + rc1 = lr_rm_special(info); + if (!rc1) + continue; + + rc1 = lr_get_path(info, info->pfid + 3); + if (rc1 == -ENOENT) { + lr_debug(DINFO, "remove: pfid %s not found\n", + info->pfid); + continue; + } + if (rc1) { + rc = rc1; + continue; + } + snprintf(info->dest, PATH_MAX, "%s%s/%s", + status->ls_targets[info->target_no], info->path, + info->name); + + rc1 = lr_rmfile(info); + lr_debug(DINFO, "remove: %s; rc1=%d, errno=%d\n", + info->dest, rc1, errno); + if (rc1) { + rc = rc1; + continue; + } + } + return rc; +} + +/* Replicate a rename/move operation. This operations are tracked by + two changelog records. 
*/ +int lr_move(struct lr_info *info, struct lr_info *ext) +{ + int rc = 0; + int rc1; + int rc_dest, rc_src; + int special_src = 0; + int special_dest = 0; + + rc_dest = lr_get_path(ext, ext->pfid + 3); + if (rc_dest < 0 && rc_dest != -ENOENT) + return rc_dest; + + rc_src = lr_get_path(info, info->pfid + 3); + if (rc_src < 0 && rc_src != -ENOENT) + return rc_src; + + for (info->target_no = 0; info->target_no < status->ls_num_targets; + info->target_no++) { + + if (!rc_dest) { + snprintf(info->dest, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], + ext->path); + if (access(info->dest, F_OK) != 0) { + rc_dest = -errno; + } else { + snprintf(info->dest, PATH_MAX, "%s%s/%s", + status->ls_targets[info->target_no], + ext->path, ext->name); + } + } + if (rc_dest == -ENOENT) { + snprintf(info->dest, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], + SPECIAL_DIR, info->tfid + 2); + special_dest = 1; + } + + if (!rc_src) + snprintf(info->src, PATH_MAX, "%s%s/%s", + status->ls_targets[info->target_no], + info->path, info->name); + if (rc_src == -ENOENT || (access(info->src, F_OK) != 0 && + errno == ENOENT)) { + snprintf(info->src, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], + SPECIAL_DIR, info->tfid + 2); + special_src = 1; + } + + rc1 = 0; + if (strcmp(info->src, info->dest) != 0) { + rc1 = rename(info->src, info->dest); + if (rc1 == -1) + rc1 = -errno; + } + + if (special_src) { + lr_remove_pc(info->pfid, info->tfid); + if (!special_dest) + lr_cascade_move(info->tfid + 2, info->dest, info); + } + if (special_dest) + lr_add_pc(ext->pfid, info->tfid, ext->name); + + lr_debug(DINFO, "move: %s [to] %s rc1=%d, errno=%d\n", + info->src, info->dest, rc1, errno); + if (rc1) + rc = rc1; + } + return rc; +} + +/* Replicate a hard link */ +int lr_link(struct lr_info *info) +{ + int i; + int len; + int rc; + int rc1; + struct stat st; + + lr_get_FID_PATH(status->ls_source, info->tfid, info->src, PATH_MAX); + rc = stat(info->src, &st); + if (rc == 
-1) + return -errno; + + for (info->target_no = 0; info->target_no < status->ls_num_targets; + info->target_no++) { + + info->src[0] = 0; + info->dest[0] = 0; + rc1 = 0; + + /* Search through the hardlinks to get the src and dest */ + for (i = 0; i < st.st_nlink && (info->src[0] == 0 || + info->dest[0] == 0); i++) { + rc1 = lr_get_path_ln(info, info->tfid + 3, i); + if (rc1) + break; + else + lr_debug(DTRACE, "\tfid2path %s, %s, %d\n", + info->path, info->name, i); + + len = strlen(info->path) - strlen(info->name); + if (len > 0 && strcmp(info->path + len, + info->name) == 0) + snprintf(info->dest, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], + info->path); + else if (info->src[0] == 0) + snprintf(info->src, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], + info->path); + } + + if (rc1) { + rc = rc1; + continue; + } + + if (info->src[0] == 0 || info->dest[0] == 0) + /* Could not find the source or destination. + This can happen when some links don't exist + anymore. 
*/ + return -EINVAL; + + if (info->src[0] == 0) + snprintf(info->src, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], + SPECIAL_DIR, info->tfid + 2); + else if (info->dest[0] == 0) + snprintf(info->dest, PATH_MAX, "%s/%s/%s", + status->ls_targets[info->target_no], + SPECIAL_DIR, info->tfid + 2); + + rc1 = link(info->src, info->dest); + lr_debug(DINFO, "link: %s [to] %s; rc1=%d,errno=%d\n", + info->src, info->dest, rc1, errno); + + if (rc1) + rc = rc1; + } + return rc; +} + +/* Replicate file attributes */ +int lr_setattr(struct lr_info *info) +{ + int rc1; + int rc; + + lr_get_FID_PATH(status->ls_source, info->tfid, info->src, PATH_MAX); + + rc = lr_get_path(info, info->tfid + 3); + if (rc == -ENOENT) + lr_debug(DINFO, "setattr: %s not present on source-fs\n", + info->src); + if (rc) + return rc; + + for (info->target_no = 0; info->target_no < status->ls_num_targets; + info->target_no++) { + + snprintf(info->dest, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], info->path); + lr_debug(DINFO, "setattr: %s %s %s", info->src, info->dest, + info->tfid); + + rc1 = lr_sync_data(info); + if (!rc1) + rc1 = lr_copy_attr(info->src, info->dest); + if (rc1) + rc = rc1; + } + return rc; +} + +/* Replicate xattrs */ +int lr_setxattr(struct lr_info *info) +{ + int rc, rc1; + + lr_get_FID_PATH(status->ls_source, info->tfid, info->src, PATH_MAX); + + rc = lr_get_path(info, info->tfid + 3); + if (rc == -ENOENT) + lr_debug(DINFO, "setxattr: %s not present on source-fs\n", + info->src); + if (rc) + return rc; + + for (info->target_no = 0; info->target_no < status->ls_num_targets; + info->target_no++) { + + snprintf(info->dest, PATH_MAX, "%s%s", + status->ls_targets[info->target_no], info->path); + lr_debug(DINFO, "setxattr: %s %s %s\n", info->src, info->dest, + info->tfid); + + rc1 = lr_copy_xattr(info); + if (rc1) + rc = rc1; + } + + return rc; +} + +/* Parse a line of changelog entry */ +int lr_parse_line(struct lr_info *info, FILE *fp) +{ + unsigned long long 
time; + unsigned int flags; + char typestr[TYPE_STR_LEN]; + char line[PATH_MAX]; + char *str; + int i; + + if (fgets(line, sizeof(line), fp) != NULL) { + if (sscanf(line, "%llu %s %llu %x %s %s", + &info->recno, typestr, &time, + &flags, info->tfid, info->pfid) < 4) { + fprintf(stderr, "error: unexpected changelog record " + "format - %s\n", line); + return -1; + } + typestr[2] = '\0'; + info->type = atoi(typestr); + + /* The filename could have spaces in it. scanf would + have ignored it. Parse for the complete + filename. */ + if (info->type != CL_SETATTR && + info->type != CL_XATTR && + info->type != CL_MARK) { + for (i = 0, str = line; str != NULL && i <= 5; + i++, str++){ + str = strchr(str, ' '); + } + if (str) { + strncpy(info->name, str, PATH_MAX); + str = strchr(info->name, '\n'); + if (str) + str[0] = '\0'; + } else { + fprintf(stderr, "error: unexpected changelog " + "record format - %s\n", line); + return -1; + } + } + rec_count++; + } else { + return -1; + } + return 0; +} + +/* Initialize the replication parameters */ +int lr_init_status() +{ + size_t size = sizeof(struct lreplicate_status) + PATH_MAX; + + if (status != NULL) + return 0; + status = calloc(size, 1); + if (status == NULL) + return -ENOMEM; + status->ls_version = REPLICATE_STATUS_VER; + status->ls_size = size; + status->ls_last_recno = -1; + return 0; +} + +/* Make a backup of the statuslog */ +void lr_backup_log() +{ + char backupfile[PATH_MAX]; + + if (logbackedup) + return; + snprintf(backupfile, PATH_MAX, "%s.old", statuslog); + (void) rename(statuslog, backupfile); + logbackedup = 1; + + return; +} + +/* Save replication parameters to a statuslog. 
*/ +int lr_write_log() +{ + int fd; + size_t size; + size_t write_size = status->ls_size; + struct lr_parent_child_list *curr; + int rc = 0; + + if (statuslog == NULL) + return 0; + + lr_backup_log(); + + fd = open(statuslog, O_WRONLY | O_CREAT | O_SYNC); + if (fd == -1) { + fprintf(stderr, "Error opening log file for writing (%s)\n", + statuslog); + return -1; + } + errno = 0; + size = write(fd, status, write_size); + if (size != write_size) { + fprintf(stderr, "Error writing to log file (%s) %d\n", + statuslog, errno); + close(fd); + return -1; + } + + for (curr = parents; curr; curr = curr->pc_next) { + size = write(fd, &curr->pc_log, sizeof(curr->pc_log)); + if (size != sizeof(curr->pc_log)) { + fprintf(stderr, "Error writing to log file (%s) %d\n", + statuslog, errno); + rc = -1; + break; + } + } + close(fd); + return rc; +} + +/* Read statuslog and populate the replication parameters. Command + * line parameters take precedence over parameters in the log file.*/ +int lr_read_log() +{ + struct lr_parent_child_list *tmp; + struct lr_parent_child_log rec; + struct lreplicate_status *s; + int fd = -1; + size_t size; + size_t read_size = sizeof(struct lreplicate_status) + PATH_MAX; + int rc = 0; + + if (statuslog == NULL) + return 0; + + s = calloc(1, read_size); + if (s == NULL) + GOTO(out, rc = -ENOMEM); + + fd = open(statuslog, O_RDONLY); + if (fd == -1) + GOTO(out, rc = -errno); + size = read(fd, s, read_size); + if (size != read_size) + GOTO(out, rc = -EINVAL); + if (read_size < s->ls_size) { + read_size = s->ls_size; + s = lr_grow_buf(s, read_size); + if (s == NULL) + GOTO(out, rc = -ENOMEM); + if (lseek(fd, 0, SEEK_SET) == -1) + GOTO(out, rc = -errno); + size = read(fd, s, read_size); + if (size != read_size) + GOTO(out, rc = -EINVAL); + } + + while (read(fd, &rec, sizeof(rec)) != 0) { + tmp = calloc(1, sizeof(*tmp)); + if (!tmp) + GOTO(out, rc = -ENOMEM); + tmp->pc_log = rec; + tmp->pc_next = parents; + parents = tmp; + } + + /* copy uninitialized fields 
to status */ + if (status->ls_num_targets == 0) { + if (status->ls_size != s->ls_size) { + status = lr_grow_buf(status, s->ls_size); + if (status == NULL) + GOTO(out, rc = -ENOMEM); + status->ls_size = s->ls_size; + } + status->ls_num_targets = s->ls_num_targets; + memcpy(status->ls_targets, s->ls_targets, + PATH_MAX * s->ls_num_targets); + } + if (status->ls_last_recno == -1) + status->ls_last_recno = s->ls_last_recno; + + if (status->ls_registration[0] == '\0') + strncpy(status->ls_registration, s->ls_registration, + LR_NAME_MAXLEN); + + if (status->ls_mdt_device[0] == '\0') + strncpy(status->ls_mdt_device, s->ls_mdt_device, + LR_NAME_MAXLEN); + + if (status->ls_source_fs[0] == '\0') + strncpy(status->ls_source_fs, s->ls_source_fs, + LR_NAME_MAXLEN); + + if (status->ls_source[0] == '\0') + strncpy(status->ls_source, s->ls_source, PATH_MAX); + + out: + if (fd != -1) + close(fd); + if (s) + free(s); + return rc; +} + +/* Clear changelogs every CLEAR_INTERVAL records or at the end of + processing. */ +int lr_clear_cl(struct lr_info *info, int force) +{ + char mdt_device[LR_NAME_MAXLEN + 1]; + long long rec; + int rc = 0; + + if (force || info->recno > status->ls_last_recno + CLEAR_INTERVAL) { + if (info->type == CL_RENAME) + rec = info->recno + 1; + else + rec = info->recno; + if (!noclear && !dryrun) { + /* llapi_changelog_clear modifies the mdt + * device name so make a copy of it until this + * is fixed. + */ + strncpy(mdt_device, status->ls_mdt_device, + LR_NAME_MAXLEN); + rc = llapi_changelog_clear(mdt_device, + status->ls_registration, + rec); + if (rc) + printf("Changelog clear (%s, %s, %lld) " + "returned %d\n", status->ls_mdt_device, + status->ls_registration, rec, rc); + } + if (!rc && !dryrun) { + status->ls_last_recno = rec; + lr_write_log(); + + } + } + + return rc; +} + +/* Locate a usable version of rsync. At this point we'll use any + version. 
*/ +int lr_locate_rsync() +{ + FILE *fp; + int len; + + /* Locate rsync */ + snprintf(rsync, PATH_MAX, "%s -p %s", TYPE, RSYNC); + fp = popen(rsync, "r"); + if (fp == NULL) + return -1; + + if (fgets(rsync, PATH_MAX, fp) == NULL) { + fclose(fp); + return -1; + } + + len = strlen(rsync); + if (len > 0 && rsync[len - 1] == '\n') + rsync[len - 1] = '\0'; + fclose(fp); + + /* Determine the version of rsync */ + snprintf(rsync_ver, PATH_MAX, "%s --version", rsync); + fp = popen(rsync_ver, "r"); + if (fp == NULL) + return -1; + + if (fgets(rsync_ver, PATH_MAX, fp) == NULL) { + fclose(fp); + return -1; + } + len = strlen(rsync_ver); + if (len > 0 && rsync_ver[len - 1] == '\n') + rsync_ver[len - 1] = '\0'; + fclose(fp); + + return 0; + +} + +/* Print the replication parameters */ +void lr_print_status(struct lr_info *info) +{ + int i; + + if (!verbose) + return; + + printf("Lustre filesystem: %s\n", status->ls_source_fs); + printf("MDT device: %s\n", status->ls_mdt_device); + printf("Source: %s\n", status->ls_source); + for (i = 0; i < status->ls_num_targets; i++) + printf("Target: %s\n", status->ls_targets[i]); + if (statuslog != NULL) + printf("Statuslog: %s\n", statuslog); + printf("Changelog registration: %s\n", status->ls_registration); + printf("Starting changelog record: %lld\n", status->ls_last_recno); + if (noxattr) + printf("Replicate xattrs: no\n"); + if (noclear) + printf("Clear changelog after use: no\n"); + if (use_rsync) + printf("Using rsync: %s (%s)\n", rsync, rsync_ver); +} + +/* Replicate filesystem operations from src_path to target_path */ +int lr_replicate() +{ + int fd; + FILE *fp; + long long startrec; + struct lr_info *info; + struct lr_info *ext; + time_t start; + int xattr_not_supp; + int i; + int rc; + + start = time(NULL); + + info = calloc(1, sizeof(struct lr_info)); + if (info == NULL) + return -ENOMEM; + + rc = llapi_search_fsname(status->ls_source, status->ls_source_fs); + if (rc) { + fprintf(stderr, "Source path is not a valid Lustre 
client " + "mountpoint.\n"); + return rc; + } + if (status->ls_mdt_device[0] == '\0') + snprintf(status->ls_mdt_device, LR_NAME_MAXLEN, "%s%s", + status->ls_source_fs, DEFAULT_MDT); + + ext = calloc(1, sizeof(struct lr_info)); + if (ext == NULL) + return -ENOMEM; + memcpy(ext, info, sizeof(struct lr_info)); + + for (i = 0, xattr_not_supp = 0; i < status->ls_num_targets; i++) { + snprintf(info->dest, PATH_MAX, "%s/%s", status->ls_targets[i], + SPECIAL_DIR); + rc = mkdir(info->dest, 0777); + if (rc == -1 && errno != EEXIST) { + fprintf(stderr, "Error writing to target path %s.\n", + status->ls_targets[i]); + return -errno; + } + rc = llistxattr(info->src, info->xlist, info->xsize); + if (rc == -1 && errno == ENOTSUP) { + fprintf(stderr, "xattrs not supported on %s\n", + status->ls_targets[i]); + xattr_not_supp++; + } + } + if (xattr_not_supp == status->ls_num_targets) + /* None of the targets support xattrs. */ + noxattr = 1; + + lr_print_status(info); + + /* Open changelogs for consumption*/ + startrec = status->ls_last_recno; + fd = llapi_changelog_open(status->ls_source_fs, startrec); + if (fd < 0) { + fprintf(stderr, "Error opening changelog file for fs %s.\n", + status->ls_source_fs); + return fd; + } + if ((fp = fdopen(fd, "r")) == NULL) { + fprintf(stderr, "Error: fdopen failed."); + close(fd); + return -errno; + } + + while (!quit && lr_parse_line(info, fp) == 0) { + rc = 0; + if (info->type == CL_RENAME) + /* Rename operations have an additional changelog + record of information. 
*/ + lr_parse_line(ext, fp); + + if (dryrun) + continue; + + switch(info->type) { + case CL_CREATE: + case CL_MKDIR: + case CL_MKNOD: + case CL_SOFTLINK: + rc = lr_create(info); + break; + case CL_RMDIR: + case CL_UNLINK: + rc = lr_remove(info); + break; + case CL_RENAME: + rc = lr_move(info, ext); + break; + case CL_HARDLINK: + rc = lr_link(info); + break; + case CL_TRUNC: + case CL_SETATTR: + rc = lr_setattr(info); + break; + case CL_XATTR: + rc = lr_setxattr(info); + break; + case CL_CLOSE: + case CL_EXT: + case CL_OPEN: + case CL_IOCTL: + case CL_MARK: + /* Nothing needs to be done for these entries */ + default: + break; + } + if (rc && rc != -ENOENT) { + fprintf(stderr, "Replication of operation %d, " + "index %lld failed: %d\n", + info->type, info->recno, rc); + errors++; + } + lr_clear_cl(info, 0); + if (debug) { + bzero(info, sizeof(struct lr_info)); + bzero(ext, sizeof(struct lr_info)); + } + } + + if (errors || verbose) + printf("Errors: %d\n", errors); + + /* Clear changelog records used so far */ + lr_clear_cl(info, 1); + + if (verbose) { + printf("lreplicate took %ld seconds\n", time(NULL) - start); + printf("Changelog records consumed: %lld\n", rec_count); + } + + close(fd); + fclose(fp); + + return 0; +} + +void +termination_handler (int signum) +{ + /* Set a flag for the replicator to gracefully shutdown */ + quit = 1; + printf("lreplicate halting.\n"); +} + +int main(int argc, char *argv[]) +{ + char c; + int newsize; + int numtargets = 0; + int rc = 0; + + if ((rc = lr_init_status()) != 0) + return rc; + + while ((c = getopt_long(argc, argv, "s:t:m:u:l:vx:zc:ry:n:d:", + long_opts, NULL)) >= 0) { + switch (c) { + case 's': + /* Assume absolute paths */ + strncpy(status->ls_source, optarg, PATH_MAX); + break; + case 't': + status->ls_num_targets++; + numtargets++; + if (numtargets != status->ls_num_targets) { + /* Targets were read from a log + file. The ones specified on the + command line take precedence. 
The + ones from the log file will be + ignored. */ + status->ls_num_targets = numtargets; + } + newsize = sizeof (struct lreplicate_status) + + (status->ls_num_targets * PATH_MAX); + if (status->ls_size != newsize) { + status->ls_size = newsize; + status = lr_grow_buf(status, newsize); + if (status == NULL) + return -ENOMEM; + } + strncpy(status->ls_targets[status->ls_num_targets - 1], + optarg, + PATH_MAX); + break; + case 'm': + strncpy(status->ls_mdt_device, optarg, LR_NAME_MAXLEN); + break; + case 'u': + strncpy(status->ls_registration, optarg, + LR_NAME_MAXLEN); + break; + case 'l': + statuslog = optarg; + (void) lr_read_log(); + break; + case 'v': + verbose = 1; + break; + case 'x': + if (strcmp("no", optarg) == 0) { + noxattr = 1; + } else if (strcmp("yes", optarg) != 0) { + printf("Invalid parameter %s. " + "Specify --xattr=no or --xattr=yes\n", + optarg); + return -1; + } + break; + case 'z': + dryrun = 1; + break; + case 'c': + /* Undocumented option cl-clear */ + if (strcmp("no", optarg) == 0) { + noclear = 1; + } else if (strcmp("yes", optarg) != 0) { + printf("Invalid parameter %s. " + "Specify --cl-clear=no " + "or --cl-clear=yes\n", + optarg); + return -1; + } + break; + case 'r': + /* Undocumented option use-rsync */ + use_rsync = 1; + break; + case 'y': + /* Undocumented option rsync-threshold */ + rsync_threshold = atol(optarg); + break; + case 'n': + /* Undocumented option start-recno */ + status->ls_last_recno = atol(optarg); + break; + case 'd': + /* Undocumented option debug */ + debug = atoi(optarg); + if (debug < 0 || debug > 2) + debug = 0; + break; + default: + fprintf(stderr, "error: %s: option '%s' " + "unrecognized.\n", argv[0], argv[optind - 1]); + lr_usage(); + return -1; + } + } + + if (status->ls_last_recno == -1) + status->ls_last_recno = 0; + if (strnlen(status->ls_registration, LR_NAME_MAXLEN) == 0) { + /* No registration ID was passed in. 
*/ + printf("Please specify changelog consumer registration id.\n"); + lr_usage(); + return -1; + } + if (strnlen(status->ls_source, PATH_MAX) == 0) { + fprintf(stderr, "Please specify the source path.\n"); + lr_usage(); + return -1; + } + if (strnlen(status->ls_targets[0], PATH_MAX) == 0) { + fprintf(stderr, "Please specify the target path.\n"); + lr_usage(); + return -1; + } + + /* This plumbing is needed for some of the ioctls behind + llapi calls to work. */ + if (obd_initialize(argc, argv) < 0) { + fprintf(stderr, "obd_initialize failed.\n"); + exit(-1); + } + + rc = lr_locate_rsync(); + if (use_rsync && rc != 0) { + fprintf(stderr, "Error: unable to locate %s.\n", RSYNC); + exit(-1); + } + + signal(SIGINT, termination_handler); + signal(SIGHUP, termination_handler); + signal(SIGTERM, termination_handler); + + rc = lr_replicate(); + + return rc; +} -- 1.8.3.1