9 LUSTRE=${LUSTRE:-`dirname $0`/..}
10 UPCALL=${UPCALL:-$PWD/recovery-small-upcall.sh}
11 . $LUSTRE/tests/test-framework.sh
15 . ${CONFIG:=$LUSTRE/tests/cfg/lmv.sh}
20 # Allow us to override the setup if we already have a mounted system by
21 # setting SETUP=" " and CLEANUP=" "
22 SETUP=${SETUP:-"setup"}
23 CLEANUP=${CLEANUP:-"cleanup"}
28 if [ "$MDSCOUNT" -gt 1 ]; then
30 for num in `seq $MDSCOUNT`; do
31 MDSDEV=$TMP/mds${num}-`hostname`
32 add_mds mds$num --dev $MDSDEV --size $MDSSIZE --master --lmv lmv1
34 add_lov_to_lmv lov1 lmv1 --stripe_sz $STRIPE_BYTES \
35 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
36 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
37 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
38 add_client client --lmv lmv1 --lov lov1 --path $MOUNT
40 add_mds mds1 --dev $MDSDEV --size $MDSSIZE
41 add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES \
42 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
43 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
44 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
45 add_client client --mds mds1_svc --lov lov1 --path $MOUNT
51 start ost --reformat $OSTLCONFARGS
52 start ost2 --reformat $OSTLCONFARGS
53 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
54 if [ "$MDSCOUNT" -gt 1 ]; then
55 for num in `seq $MDSCOUNT`; do
56 start mds$num $MDSLCONFARGS --reformat
59 start mds $MDSLCONFARGS --reformat
61 zconf_mount `hostname` $MOUNT
65 zconf_umount `hostname` $MOUNT
66 if [ "$MDSCOUNT" -gt 1 ]; then
67 for num in `seq $MDSCOUNT`; do
68 stop mds$num ${FORCE} $MDSLCONFARGS
71 stop mds ${FORCE} $MDSLCONFARGS
73 stop ost2 ${FORCE} --dump cleanup.log
74 stop ost ${FORCE} --dump cleanup.log
77 if [ ! -z "$EVAL" ]; then
82 if [ "$ONLY" == "setup" ]; then
87 if [ "$ONLY" == "cleanup" ]; then
88 sysctl -w portals.debug=0 || true
93 REFORMAT=--reformat $SETUP
97 drop_request "mcreate $MOUNT/1" || return 1
98 drop_reint_reply "mcreate $MOUNT/2" || return 2
100 run_test 1 "mcreate: drop req, drop rep"
103 drop_request "tchmod 111 $MOUNT/2" || return 1
104 drop_reint_reply "tchmod 666 $MOUNT/2" || return 2
106 run_test 2 "chmod: drop req, drop rep"
109 drop_request "statone $MOUNT/2" || return 1
110 drop_reply "statone $MOUNT/2" || return 2
112 run_test 3 "stat: drop req, drop rep"
115 do_facet client "cp /etc/resolv.conf $MOUNT/resolv.conf" || return 1
116 drop_request "cat $MOUNT/resolv.conf > /dev/null" || return 2
117 drop_reply "cat $MOUNT/resolv.conf > /dev/null" || return 3
119 run_test 4 "open: drop req, drop rep"
122 drop_request "mv $MOUNT/resolv.conf $MOUNT/renamed" || return 1
123 drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
124 do_facet client "checkstat -v $MOUNT/renamed-again" || return 3
126 run_test 5 "rename: drop req, drop rep"
129 drop_request "mlink $MOUNT/renamed-again $MOUNT/link1" || return 1
130 drop_reint_reply "mlink $MOUNT/renamed-again $MOUNT/link2" || return 2
132 run_test 6 "link: drop req, drop rep"
135 drop_request "munlink $MOUNT/link1" || return 1
136 drop_reint_reply "munlink $MOUNT/link2" || return 2
138 run_test 7 "unlink: drop req, drop rep"
143 drop_reint_reply "touch $MOUNT/renamed" || return 1
145 run_test 8 "touch: drop rep (bug 1423)"
150 pause_bulk "cp /etc/profile $MOUNT" || return 1
151 do_facet client "cp /etc/termcap $MOUNT" || return 2
152 do_facet client "sync"
153 do_facet client "rm $MOUNT/termcap $MOUNT/profile" || return 3
155 run_test 9 "pause bulk on OST (bug 1420)"
159 do_facet client mcreate $MOUNT/f10 || return 1
160 drop_bl_callback "chmod 0777 $MOUNT/f10" || return 2
161 # wait for the mds to evict the client
162 #echo "sleep $(($TIMEOUT*2))"
163 #sleep $(($TIMEOUT*2))
164 do_facet client touch $MOUNT/f10 || echo "touch failed, evicted"
165 do_facet client checkstat -v -p 0777 $MOUNT/f10 || return 3
166 do_facet client "munlink $MOUNT/f10"
168 run_test 10 "finish request on server after client eviction (bug 1521)"
171 # wake up a thread waiting for completion after eviction
173 do_facet client multiop $MOUNT/$tfile Ow || return 1
174 do_facet client multiop $MOUNT/$tfile or || return 2
178 do_facet client multiop $MOUNT/$tfile or || return 3
179 drop_bl_callback multiop $MOUNT/$tfile Ow ||
180 echo "client evicted, as expected"
182 do_facet client munlink $MOUNT/$tfile || return 4
184 run_test 11 "wake up a thead waiting for completion after eviction (b=2460)"
188 $LCTL mark multiop $MOUNT/$tfile OS_c
189 do_facet mds "sysctl -w lustre.fail_loc=0x115"
190 clear_failloc mds $((TIMEOUT * 2)) &
191 multiop $MOUNT/$tfile OS_c &
193 #define OBD_FAIL_MDS_CLOSE_NET 0x115
196 echo "waiting for multiop $PID"
197 wait $PID || return 2
198 do_facet client munlink $MOUNT/$tfile || return 3
200 run_test 12 "recover from timed out resend in ptlrpcd (b=2494)"
202 # Bug 113, check that readdir lost recv timeout works.
# Readdir restart check: create a directory with one entry under the
# configured mount point, arm the MDS fail_loc named in the comment below,
# and require that listing the directory still succeeds.  Uses $MOUNT (the
# path the client is mounted on) instead of a hard-coded /mnt/lustre so the
# test still targets the Lustre filesystem when MOUNT is overridden.
204 mkdir $MOUNT/readdir
205 touch $MOUNT/readdir/newentry
206 # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE
207 do_facet mds "sysctl -w lustre.fail_loc=0x80000104"
208 ls $MOUNT/readdir || return 1
# Disarm the failure injection before cleaning up the test directory.
209 do_facet mds "sysctl -w lustre.fail_loc=0"
210 rm -rf $MOUNT/readdir
212 run_test 13 "mdc_readpage restart test (bug 1138)"
214 # Bug 113, check that readdir lost send timeout works.
# Readdir resend check: create a directory with one entry under the
# configured mount point, arm the MDS fail_loc named in the comment below,
# and require that listing the directory still succeeds.  Uses $MOUNT (the
# path the client is mounted on) instead of a hard-coded /mnt/lustre so the
# test still targets the Lustre filesystem when MOUNT is overridden.
216 mkdir $MOUNT/readdir
217 touch $MOUNT/readdir/newentry
218 # OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE
219 do_facet mds "sysctl -w lustre.fail_loc=0x80000106"
220 ls $MOUNT/readdir || return 1
# Disarm the failure injection once the listing has been checked.
221 do_facet mds "sysctl -w lustre.fail_loc=0"
223 run_test 14 "mdc_readpage resend test (bug 1138)"
226 do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
227 touch $DIR/$tfile && return 1
230 run_test 15 "failed open (-ENOMEM)"
233 # OBD_FAIL_PTLRPC_BULK_PUT_NET | OBD_FAIL_ONCE
234 do_facet client cp /etc/termcap $MOUNT
237 sysctl -w lustre.fail_loc=0x80000504
239 # will get evicted here
240 do_facet client "diff /etc/termcap $MOUNT/termcap" && return 1
241 sysctl -w lustre.fail_loc=0
242 do_facet client "diff /etc/termcap $MOUNT/termcap" || return 2
245 run_test 16 "timeout bulk put, evict client (2732)"
248 # OBD_FAIL_PTLRPC_BULK_GET_NET | OBD_FAIL_ONCE
249 # will get evicted here
250 sysctl -w lustre.fail_loc=0x80000503
251 do_facet client cp /etc/termcap $MOUNT && return 1
253 do_facet client "diff /etc/termcap $MOUNT/termcap" && return 1
254 sysctl -w lustre.fail_loc=0
255 do_facet client "diff /etc/termcap $MOUNT/termcap" || return 2
258 run_test 17 "timeout bulk get, evict client (2732)"
261 # OBD_FAIL_PTLRPC_BULK_PUT_NET|OBD_FAIL_ONCE
262 do_facet client mkdir -p $MOUNT/$tdir
263 f=$MOUNT/$tdir/$tfile
264 f2=$MOUNT/$tdir/${tfile}-2
267 for a in /proc/fs/lustre/llite/*/dump_page_cache; do
268 if [ `wc -l $a | awk '{print $1}'` -gt 1 ]; then
269 echo there is still data in page cache $a ?
275 # shouldn't have to set stripe size of count==1
276 lfs setstripe $f $((128 * 1024)) 0 1
277 lfs setstripe $f2 $((128 * 1024)) 0 1
279 do_facet client cp /etc/termcap $f
281 # just use this write to trigger the client's eviction from the ost
282 sysctl -w lustre.fail_loc=0x80000503
283 do_facet client dd if=/dev/zero of=$f2 bs=4k count=1
285 sysctl -w lustre.fail_loc=0
286 # allow recovery to complete
288 # my understanding is that there should be nothing in the page
289 # cache after the client reconnects?
290 for a in /proc/fs/lustre/llite/*/dump_page_cache; do
291 if [ `wc -l $a | awk '{print $1}'` -gt 1 ]; then
292 echo there is still data in page cache $a ?
298 run_test 18 "eviction and reconnect clears page cache (2766)"