range_locked = false;
}
- /*
- * In order to move forward AIO, ci_nob was increased,
- * but that doesn't mean io have been finished, it just
- * means io have been submited, we will always return
- * EIOCBQUEUED to the caller, So we could only return
- * number of bytes in non-AIO case.
- */
if (io->ci_nob > 0) {
- if (!is_aio) {
- if (rc2 == 0) {
- result += io->ci_nob;
- *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
- } else if (rc2) {
- result = 0;
- }
+ if (rc2 == 0) {
+ result += io->ci_nob;
+ *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
+ } else if (rc2) {
+ result = 0;
}
count -= io->ci_nob;
}
if (io->ci_dio_aio) {
+ /* set the number of bytes successfully moved in the aio */
+ if (result > 0)
+ io->ci_dio_aio->cda_bytes = result;
/*
* VFS will call aio_complete() if no -EIOCBQUEUED
* is returned for AIO, so we can not call aio_complete()
- * in our end_io().
+ * in our end_io(). (cda_no_aio_complete is always set for
+ * normal DIO.)
*
- * NB: This is safe because the atomic_dec_and_lock in
- * cl_sync_io_init has implicit memory barriers, so this will
- * be seen by whichever thread completes the DIO/AIO, even if
- * it's not this one
+ * NB: Setting cda_no_aio_complete like this is safe because
+ * the atomic_dec_and_lock in cl_sync_io_note has implicit
+ * memory barriers, so this will be seen by whichever thread
+ * completes the DIO/AIO, even if it's not this one.
*/
- if (rc != -EIOCBQUEUED)
+ if (is_aio && rc != -EIOCBQUEUED)
io->ci_dio_aio->cda_no_aio_complete = 1;
+ /* if an aio enqueued successfully (-EIOCBQUEUED), then Lustre
+ * will call aio_complete rather than the vfs, so we return 0
+ * to tell the VFS we're handling it
+ */
+ else if (is_aio) /* rc == -EIOCBQUEUED */
+ result = 0;
/**
- * Drop one extra reference so that end_io() could be
- * called for this IO context, we could call it after
- * we make sure all AIO requests have been proceed.
+ * Drop the reference held by the llite layer on this top level
+ * IO context.
+ *
+ * For DIO, this frees it here, since IO is complete, and for
+ * AIO, we will call aio_complete() (and then free this top
+ * level context) once all the outstanding chunks of this AIO
+ * have completed.
*/
cl_sync_io_note(env, &io->ci_dio_aio->cda_sync,
rc == -EIOCBQUEUED ? 0 : rc);
}
run_test 398o "right kms with DIO"
+test_398p()
+{
+ (( $OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs"
+ which aiocp || skip_env "no aiocp installed"
+
+ local stripe_size=$((1024 * 1024)) #1 MiB
+ # Max i/o below is ~ 4 * stripe_size, so this gives ~5 i/os
+ local file_size=$((25 * stripe_size))
+
+ $LFS setstripe -c 2 -S $stripe_size $DIR/$tfile.1
+ stack_trap "rm -f $DIR/$tfile*"
+ # Just a bit bigger than the largest size in the test set below
+ dd if=/dev/urandom bs=$file_size count=1 of=$DIR/$tfile.1 ||
+ error "buffered i/o to create file failed"
+
+ for bs in $PAGE_SIZE $((PAGE_SIZE * 4)) $stripe_size \
+ $((stripe_size * 4)); do
+
+ $LFS setstripe -c 2 -S $stripe_size $DIR/$tfile.2
+
+ echo "bs: $bs, file_size $file_size"
+ aiocp -a $PAGE_SIZE -b $bs -s $file_size -f O_DIRECT \
+ $DIR/$tfile.1 $DIR/$tfile.2 &
+ pid_dio1=$!
+ # Buffered I/O with similar but not the same block size
+ dd if=$DIR/$tfile.1 bs=$((bs * 2)) of=$DIR/$tfile.2 \
+ conv=notrunc &
+ pid_bio2=$!
+ wait $pid_dio1
+ rc1=$?
+ wait $pid_bio2
+ rc2=$?
+ if (( rc1 != 0 )); then
+ error "aio copy 1 w/bsize $bs failed: $rc1"
+ fi
+ if (( rc2 != 0 )); then
+ error "buffered copy 2 w/bsize $bs failed: $rc2"
+ fi
+
+ $CHECKSTAT -t file -s $file_size $DIR/$tfile.2 ||
+ error "size incorrect"
+ cmp --verbose $DIR/$tfile.1 $DIR/$tfile.2 ||
+ error "files differ, bsize $bs"
+ rm -f $DIR/$tfile.2
+ done
+}
+run_test 398p "race aio with buffered i/o"
+
test_fake_rw() {
local read_write=$1
if [ "$read_write" = "write" ]; then