.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-advice\fR=\fIADVICE\fR
-Give advice or hint of type \fIADVICE\fR.
+Give advice or hint of type \fIADVICE\fR. Advice types are:
+.RS 1.2i
+.TP
+\fBwillread\fR to prefetch data into server cache
+.RE
.TP
\fB\-b\fR, \fB\-\-background\fR
Enable the advice to be sent and handled asynchronously.
fadvise() is only a client side mechanism that does not pass the advice to the
filesystem, while ladvise can send advice or hints to the Lustre servers.
+.SH EXAMPLES
+.TP
+.B $ lfs ladvise -a willread -s 0 -e 1048576000 /mnt/lustre/file1
+This gives the OST(s) holding the first 1000MiB of \fB/mnt/lustre/file1\fR a hint
+that this part of the file will be read soon.
.SH AVAILABILITY
The lfs ladvise command is part of the Lustre filesystem.
.SH SEE ALSO
enum lu_ladvise_type {
LU_LADVISE_INVALID = 0,
+ LU_LADVISE_WILLREAD = 1, /* hint that the given range will be read soon */
};
#define LU_LADVISE_NAMES { \
+ [LU_LADVISE_WILLREAD] = "willread", /* name string for LU_LADVISE_WILLREAD */ \
}
/* This is the userspace argument for ladvise. It is currently the same as
#define OBD_FAIL_OST_READ_SIZE 0x234
#define OBD_FAIL_OST_LADVISE_NET 0x235
#define OBD_FAIL_OST_PAUSE_PUNCH 0x236
+#define OBD_FAIL_OST_LADVISE_PAUSE 0x237
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
return rc;
}
+/**
+ * Prefetch a byte range of an OFD object on behalf of a WILLREAD advice.
+ *
+ * The range [\a start, \a end) is clamped to the current object size,
+ * converted to whole pages, and then walked in batches of at most
+ * PTLRPC_MAX_BRW_PAGES pages using dt_bufs_get() / dt_read_prep() /
+ * dt_bufs_put(). The object read lock is held for the whole walk.
+ *
+ * \param[in] env	execution environment
+ * \param[in] fo	OFD object to prefetch from
+ * \param[in] start	offset of the first byte of the range
+ * \param[in] end	offset of the first byte after the range (exclusive)
+ *
+ * \retval 0		success, including the case where the range lies
+ *			entirely at or beyond the end of the object
+ * \retval -EINVAL	\a end is not greater than \a start
+ * \retval -ENOMEM	the local buffer array could not be allocated
+ * \retval -ENOENT	the object does not exist
+ * \retval other	value returned by ofd_attr_get(), dt_bufs_get()
+ *			or dt_read_prep() on failure
+ */
+static int ofd_ladvise_prefetch(const struct lu_env *env,
+				struct ofd_object *fo,
+				__u64 start, __u64 end)
+{
+	struct ofd_thread_info *info = ofd_info(env);
+	pgoff_t start_index, end_index, pages;
+	struct niobuf_remote rnb;
+	unsigned long nr_local;
+	struct niobuf_local *lnb;
+	int rc = 0;
+
+	if (end <= start)
+		RETURN(-EINVAL);
+
+	OBD_ALLOC_LARGE(lnb, sizeof(*lnb) * PTLRPC_MAX_BRW_PAGES);
+	if (lnb == NULL)
+		RETURN(-ENOMEM);
+
+	ofd_read_lock(env, fo);
+	if (!ofd_object_exists(fo))
+		GOTO(out_unlock, rc = -ENOENT);
+
+	rc = ofd_attr_get(env, fo, &info->fti_attr);
+	if (rc)
+		GOTO(out_unlock, rc);
+
+	/* never prefetch past the end of the object */
+	if (end > info->fti_attr.la_size)
+		end = info->fti_attr.la_size;
+
+	/*
+	 * The clamped range may now be empty (object is empty, or the whole
+	 * request lies at/after EOF).  This must be checked against start,
+	 * not just against 0: otherwise end_index < start_index below and
+	 * the unsigned page count wraps around to a huge value.
+	 */
+	if (end <= start)
+		GOTO(out_unlock, rc);
+
+	/* We need page aligned offset and length */
+	start_index = start >> PAGE_CACHE_SHIFT;
+	end_index = (end - 1) >> PAGE_CACHE_SHIFT;
+	pages = end_index - start_index + 1;
+	while (pages > 0) {
+		nr_local = pages <= PTLRPC_MAX_BRW_PAGES ? pages :
+			PTLRPC_MAX_BRW_PAGES;
+		/* widen before shifting so the byte offset cannot truncate
+		 * where pgoff_t is only 32 bits wide */
+		rnb.rnb_offset = (__u64)start_index << PAGE_CACHE_SHIFT;
+		rnb.rnb_len = nr_local << PAGE_CACHE_SHIFT;
+		rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, 0);
+		if (unlikely(rc < 0))
+			break;
+		/* dt_bufs_get() returned the number of local buffers mapped */
+		nr_local = rc;
+		rc = dt_read_prep(env, ofd_object_child(fo), lnb, nr_local);
+		dt_bufs_put(env, ofd_object_child(fo), lnb, nr_local);
+		if (unlikely(rc))
+			break;
+		/* defensive: if no buffer was mapped the loop could never
+		 * advance, so stop instead of spinning under the lock */
+		if (unlikely(nr_local == 0))
+			break;
+		start_index += nr_local;
+		pages -= nr_local;
+	}
+
+out_unlock:
+	ofd_read_unlock(env, fo);
+	OBD_FREE_LARGE(lnb, sizeof(*lnb) * PTLRPC_MAX_BRW_PAGES);
+	RETURN(rc);
+}
+
+
/**
* OFD request handler for OST_LADVISE RPC.
*
struct lu_ladvise *ladvise;
int num_advise;
struct ladvise_hdr *ladvise_hdr;
+ struct obd_ioobj ioo;
+ struct lustre_handle lockh = { 0 };
+ __u64 flags = 0;
int i;
ENTRY;
+ CFS_FAIL_TIMEOUT(OBD_FAIL_OST_LADVISE_PAUSE, cfs_fail_val);
body = tsi->tsi_ost_body;
if ((body->oa.o_valid & OBD_MD_FLID) != OBD_MD_FLID)
num_advise = req_capsule_get_size(&req->rq_pill,
&RMF_OST_LADVISE, RCL_CLIENT) /
- sizeof(*ladvise);
+ sizeof(*ladvise);
if (num_advise < ladvise_hdr->lah_count)
RETURN(err_serious(-EPROTO));
default:
rc = -ENOTSUPP;
break;
+ case LU_LADVISE_WILLREAD:
+ ioo.ioo_oid = body->oa.o_oi;
+ ioo.ioo_bufcnt = 1;
+ rc = tgt_extent_lock(exp->exp_obd->obd_namespace,
+ &tsi->tsi_resid,
+ ladvise->lla_start,
+ ladvise->lla_end - 1,
+ &lockh, LCK_PR, &flags);
+ if (rc != 0)
+ break;
+
+ req->rq_status = ofd_ladvise_prefetch(env,
+ fo,
+ ladvise->lla_start,
+ ladvise->lla_end);
+ tgt_extent_unlock(&lockh, LCK_PR);
+ break;
}
if (rc != 0)
break;
(long long)(int)offsetof(struct lu_ladvise, lla_value4));
LASSERTF((int)sizeof(((struct lu_ladvise *)0)->lla_value4) == 4, "found %lld\n",
(long long)(int)sizeof(((struct lu_ladvise *)0)->lla_value4));
+ LASSERTF(LU_LADVISE_WILLREAD == 1, "found %lld\n",
+ (long long)LU_LADVISE_WILLREAD);
/* Checks for struct ladvise_hdr */
LASSERTF(LADVISE_MAGIC == 0x1ADF1CE0, "found 0x%.8x\n",
(long long)(int)offsetof(struct ladvise_hdr, lah_advise));
LASSERTF((int)sizeof(((struct ladvise_hdr *)0)->lah_advise) == 0, "found %lld\n",
(long long)(int)sizeof(((struct ladvise_hdr *)0)->lah_advise));
+ LASSERTF(LF_ASYNC == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LF_ASYNC);
}
}
run_test 254 "Check changelog size"
+ladvise_no_type()
+{
+ local type=$1
+ local file=$2
+
+ lfs ladvise -a invalid $file 2>&1 | grep "Valid types" |
+ awk -F: '{print $2}' | grep $type > /dev/null
+ if [ $? -ne 0 ]; then
+ return 0
+ fi
+ return 1
+}
+
+# Check whether the ladvise ioctl is unsupported for a file.
+#
+# $1 - file to run "lfs ladvise -a willread" against
+#
+# Returns 0 only if the command fails AND its output contains
+# "Inappropriate ioctl for device"; returns 1 if the command succeeds or
+# fails for any other reason.
+ladvise_no_ioctl()
+{
+	local file=$1
+	local output
+
+	# Run the command once and keep both its status and its output, so the
+	# decision is based on a single invocation.
+	output=$(lfs ladvise -a willread "$file" 2>&1) && return 1
+
+	if [[ $output == *"Inappropriate ioctl for device"* ]]; then
+		return 0
+	fi
+	return 1
+}
+
+ladvise_willread_performance()
+{
+ local repeat=10
+ local average_cache=0
+ local average_ladvise=0
+ for ((i = 1; i <= $repeat; i++)); do
+ echo "Iter $i/$repeat: reading without willread hint"
+ cancel_lru_locks osc
+ do_nodes $(comma_list $(osts_nodes)) \
+ "echo 3 > /proc/sys/vm/drop_caches"
+ local speed_origin=$($READS -f $DIR/$tfile -s $size \
+ -b 4096 -n $((size / 4096)) -t 60 |
+ sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+
+ echo "Iter $i/$repeat: Reading again without willread hint"
+ cancel_lru_locks osc
+ local speed_cache=$($READS -f $DIR/$tfile -s $size \
+ -b 4096 -n $((size / 4096)) -t 60 |
+ sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+
+ echo "Iter $i/$repeat: reading with willread hint"
+ cancel_lru_locks osc
+ do_nodes $(comma_list $(osts_nodes)) \
+ "echo 3 > /proc/sys/vm/drop_caches"
+ lfs ladvise -a willread $DIR/$tfile ||
+ error "Ladvise failed"
+ local speed_ladvise=$($READS -f $DIR/$tfile -s $size \
+ -b 4096 -n $((size / 4096)) -t 60 |
+ sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+
+ local cache_speedup=$(echo "scale=2; \
+ ($speed_cache-$speed_origin)/$speed_origin*100" | bc)
+ cache_speedup=$(echo ${cache_speedup%.*})
+ echo "Iter $i/$repeat: cache speedup: $cache_speedup%"
+ average_cache=$((average_cache + cache_speedup))
+
+ local ladvise_speedup=$(echo "scale=2; \
+ ($speed_ladvise-$speed_origin)/$speed_origin*100" | bc)
+ ladvise_speedup=$(echo ${ladvise_speedup%.*})
+ echo "Iter $i/$repeat: ladvise speedup: $ladvise_speedup%"
+ average_ladvise=$((average_ladvise + ladvise_speedup))
+ done
+ average_cache=$((average_cache / repeat))
+ average_ladvise=$((average_ladvise / repeat))
+
+ if [ $average_cache -lt 20 ]; then
+ echo "Speedup with cache is less than 20% ($average_cache%),"\
+ "skipping check of speedup with willread:"\
+ "$average_ladvise%"
+ return 0
+ fi
+
+ local lowest_speedup=$((average_cache / 2))
+ [ $average_ladvise -gt $lowest_speedup ] ||
+ error "Speedup with willread is less than $lowest_speedup%,"\
+ "got $average_ladvise%"
+ echo "Speedup with willread ladvise: $average_ladvise%"
+ echo "Speedup with cache: $average_cache%"
+}
+
+test_255a() {
+ lfs setstripe -c -1 -i 0 $DIR/$tfile || error "$tfile failed"
+
+ ladvise_no_type willread $DIR/$tfile &&
+ skip "willread ladvise is not supported" && return
+
+ ladvise_no_ioctl $DIR/$tfile &&
+ skip "ladvise ioctl is not supported" && return
+
+ [ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] &&
+ skip "lustre < 2.8.54 does not support ladvise " && return
+
+ local size_mb=100
+ local size=$((size_mb * 1048576))
+ dd if=/dev/zero of=$DIR/$tfile bs=1048576 count=$size_mb ||
+ error "dd to $DIR/$tfile failed"
+
+ lfs ladvise -a willread $DIR/$tfile ||
+ error "Ladvise failed with no range argument"
+
+ lfs ladvise -a willread -s 0 $DIR/$tfile ||
+ error "Ladvise failed with no -l or -e argument"
+
+ lfs ladvise -a willread -e 1 $DIR/$tfile ||
+ error "Ladvise failed with only -e argument"
+
+ lfs ladvise -a willread -l 1 $DIR/$tfile ||
+ error "Ladvise failed with only -l argument"
+
+ lfs ladvise -a willread -s 2 -e 1 $DIR/$tfile &&
+ error "End offset should not be smaller than start offset"
+
+ lfs ladvise -a willread -s 2 -e 2 $DIR/$tfile &&
+ error "End offset should not be equal to start offset"
+
+ lfs ladvise -a willread -s $size -l 1 $DIR/$tfile ||
+ error "Ladvise failed with overflowing -s argument"
+
+ lfs ladvise -a willread -s 1 -e $((size + 1)) $DIR/$tfile ||
+ error "Ladvise failed with overflowing -e argument"
+
+ lfs ladvise -a willread -s 1 -l $size $DIR/$tfile ||
+ error "Ladvise failed with overflowing -l argument"
+
+ lfs ladvise -a willread -l 1 -e 2 $DIR/$tfile &&
+ error "Ladvise succeeded with conflicting -l and -e arguments"
+
+ echo "Synchronous ladvise should wait"
+ local delay=4
+#define OBD_FAIL_OST_LADVISE_PAUSE 0x237
+ do_nodes $(comma_list $(osts_nodes)) \
+ $LCTL set_param fail_val=$delay fail_loc=0x237
+
+ local start_ts=$SECONDS
+ lfs ladvise -a willread $DIR/$tfile ||
+ error "Ladvise failed with no range argument"
+ local end_ts=$SECONDS
+ local inteval_ts=$((end_ts - start_ts))
+
+ if [ $inteval_ts -lt $(($delay - 1)) ]; then
+ error "Synchronous advice didn't wait reply"
+ fi
+
+ echo "Asynchronous ladvise shouldn't wait"
+ local start_ts=$SECONDS
+ lfs ladvise -a willread -b $DIR/$tfile ||
+ error "Ladvise failed with no range argument"
+ local end_ts=$SECONDS
+ local inteval_ts=$((end_ts - start_ts))
+
+ if [ $inteval_ts -gt $(($delay / 2)) ]; then
+ error "Asynchronous advice blocked"
+ fi
+
+ do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
+ ladvise_willread_performance
+}
+run_test 255a "check 'lfs ladvise -a willread'"
+
test_256() {
local cl_user
local cat_sl
CHECK_MEMBER(lu_ladvise, lla_end);
CHECK_MEMBER(lu_ladvise, lla_value3);
CHECK_MEMBER(lu_ladvise, lla_value4);
+ CHECK_VALUE(LU_LADVISE_WILLREAD);
CHECK_VALUE(LF_ASYNC);
CHECK_VALUE(LADVISE_MAGIC);
(long long)(int)offsetof(struct lu_ladvise, lla_value4));
LASSERTF((int)sizeof(((struct lu_ladvise *)0)->lla_value4) == 4, "found %lld\n",
(long long)(int)sizeof(((struct lu_ladvise *)0)->lla_value4));
+ LASSERTF(LU_LADVISE_WILLREAD == 1, "found %lld\n",
+ (long long)LU_LADVISE_WILLREAD);
/* Checks for struct ladvise_hdr */
LASSERTF(LADVISE_MAGIC == 0x1ADF1CE0, "found 0x%.8x\n",
(long long)(int)offsetof(struct ladvise_hdr, lah_advise));
LASSERTF((int)sizeof(((struct ladvise_hdr *)0)->lah_advise) == 0, "found %lld\n",
(long long)(int)sizeof(((struct ladvise_hdr *)0)->lah_advise));
+ LASSERTF(LF_ASYNC == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LF_ASYNC);
}