more information, please refer to bugzilla 17630.
Severity : enhancement
+Bugzilla : 17817
+Description: Make read-ahead stripe size aligned.
+
+Severity : enhancement
Bugzilla : 17536
Description: MDS create should not wait for statfs RPC while holding DLM lock.
#define KEY_ASYNC "async"
#define KEY_CAPA_KEY "capa_key"
#define KEY_GRANT_SHRINK "grant_shrink"
+#define KEY_OFF_RPCSIZE "off_rpcsize"
struct obd_ops {
struct module *o_owner;
int lvfs_check_io_health(struct obd_device *obd, struct file *file);
/* Requests for obd_extent_calc() */
-#define OBD_CALC_STRIPE_START 1
-#define OBD_CALC_STRIPE_END 2
+#define OBD_CALC_STRIPE_START 0x0001
+#define OBD_CALC_STRIPE_END 0x0010
+#define OBD_CALC_STRIPE_RPC_ALIGN 0x0100
+
+/* Combined requests: pick the start or the end of the chunk containing the
+ * offset, with the chunk size capped at the maximum bulk RPC size. */
+#define OBD_CALC_STRIPE_RPC_START_ALIGN (OBD_CALC_STRIPE_START | \
+                                         OBD_CALC_STRIPE_RPC_ALIGN)
+#define OBD_CALC_STRIPE_RPC_END_ALIGN   (OBD_CALC_STRIPE_END | \
+                                         OBD_CALC_STRIPE_RPC_ALIGN)
static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
struct obd_export *exp, int error)
/* initialize read-ahead window once per syscall */
if (ra == 0) {
ra = 1;
- bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
- bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
- ll_ra_read_in(file, &bead);
+ ll_ra_read_init(file, &bead, *ppos, count);
}
/* BUG: 5972 */
CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
inode->i_ino, count, *ppos, i_size_read(inode));
- bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
- bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
- ll_ra_read_in(in_file, &bead);
+ ll_ra_read_init(in_file, &bead, *ppos, count);
/* BUG: 5972 */
file_accessed(in_file);
rc = generic_file_sendfile(in_file, ppos, count, actor, target);
CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
inode->i_ino, count, *ppos, i_size_read(inode));
- bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
- bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
- ll_ra_read_in(in_file, &bead);
+ ll_ra_read_init(in_file, &bead, *ppos, count);
/* BUG: 5972 */
file_accessed(in_file);
rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
#define ll_unregister_cache(cache) do {} while (0)
#endif
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
+void ll_ra_read_init(struct file *f, struct ll_ra_read *rar,
+ loff_t offset, size_t count);
void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
struct ll_ra_read *ll_ra_read_get(struct file *f);
return &fd->fd_ras;
}
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
+void ll_ra_read_init(struct file *f, struct ll_ra_read *rar,
+ loff_t offset, size_t count)
{
struct ll_readahead_state *ras;
ras = ll_ras_get(f);
+ rar->lrr_start = offset >> CFS_PAGE_SHIFT;
+ rar->lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+
spin_lock(&ras->ras_lock);
ras->ras_requests++;
ras->ras_request_index = 0;
ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
ria->ria_pages)
-#define RAS_INCREASE_STEP (1024 * 1024 >> CFS_PAGE_SHIFT)
+#define INIT_RAS_WINDOW_PAGES PTLRPC_MAX_BRW_PAGES
static inline int stride_io_mode(struct ll_readahead_state *ras)
{
/* Enlarge the RA window to encompass the full read */
if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
bead->lrr_start + bead->lrr_count) {
- ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+ obd_off read_end = (bead->lrr_start + bead->lrr_count) <<
+ CFS_PAGE_SHIFT;
+ obd_extent_calc(exp, lsm, OBD_CALC_STRIPE_RPC_END_ALIGN,
+ &read_end);
+ ras->ras_window_len = ((read_end + 1) >> CFS_PAGE_SHIFT) -
ras->ras_window_start;
}
/* Reserve a part of the read-ahead window that we'll be issuing */
+/* Store index, with the bits selected by (INIT_RAS_WINDOW_PAGES - 1) cleared,
+ * as the start of the read-ahead window.
+ * NOTE(review): this only rounds down to a window boundary if
+ * INIT_RAS_WINDOW_PAGES is a power of two -- confirm. */
static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
{
- ras->ras_window_start = index & (~(RAS_INCREASE_STEP - 1));
+ ras->ras_window_start = index & (~(INIT_RAS_WINDOW_PAGES - 1));
}
/* called with the ras_lock held or from places where it doesn't matter */
RAS_CDEBUG(ras);
}
+/* Grow the read-ahead window by one step.
+ *
+ * The step is queried through obd_get_info(KEY_OFF_RPCSIZE) for the byte
+ * offset at the current end of the window; if the query fails, the step
+ * falls back to INIT_RAS_WINDOW_PAGES.
+ *
+ * \param ras    read-ahead state whose window is enlarged
+ * \param ra     read-ahead limits (ra_max_pages caps the non-stride window)
+ * \param inode  inode used to look up the export and stripe metadata
+ */
+static void ras_increase_window(struct ll_readahead_state *ras,
+ struct ll_ra_info *ra, struct inode *inode)
+{
+ __u64 step;
+ __u32 size;
+ int rc;
+
+ /* On input, step carries the byte offset of the end of the window. */
+ step = ((loff_t)(ras->ras_window_start +
+ ras->ras_window_len)) << CFS_PAGE_SHIFT;
+ size = sizeof(step);
+ /* Get the RPC size for this offset; step is passed as the value buffer,
+ * so on success it holds the answer instead of the offset.
+ * NOTE(review): the result is used below as a page count -- confirm
+ * that every KEY_OFF_RPCSIZE handler returns pages. */
+ rc = obd_get_info(ll_i2obdexp(inode), sizeof(KEY_OFF_RPCSIZE),
+ KEY_OFF_RPCSIZE, &size, &step,
+ ll_i2info(inode)->lli_smd);
+ if (rc)
+ step = INIT_RAS_WINDOW_PAGES;
+
+ /* Stride mode has its own growth helper; otherwise grow linearly and
+ * clamp the window length to ra_max_pages. */
+ if (stride_io_mode(ras))
+ ras_stride_increase_window(ras, ra, (unsigned long)step);
+ else
+ ras->ras_window_len = min(ras->ras_window_len + (unsigned long)step,
+ ra->ra_max_pages);
+}
+
static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
struct ll_readahead_state *ras, unsigned long index,
unsigned hit)
/* Trigger RA in the mmap case where ras_consecutive_requests
* is not incremented and thus can't be used to trigger RA */
if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
- ras->ras_window_len = RAS_INCREASE_STEP;
+ ras->ras_window_len = INIT_RAS_WINDOW_PAGES;
GOTO(out_unlock, 0);
}
* uselessly reading and discarding pages for random IO the window is
* only increased once per consecutive request received. */
if ((ras->ras_consecutive_requests > 1 &&
- !ras->ras_request_index) || stride_detect) {
- if (stride_io_mode(ras))
- ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP);
- else
- ras->ras_window_len = min(ras->ras_window_len +
- RAS_INCREASE_STEP,
- ra->ra_max_pages);
- }
+ !ras->ras_request_index) || stride_detect)
+ ras_increase_window(ras, ra, inode);
EXIT;
out_unlock:
RAS_CDEBUG(ras);
} else if (KEY_IS(KEY_FIEMAP)) {
rc = lov_fiemap(lov, keylen, key, vallen, val, lsm);
GOTO(out, rc);
- }
+ } else if (KEY_IS(KEY_OFF_RPCSIZE)) {
+ __u64 *offset = val;
+ struct lov_tgt_desc *tgt;
+ struct lov_oinfo *loi;
+ int stripe;
+
+ LASSERT(*vallen == sizeof(__u64));
+ stripe = lov_stripe_number(lsm, *offset);
+ loi = lsm->lsm_oinfo[stripe];
+ tgt = lov->lov_tgts[loi->loi_ost_idx];
+ if (!tgt || !tgt->ltd_active)
+ GOTO(out, rc = -ESRCH);
+ rc = obd_get_info(tgt->ltd_exp, keylen, key, vallen, val, NULL);
+ GOTO(out, rc);
+ }
rc = -EINVAL;
out:
__u64 start;
__u32 ssize = lsm->lsm_stripe_size;
+ if (cmd & OBD_CALC_STRIPE_RPC_ALIGN)
+ ssize = ssize > PTLRPC_MAX_BRW_SIZE ?
+ PTLRPC_MAX_BRW_SIZE : ssize;
+
start = *offset;
do_div(start, ssize);
start = start * ssize;
CDEBUG(D_DLMTRACE, "offset "LPU64", stripe %u, start "LPU64
", end "LPU64"\n", *offset, ssize, start, start + ssize - 1);
- if (cmd == OBD_CALC_STRIPE_END) {
+ if (cmd & OBD_CALC_STRIPE_END)
*offset = start + ssize - 1;
- } else if (cmd == OBD_CALC_STRIPE_START) {
+ else if (cmd & OBD_CALC_STRIPE_START)
*offset = start;
- } else {
+ else
LBUG();
- }
RETURN(0);
}
*vallen = sizeof(*stripe);
*stripe = 0;
RETURN(0);
- } else if (KEY_IS(KEY_LAST_ID)) {
+ } else if (KEY_IS(KEY_OFF_RPCSIZE)) {
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ __u64 *rpcsize = val;
+ LASSERT(*vallen == sizeof(__u64));
+ *rpcsize = (__u64)cli->cl_max_pages_per_rpc;
+ RETURN(0);
+ } else if (KEY_IS(KEY_LAST_ID)) {
struct ptlrpc_request *req;
obd_id *reply;
char *bufs[2] = { NULL, key };
STRIPE_COUNT=$OSTCOUNT
STRIPE_OFFSET=0
- trap cleanup_101b EXIT
+ trap cleanup_101 EXIT
# prepare the read-ahead file
$SETSTRIPE $DIR/$tfile -s $STRIPE_SIZE -i $STRIPE_OFFSET -c $OSTCOUNT
SETUP_TEST101b=yes
}
-cleanup_101b() {
+cleanup_101() {
trap 0
rm -rf $DIR/$tdir $DIR/$tfile
SETUP_TEST101b=no
cancel_lru_locks osc
ra_check_101b $BSIZE
done
- cleanup_101b
true
}
run_test 101b "check stride-io mode read-ahead ================="
+
+# Check that read-ahead issues RPC-aligned reads: after a run of 64k reads
+# there must be no read RPCs smaller than 64k, and every larger histogram
+# bucket up to 1024k must have been used.
+test_101c() {
+	local STRIPE_SIZE=1048576
+	local FILE_LENGTH=$((STRIPE_SIZE*100))
+	local nreads=10000
+
+	setup_test101
+
+	cancel_lru_locks osc
+	$LCTL set_param osc.*.rpc_stats 0
+	$READS -f $DIR/$tfile -s$FILE_LENGTH -b65536 -n$nreads -t 180
+	for OSC in `$LCTL get_param -N osc.*`
+	do
+		if [ "$OSC" == "osc.num_refs" ]; then
+			continue
+		fi
+		# Skip OSCs whose rpc_stats output is too short to check.
+		lines=`$LCTL get_param -n ${OSC}.rpc_stats | wc | awk '{print $1}'`
+		if [ $lines -le 20 ]; then
+			continue
+		fi
+
+		# Histogram rows are matched by their first column ("1:", "2:", ...);
+		# the variable names assume 4k pages.
+		rpc4k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "1:" { print $2; exit; }')
+		rpc8k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "2:" { print $2; exit; }')
+		rpc16k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "4:" { print $2; exit; }')
+		rpc32k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "8:" { print $2; exit; }')
+
+		[ $rpc4k != 0 ] && error "Small 4k read IO ${rpc4k}!"
+		[ $rpc8k != 0 ] && error "Small 8k read IO ${rpc8k}!"
+		[ $rpc16k != 0 ] && error "Small 16k read IO ${rpc16k}!"
+		[ $rpc32k != 0 ] && error "Small 32k read IO ${rpc32k}!"
+
+		echo "Small rpc check passed!"
+		rpc64k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "16:" { print $2; exit; }')
+		rpc128k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "32:" { print $2; exit; }')
+		rpc256k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "64:" { print $2; exit; }')
+		rpc512k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "128:" { print $2; exit; }')
+		rpc1024k=$($LCTL get_param -n ${OSC}.rpc_stats | awk '$1 == "256:" { print $2; exit; }')
+
+		[ $rpc64k == 0 ] && error "No 64k readahead IO ${rpc64k}"
+		[ $rpc128k == 0 ] && error "No 128k readahead IO ${rpc128k}"
+		[ $rpc256k == 0 ] && error "No 256k readahead IO ${rpc256k}"
+		[ $rpc512k == 0 ] && error "No 512k readahead IO ${rpc512k}"
+		[ $rpc1024k == 0 ] && error "No 1024k readahead IO ${rpc1024k}"
+		echo "Big rpc check passed!"
+	done
+	cleanup_101
+	true
+}
+run_test 101c "check stripe_size aligned read-ahead ================="
export SETUP_TEST102=no
setup_test102() {