In ll_do_fast_read, looking up a cl_env adds some overhead,
and can also cause spinlock contention on older kernels.
Fast read can safely use the preallocated percpu cl_env, so
do that to reduce overhead.
SLES numbers on recent Xeon, CentOS numbers on VMs on
older hardware. SLES has queued spinlocks and scales
perfectly with multiple threads, with or without this
patch. CentOS scales poorly at small I/O sizes without
this patch.
SLES is SLES12SP2, CentOS is CentOS 7.3.
SLES:
1 thread
8b 1K 1M
Without: 23 2200 6800
With: 27.5 2500 7200
4 threads
8b 1K 1M
Without: 90 8700 27000
With: 108 10000 28000
Earlier kernel (CentOS 7.3):
1 thread
8b 1K 1M
Without: 9 1000 5100
With: 12 1300 5800
4 threads
8b 1K 1M
Without: 22 2400 17000
With: 48 4900 20000
Signed-off-by: Patrick Farrell <paf@cray.com>
Change-Id: Ice5d653ace5ce76bc8911501a9b15c11b7a3234a
Reviewed-on: https://review.whamcloud.com/27970
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
* \retval - number of bytes have been read, or error code if error occurred.
*/
static ssize_t
* \retval - number of bytes have been read, or error code if error occurred.
*/
static ssize_t
-ll_do_fast_read(const struct lu_env *env, struct kiocb *iocb,
- struct iov_iter *iter)
+ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter)
if (iocb->ki_filp->f_flags & O_DIRECT)
return 0;
if (iocb->ki_filp->f_flags & O_DIRECT)
return 0;
- ll_cl_add(iocb->ki_filp, env, NULL, LCC_RW);
result = generic_file_read_iter(iocb, iter);
result = generic_file_read_iter(iocb, iter);
- ll_cl_remove(iocb->ki_filp, env);
/* If the first page is not in cache, generic_file_aio_read() will be
* returned with -ENODATA.
/* If the first page is not in cache, generic_file_aio_read() will be
* returned with -ENODATA.
ssize_t rc2;
__u16 refcheck;
ssize_t rc2;
__u16 refcheck;
+ result = ll_do_fast_read(iocb, to);
+ if (result < 0 || iov_iter_count(to) == 0)
+ GOTO(out, result);
+
env = cl_env_get(&refcheck);
if (IS_ERR(env))
return PTR_ERR(env);
env = cl_env_get(&refcheck);
if (IS_ERR(env))
return PTR_ERR(env);
- result = ll_do_fast_read(env, iocb, to);
- if (result < 0 || iov_iter_count(to) == 0)
- GOTO(out, result);
-
args = ll_env_args(env, IO_NORMAL);
args->u.normal.via_iter = to;
args->u.normal.via_iocb = iocb;
args = ll_env_args(env, IO_NORMAL);
args->u.normal.via_iter = to;
args->u.normal.via_iocb = iocb;
else if (result == 0)
result = rc2;
else if (result == 0)
result = rc2;
cl_env_put(env, &refcheck);
cl_env_put(env, &refcheck);
static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = count };
struct iovec iov = { .iov_base = buf, .iov_len = count };
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- RETURN(PTR_ERR(env));
-
- kiocb = &ll_env_info(env)->lti_kiocb;
- init_sync_kiocb(kiocb, file);
- kiocb->ki_pos = *ppos;
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
#ifdef HAVE_KIOCB_KI_LEFT
#ifdef HAVE_KIOCB_KI_LEFT
- kiocb->ki_left = count;
#elif defined(HAVE_KI_NBYTES)
#elif defined(HAVE_KI_NBYTES)
- kiocb->ki_nbytes = count;
+ kiocb.ki_nbytes = count;
- result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
- *ppos = kiocb->ki_pos;
+ result = ll_file_aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+ *ppos = kiocb.ki_pos;
- cl_env_put(env, &refcheck);
struct inode *inode = file_inode(file);
struct cl_object *clob = ll_i2info(inode)->lli_clob;
struct ll_cl_context *lcc;
struct inode *inode = file_inode(file);
struct cl_object *clob = ll_i2info(inode)->lli_clob;
struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
+ const struct lu_env *env = NULL;
+ struct cl_io *io = NULL;
struct cl_page *page;
int result;
ENTRY;
lcc = ll_cl_find(file);
struct cl_page *page;
int result;
ENTRY;
lcc = ll_cl_find(file);
- if (lcc == NULL) {
- unlock_page(vmpage);
- RETURN(-EIO);
+ if (lcc != NULL) {
+ env = lcc->lcc_env;
+ io = lcc->lcc_io;
- env = lcc->lcc_env;
- io = lcc->lcc_io;
if (io == NULL) { /* fast read */
struct inode *inode = file_inode(file);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
struct ll_readahead_state *ras = &fd->fd_ras;
if (io == NULL) { /* fast read */
struct inode *inode = file_inode(file);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
struct ll_readahead_state *ras = &fd->fd_ras;
+ struct lu_env *local_env = NULL;
struct vvp_page *vpg;
result = -ENODATA;
struct vvp_page *vpg;
result = -ENODATA;
+ if (!env) {
+ local_env = cl_env_percpu_get();
+ env = local_env;
+ }
+
vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
if (vpg->vpg_defer_uptodate) {
enum ras_update_flags flags = LL_RAS_HIT;
vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
if (vpg->vpg_defer_uptodate) {
enum ras_update_flags flags = LL_RAS_HIT;
- if (lcc->lcc_type == LCC_MMAP)
+ if (lcc && lcc->lcc_type == LCC_MMAP)
flags |= LL_RAS_MMAP;
/* For fast read, it updates read ahead state only
flags |= LL_RAS_MMAP;
/* For fast read, it updates read ahead state only
+ /* release page refcount before unlocking the page to ensure
+ * the object won't be destroyed in the calling path of
+ * cl_page_put(). Please see comment in ll_releasepage(). */
+ unlock_page(vmpage);
+ if (local_env)
+ cl_env_percpu_put(local_env);
+