o->od_read_cache = 1;
o->od_writethrough_cache = 1;
o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE;
-
+ o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20;
+ o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20;
o->od_auto_scrub_interval = AS_DEFAULT;
cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
struct osd_obj_map *od_ost_map;
struct osd_mdobj_map *od_mdt_map;
+ /* objects with size > od_readcache_max_filesize will be
+ * served bypassing pagecache unless already cached */
unsigned long long od_readcache_max_filesize;
+
+ /* reads > od_readcache_max_iosize will be
+ * served bypassing pagecache unless already cached */
+ unsigned long od_readcache_max_iosize;
+
+ /* writes > od_writethrough_max_iosize will be
+ * served bypassing pagecache unless already cached */
+ unsigned long od_writethrough_max_iosize;
+
int od_read_cache;
int od_writethrough_cache;
#endif
#define OSD_MAX_CACHE_SIZE OBD_OBJECT_EOF
+#define OSD_READCACHE_MAX_IO_MB 8
+#define OSD_WRITECACHE_MAX_IO_MB 8
extern const struct dt_index_operations osd_otable_ops;
}
static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt,
- loff_t offset, gfp_t gfp_mask)
+ loff_t offset, gfp_t gfp_mask, bool cache)
{
struct osd_thread_info *oti = osd_oti_get(env);
struct inode *inode = osd_dt_obj(dt)->oo_inode;
struct osd_device *d = osd_obj2dev(osd_dt_obj(dt));
struct page *page;
- int cur = oti->oti_dio_pages_used;
+ int cur;
LASSERT(inode);
- if (osd_use_page_cache(d)) {
+ if (cache) {
page = find_or_create_page(inode->i_mapping,
- offset >> PAGE_SHIFT,
- gfp_mask);
+ offset >> PAGE_SHIFT, gfp_mask);
if (likely(page))
LASSERT(!test_bit(PG_private_2, &page->flags));
else
lprocfs_counter_add(d->od_stats, LPROC_OSD_NO_PAGE, 1);
- } else {
- LASSERT(oti->oti_dio_pages);
+ return page;
+ }
- if (unlikely(!oti->oti_dio_pages[cur])) {
- LASSERT(cur < PTLRPC_MAX_BRW_PAGES);
- page = alloc_page(gfp_mask);
- if (!page)
- return NULL;
- oti->oti_dio_pages[cur] = page;
- }
+ if (inode->i_mapping->nrpages) {
+ /* consult with pagecache, but do not create new pages */
+ /* this is normally used once */
+ page = find_lock_page(inode->i_mapping, offset >> PAGE_SHIFT);
+ if (page)
+ return page;
+ }
- page = oti->oti_dio_pages[cur];
- LASSERT(!test_bit(PG_private_2, &page->flags));
- set_bit(PG_private_2, &page->flags);
- oti->oti_dio_pages_used++;
+ LASSERT(oti->oti_dio_pages);
+ cur = oti->oti_dio_pages_used;
- LASSERT(!PageLocked(page));
- lock_page(page);
+ if (unlikely(!oti->oti_dio_pages[cur])) {
+ LASSERT(cur < PTLRPC_MAX_BRW_PAGES);
+ page = alloc_page(gfp_mask);
+ if (!page)
+ return NULL;
+ oti->oti_dio_pages[cur] = page;
+ }
- LASSERT(!page->mapping);
- LASSERT(!PageWriteback(page));
- ClearPageUptodate(page);
+ page = oti->oti_dio_pages[cur];
+ LASSERT(!test_bit(PG_private_2, &page->flags));
+ set_bit(PG_private_2, &page->flags);
+ oti->oti_dio_pages_used++;
- page->index = offset >> PAGE_SHIFT;
- }
+ LASSERT(!PageLocked(page));
+ lock_page(page);
+
+ LASSERT(!page->mapping);
+ LASSERT(!PageWriteback(page));
+ ClearPageUptodate(page);
+
+ page->index = offset >> PAGE_SHIFT;
return page;
}
{
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_object *obj = osd_dt_obj(dt);
- int npages, i, rc = 0;
+ struct osd_device *osd = osd_obj2dev(obj);
+ int npages, i, iosize, rc = 0;
+ bool cache, write;
+ loff_t fsize;
gfp_t gfp_mask;
LASSERT(obj->oo_inode);
- if (!osd_use_page_cache(osd_obj2dev(obj))) {
- if (unlikely(!oti->oti_dio_pages)) {
- OBD_ALLOC(oti->oti_dio_pages,
- sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES);
- if (!oti->oti_dio_pages)
- return -ENOMEM;
- }
- }
-
rc = osd_map_remote_to_local(pos, len, &npages, lnb, maxlnb);
if (rc)
RETURN(rc);
+ write = rw & DT_BUFS_TYPE_WRITE;
+
+ fsize = lnb[npages - 1].lnb_file_offset + lnb[npages - 1].lnb_len;
+ iosize = fsize - lnb[0].lnb_file_offset;
+ fsize = max(fsize, i_size_read(obj->oo_inode));
+
+ cache = rw & DT_BUFS_TYPE_READAHEAD;
+ if (cache)
+ goto bypass_checks;
+
+ cache = osd_use_page_cache(osd);
+ while (cache) {
+ if (write) {
+ if (!osd->od_writethrough_cache) {
+ cache = false;
+ break;
+ }
+ if (iosize > osd->od_writethrough_max_iosize) {
+ cache = false;
+ break;
+ }
+ } else {
+ if (!osd->od_read_cache) {
+ cache = false;
+ break;
+ }
+ if (iosize > osd->od_readcache_max_iosize) {
+ cache = false;
+ break;
+ }
+ }
+ /* don't use cache on large files */
+ if (osd->od_readcache_max_filesize &&
+ fsize > osd->od_readcache_max_filesize)
+ cache = false;
+ break;
+ }
+
+bypass_checks:
+ if (!cache && unlikely(!oti->oti_dio_pages)) {
+ OBD_ALLOC(oti->oti_dio_pages,
+ sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES);
+ if (!oti->oti_dio_pages)
+ return -ENOMEM;
+ }
+
/* this could also try less hard for DT_BUFS_TYPE_READAHEAD pages */
gfp_mask = rw & DT_BUFS_TYPE_LOCAL ? (GFP_NOFS | __GFP_HIGHMEM) :
GFP_HIGHUSER;
for (i = 0; i < npages; i++, lnb++) {
lnb->lnb_page = osd_get_page(env, dt, lnb->lnb_file_offset,
- gfp_mask);
+ gfp_mask, cache);
if (lnb->lnb_page == NULL)
GOTO(cleanup, rc = -ENOMEM);
lu_object_get(&dt->do_lu);
}
+#if 0
+ /* XXX: this version doesn't invalidate cached pages, but uses them */
+ if (!cache && write && obj->oo_inode->i_mapping->nrpages) {
+ /* do not allow data aliasing, invalidate pagecache */
+ /* XXX: can be quite expensive in mixed case */
+ invalidate_mapping_pages(obj->oo_inode->i_mapping,
+ lnb[0].lnb_file_offset >> PAGE_SHIFT,
+ lnb[npages - 1].lnb_file_offset >> PAGE_SHIFT);
+ }
+#endif
+
RETURN(i);
cleanup:
struct osd_iobuf *iobuf = &oti->oti_iobuf;
struct inode *inode = osd_dt_obj(dt)->oo_inode;
struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
- ktime_t start;
- ktime_t end;
+ ktime_t start, end;
s64 timediff;
- ssize_t isize;
- __s64 maxidx;
- int rc = 0;
- int i;
- int cache = 0;
+ ssize_t isize;
+ __s64 maxidx;
+ int i, rc = 0;
LASSERT(inode);
isize = i_size_read(inode);
maxidx = ((isize + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1;
- if (osd->od_writethrough_cache)
- cache = 1;
- if (isize > osd->od_readcache_max_filesize)
- cache = 0;
-
start = ktime_get();
for (i = 0; i < npages; i++) {
- if (cache == 0)
- generic_error_remove_page(inode->i_mapping,
- lnb[i].lnb_page);
-
/*
* till commit the content of the page is undefined
* we'll set it uptodate once bulk is done. otherwise
struct osd_iobuf *iobuf = &oti->oti_iobuf;
struct inode *inode = osd_dt_obj(dt)->oo_inode;
struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
- int rc = 0, i, cache = 0, cache_hits = 0, cache_misses = 0;
+ int rc = 0, i, cache_hits = 0, cache_misses = 0;
ktime_t start, end;
s64 timediff;
loff_t isize;
isize = i_size_read(inode);
- if (osd->od_read_cache)
- cache = 1;
- if (isize > osd->od_readcache_max_filesize)
- cache = 0;
-
start = ktime_get();
for (i = 0; i < npages; i++) {
if (OBD_FAIL_CHECK(OBD_FAIL_OST_FAKE_RW))
SetPageUptodate(lnb[i].lnb_page);
- if (cache == 0)
- generic_error_remove_page(inode->i_mapping,
- lnb[i].lnb_page);
-
if (PageUptodate(lnb[i].lnb_page)) {
cache_hits++;
unlock_page(lnb[i].lnb_page);
static int osd_ladvise(const struct lu_env *env, struct dt_object *dt,
__u64 start, __u64 end, enum lu_ladvise_type advice)
{
- int rc = 0;
- struct inode *inode = osd_dt_obj(dt)->oo_inode;
+ struct osd_object *obj = osd_dt_obj(dt);
+ int rc = 0;
ENTRY;
switch (advice) {
case LU_LADVISE_DONTNEED:
- if (end == 0)
- break;
- invalidate_mapping_pages(inode->i_mapping,
- start >> PAGE_SHIFT,
- (end - 1) >> PAGE_SHIFT);
+ if (end)
+ invalidate_mapping_pages(obj->oo_inode->i_mapping,
+ start >> PAGE_SHIFT,
+ (end - 1) >> PAGE_SHIFT);
break;
default:
rc = -ENOTSUPP;
LDEBUGFS_SEQ_FOPS(ldiskfs_osd_readcache);
+/*
+ * Show handler for the read-cache I/O size limit.
+ *
+ * Prints od_readcache_max_iosize shifted right by 20 bits, i.e. in the same
+ * MiB unit the default (OSD_READCACHE_MAX_IO_MB << 20) is expressed in.
+ *
+ * \retval 0		 value printed
+ * \retval -EINPROGRESS	 od_mnt is not set yet
+ */
+static int ldiskfs_osd_readcache_max_io_seq_show(struct seq_file *m, void *data)
+{
+	struct dt_device *dt = m->private;
+	struct osd_device *osd = osd_dt_dev(dt);
+	unsigned long limit_mb;
+
+	LASSERT(osd != NULL);
+	if (unlikely(!osd->od_mnt))
+		return -EINPROGRESS;
+
+	limit_mb = osd->od_readcache_max_iosize >> 20;
+	seq_printf(m, "%lu\n", limit_mb);
+	return 0;
+}
+
+/*
+ * Write handler for the read-cache I/O size limit.
+ *
+ * Parses the user string with lprocfs_str_with_units_to_s64() (passing 'M',
+ * presumably the unit assumed when the input carries none) and stores the
+ * result in od_readcache_max_iosize.
+ *
+ * \retval count	 value accepted
+ * \retval -EINPROGRESS	 od_mnt is not set yet
+ * \retval -ERANGE	 value is negative or above PTLRPC_MAX_BRW_SIZE
+ * \retval other	 error returned by the parser
+ */
+static ssize_t
+ldiskfs_osd_readcache_max_io_seq_write(struct file *file,
+				       const char __user *buffer,
+				       size_t count, loff_t *off)
+{
+	struct seq_file *seq = file->private_data;
+	struct dt_device *dev = seq->private;
+	struct osd_device *osd = osd_dt_dev(dev);
+	s64 limit;
+	int rc;
+
+	LASSERT(osd != NULL);
+	if (unlikely(!osd->od_mnt))
+		return -EINPROGRESS;
+
+	rc = lprocfs_str_with_units_to_s64(buffer, count, &limit, 'M');
+	if (rc)
+		return rc;
+
+	/* a single I/O can never exceed the maximum bulk RPC size */
+	if (limit < 0 || limit > PTLRPC_MAX_BRW_SIZE)
+		return -ERANGE;
+
+	osd->od_readcache_max_iosize = limit;
+	return count;
+}
+
+LDEBUGFS_SEQ_FOPS(ldiskfs_osd_readcache_max_io);
+
+/*
+ * Show handler for the writethrough-cache I/O size limit.
+ *
+ * Prints od_writethrough_max_iosize shifted right by 20 bits, i.e. in the
+ * same MiB unit the default (OSD_WRITECACHE_MAX_IO_MB << 20) is expressed in.
+ *
+ * \retval 0		 value printed
+ * \retval -EINPROGRESS	 od_mnt is not set yet
+ */
+static int ldiskfs_osd_writethrough_max_io_seq_show(struct seq_file *m,
+						    void *data)
+{
+	struct dt_device *dt = m->private;
+	struct osd_device *osd = osd_dt_dev(dt);
+	unsigned long limit_mb;
+
+	LASSERT(osd != NULL);
+	if (unlikely(!osd->od_mnt))
+		return -EINPROGRESS;
+
+	limit_mb = osd->od_writethrough_max_iosize >> 20;
+	seq_printf(m, "%lu\n", limit_mb);
+	return 0;
+}
+
+/*
+ * Write handler for the writethrough-cache I/O size limit.
+ *
+ * Parses the user string with lprocfs_str_with_units_to_s64() (passing 'M',
+ * presumably the unit assumed when the input carries none) and stores the
+ * result in od_writethrough_max_iosize.
+ *
+ * \retval count	 value accepted
+ * \retval -EINPROGRESS	 od_mnt is not set yet
+ * \retval -ERANGE	 value is negative or above PTLRPC_MAX_BRW_SIZE
+ * \retval other	 error returned by the parser
+ */
+static ssize_t
+ldiskfs_osd_writethrough_max_io_seq_write(struct file *file,
+					  const char __user *buffer,
+					  size_t count, loff_t *off)
+{
+	struct seq_file *seq = file->private_data;
+	struct dt_device *dev = seq->private;
+	struct osd_device *osd = osd_dt_dev(dev);
+	s64 limit;
+	int rc;
+
+	LASSERT(osd != NULL);
+	if (unlikely(!osd->od_mnt))
+		return -EINPROGRESS;
+
+	rc = lprocfs_str_with_units_to_s64(buffer, count, &limit, 'M');
+	if (rc)
+		return rc;
+
+	/* a single I/O can never exceed the maximum bulk RPC size */
+	if (limit < 0 || limit > PTLRPC_MAX_BRW_SIZE)
+		return -ERANGE;
+
+	osd->od_writethrough_max_iosize = limit;
+	return count;
+}
+
+LDEBUGFS_SEQ_FOPS(ldiskfs_osd_writethrough_max_io);
+
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
static ssize_t index_in_idif_show(struct kobject *kobj, struct attribute *attr,
char *buf)
.fops = &ldiskfs_osd_oi_scrub_fops },
{ .name = "readcache_max_filesize",
.fops = &ldiskfs_osd_readcache_fops },
+ { .name = "readcache_max_io_mb",
+ .fops = &ldiskfs_osd_readcache_max_io_fops },
+ { .name = "writethrough_max_io_mb",
+ .fops = &ldiskfs_osd_writethrough_max_io_fops },
{ NULL }
};
error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
fi
- # the following read invalidates the cache
cancel_lru_locks osc
+ # invalidates OST cache
+ do_nodes $list "echo 1 > /proc/sys/vm/drop_caches"
set_osd_param $list '' read_cache_enable 0
cat $DIR/$tfile >/dev/null
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == CPAGES"; then
- error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
+ error "NOT IN CACHE (2): before: $BEFORE, after: $AFTER"
else
- log "cache hits:: before: $BEFORE, after: $AFTER"
+ log "cache hits: before: $BEFORE, after: $AFTER"
fi
log "Read again; it should be satisfied from the cache."
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == CPAGES"; then
- error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
+ error "NOT IN CACHE (3): before: $BEFORE, after: $AFTER"
else
log "cache hits:: before: $BEFORE, after: $AFTER"
fi
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == CPAGES"; then
- error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
+ error "NOT IN CACHE (4): before: $BEFORE, after: $AFTER"
else
log "cache hits:: before: $BEFORE, after: $AFTER"
fi
- log "Read again; it should not be satisfied from the cache."
- BEFORE=$AFTER
- cancel_lru_locks osc
- cat $file >/dev/null
- AFTER=$(roc_hit)
- if ! let "AFTER - BEFORE == 0"; then
- error "IN CACHE: before: $BEFORE, after: $AFTER"
- else
- log "cache hits:: before: $BEFORE, after: $AFTER"
+ if [ $OST1_VERSION -lt $(version_code 2.12.55) ]; then
+ # OSTs >= 2.12.55 use pagecache if cached
+ log "Read again; it should not be satisfied from the cache."
+ BEFORE=$AFTER
+ cancel_lru_locks osc
+ cat $file >/dev/null
+ AFTER=$(roc_hit)
+ if ! let "AFTER - BEFORE == 0"; then
+ error "IN CACHE (5): before: $BEFORE, after: $AFTER"
+ else
+ log "cache hits:: before: $BEFORE, after: $AFTER"
+ fi
fi
log "Write data and read it back."
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == CPAGES"; then
- error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
+ error "NOT IN CACHE (6): before: $BEFORE, after: $AFTER"
else
log "cache hits:: before: $BEFORE, after: $AFTER"
fi
- log "Read again; it should not be satisfied from the cache."
- BEFORE=$AFTER
- cancel_lru_locks osc
- cat $file >/dev/null
- AFTER=$(roc_hit)
- if ! let "AFTER - BEFORE == 0"; then
- error "IN CACHE: before: $BEFORE, after: $AFTER"
- else
- log "cache hits:: before: $BEFORE, after: $AFTER"
+ if [ $OST1_VERSION -lt $(version_code 2.12.55) ]; then
+ # OSTs >= 2.12.55 use pagecache if cached
+ log "Read again; it should not be satisfied from the cache."
+ BEFORE=$AFTER
+ cancel_lru_locks osc
+ cat $file >/dev/null
+ AFTER=$(roc_hit)
+ if ! let "AFTER - BEFORE == 0"; then
+ error "IN CACHE (7): before: $BEFORE, after: $AFTER"
+ else
+ log "cache hits:: before: $BEFORE, after: $AFTER"
+ fi
fi
log "Turn off read and write cache"
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == 0"; then
- error_ignore bz20762 "IN CACHE: before: $BEFORE, after: $AFTER"
+ error_ignore bz20762 "IN CACHE (8):before:$BEFORE,after:$AFTER"
else
log "cache hits:: before: $BEFORE, after: $AFTER"
fi
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == 0"; then
- error_ignore bz20762 "IN CACHE: before: $BEFORE, after: $AFTER"
+ error_ignore bz20762 "IN CACHE (9):before:$BEFORE,after:$AFTER"
else
log "cache hits:: before: $BEFORE, after: $AFTER"
fi
cat $file >/dev/null
AFTER=$(roc_hit)
if ! let "AFTER - BEFORE == CPAGES"; then
- error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
+ error "NOT IN CACHE (1): before: $BEFORE, after: $AFTER"
else
log "cache hits:: before: $BEFORE, after: $AFTER"
fi