param->ddp_brw_size = ONE_MB_BRW_SIZE;
#ifdef HAVE_DMU_OFFSET_NEXT
- param->ddp_has_lseek_data_hole = true;
+ param->ddp_has_lseek_data_hole = osd->od_sync_on_lseek;
#else
param->ddp_has_lseek_data_hole = false;
#endif
sema_init(&o->od_otable_sem, 1);
INIT_LIST_HEAD(&o->od_ios_list);
o->od_auto_scrub_interval = AS_DEFAULT;
+ o->od_sync_on_lseek = B_TRUE;
/* ZFS does not support reporting nonrotional status yet, so this flag
* is only set if explicitly set by the user.
od_is_ost:1,
od_in_init:1,
od_posix_acl:1,
- od_nonrotational:1;
+ od_nonrotational:1,
+ od_sync_on_lseek:1;
unsigned int od_dnsize;
int od_index_backup_stop;
#define osd_dmu_offset_next(os, obj, hole, res) \
dmu_offset_next((os), (obj), (hole), (res))
#else
-#define osd_dmu_offset_next(os, obj, hole, res) (EBUSY)
+#define osd_dmu_offset_next(os, obj, hole, res) (EOPNOTSUPP)
#endif
#endif /* _OSD_INTERNAL_H */
loff_t offset, int whence)
{
struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *osd = osd_obj2dev(obj);
uint64_t size = obj->oo_attr.la_size;
uint64_t result = offset;
int rc;
if (offset >= size)
RETURN(hole ? offset : -ENXIO);
- rc = osd_dmu_offset_next(osd_obj2dev(obj)->od_os,
- obj->oo_dn->dn_object, hole, &result);
+ /* Currently ZFS reports no valid DATA offset if the object has dirty
+ * data, and we cannot simply switch to the generic approach (report
+ * DATA at every file offset and HOLE only beyond end of file): a HOLE
+ * may be reported correctly at some offset inside the file, and then
+ * a DATA lookup finds the dirty state and the generic approach
+ * reports DATA at that same offset. This happens because ZFS checks
+ * the dirty state for DATA but not for HOLE.
+ * The only way to get reliable results is to call txg_wait_synced()
+ * when ZFS returns EBUSY and then repeat the lseek call; that is
+ * controlled via the od_sync_on_lseek option.
+ */
+ if (!osd->od_sync_on_lseek)
+ result = hole ? size : offset;
+
+again:
+ rc = osd_dmu_offset_next(osd->od_os, obj->oo_dn->dn_object, hole,
+ &result);
+ /* dirty inode, lseek result is unreliable without sync */
+ if (rc == EBUSY) {
+ txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL);
+ goto again;
+ }
+
if (rc == ESRCH)
RETURN(-ENXIO);
- /* file was dirty, so fall back to using generic logic:
- * For HOLE return file size, for DATA the result is set
- * already to the 'offset' parameter value.
+ /* ZFS does not export all the needed functions, so fall back to the
+ * generic logic: for HOLE return file size, for DATA return
+ * the current offset
*/
- if (rc == EBUSY && hole)
- result = size;
+ if (rc == EOPNOTSUPP)
+ result = hole ? size : offset;
+ else if (rc)
+ return -rc;
/* dmu_offset_next() only works on whole blocks so may return SEEK_HOLE
* result as end of the last block instead of logical EOF which we need
}
LUSTRE_WO_ATTR(force_sync);
+/*
+ * sysfs read handler for the "sync_on_lseek" attribute.
+ *
+ * Prints the current value of the device's od_sync_on_lseek flag, followed
+ * by a newline, into buf.
+ *
+ * Returns the number of bytes written, or -EINPROGRESS while od_os is not
+ * set on the device.
+ */
+static ssize_t sync_on_lseek_show(struct kobject *kobj, struct attribute *attr,
+				  char *buf)
+{
+	struct osd_device *dev;
+
+	dev = osd_dt_dev(container_of(kobj, struct dt_device, dd_kobj));
+	if (dev->od_os == NULL)
+		return -EINPROGRESS;
+
+	return sprintf(buf, "%u\n", dev->od_sync_on_lseek);
+}
+
+/*
+ * sysfs write handler for the "sync_on_lseek" attribute.
+ *
+ * Parses a boolean from buffer with kstrtobool() and stores it in the
+ * device's od_sync_on_lseek flag.
+ *
+ * Declared static, like the matching _show handler: the only reference
+ * visible here is through the LUSTRE_RW_ATTR() definition below.
+ *
+ * Returns count on success, -EINPROGRESS while od_os is not set on the
+ * device, or the kstrtobool() error code if the input cannot be parsed.
+ */
+static ssize_t sync_on_lseek_store(struct kobject *kobj,
+				   struct attribute *attr,
+				   const char *buffer, size_t count)
+{
+	struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj);
+	struct osd_device *osd = osd_dt_dev(dt);
+	bool val;
+	int rc;
+
+	if (!osd->od_os)
+		return -EINPROGRESS;
+
+	rc = kstrtobool(buffer, &val);
+	if (rc)
+		return rc;
+
+	/* val is a bool, so it is already 0 or 1 for the 1-bit field */
+	osd->od_sync_on_lseek = val;
+
+	return count;
+}
+LUSTRE_RW_ATTR(sync_on_lseek);
+
static ssize_t nonrotational_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
&lustre_attr_nonrotational.attr,
&lustre_attr_index_backup.attr,
&lustre_attr_auto_scrub.attr,
+ &lustre_attr_sync_on_lseek.attr,
NULL,
};