Whamcloud - gitweb
b=22360 return -EIO in close() if there was any async I/O error
[fs/lustre-release.git] / lustre / llite / file.c
index d25de6c..7cb5232 100644 (file)
@@ -26,7 +26,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  */
 /*
@@ -75,7 +75,8 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
         op_data->op_attr.ia_ctime = inode->i_ctime;
         op_data->op_attr.ia_size = i_size_read(inode);
         op_data->op_attr_blocks = inode->i_blocks;
-        ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
+        ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
+                                        ll_inode_to_ext_flags(inode->i_flags);
         op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
         if (fh)
                 op_data->op_handle = *fh;
@@ -514,14 +515,14 @@ int ll_file_open(struct inode *inode, struct file *file)
 
         fd->fd_file = file;
         if (S_ISDIR(inode->i_mode)) {
-                cfs_spin_lock(&lli->lli_lock);
+                cfs_spin_lock(&lli->lli_sa_lock);
                 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
                         LASSERT(lli->lli_sai == NULL);
                         lli->lli_opendir_key = fd;
                         lli->lli_opendir_pid = cfs_curproc_pid();
                         opendir_set = 1;
                 }
-                cfs_spin_unlock(&lli->lli_lock);
+                cfs_spin_unlock(&lli->lli_sa_lock);
         }
 
         if (inode->i_sb->s_root == file->f_dentry) {
@@ -701,7 +702,7 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
         oinfo.oi_md = lsm;
         oinfo.oi_oa = obdo;
         oinfo.oi_oa->o_id = lsm->lsm_object_id;
-        oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
+        oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
         oinfo.oi_oa->o_mode = S_IFREG;
         oinfo.oi_oa->o_ioepoch = ioepoch;
         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
@@ -769,8 +770,17 @@ int ll_merge_lvb(struct inode *inode)
 
         ll_inode_size_lock(inode, 1);
         inode_init_lvb(inode, &lvb);
+
+        /* merge the timestamps most recently obtained from the MDS with
+           the timestamps obtained from the OSTs */
+        lvb.lvb_atime = lli->lli_lvb.lvb_atime;
+        lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
+        lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
         rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
-        i_size_write(inode, lvb.lvb_size);
+        cl_isize_write_nolock(inode, lvb.lvb_size);
+
+        CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
+               PFID(&lli->lli_fid), lvb.lvb_size);
         inode->i_blocks = lvb.lvb_blocks;
 
         LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
@@ -840,6 +850,7 @@ static ssize_t ll_file_io_generic(const struct lu_env *env,
                 case IO_NORMAL:
                         cio->cui_iov = args->u.normal.via_iov;
                         cio->cui_nrsegs = args->u.normal.via_nrsegs;
+                        cio->cui_tot_nrsegs = cio->cui_nrsegs;
 #ifndef HAVE_FILE_WRITEV
                         cio->cui_iocb = args->u.normal.via_iocb;
 #endif
@@ -1187,11 +1198,10 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 }
 #endif
 
-static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
-                               unsigned long arg)
+static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
+                           obd_count ost_idx)
 {
         struct obd_export *exp = ll_i2dtexp(inode);
-        struct ll_recreate_obj ucreatp;
         struct obd_trans_info oti = { 0 };
         struct obdo *oa = NULL;
         int lsm_size;
@@ -1199,13 +1209,6 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
         struct lov_stripe_md *lsm, *lsm2;
         ENTRY;
 
-        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-                RETURN(-EPERM);
-
-        if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
-                               sizeof(struct ll_recreate_obj)))
-                RETURN(-EFAULT);
-
         OBDO_ALLOC(oa);
         if (oa == NULL)
                 RETURN(-ENOMEM);
@@ -1221,14 +1224,13 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
         if (lsm2 == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        oa->o_id = ucreatp.lrc_id;
-        oa->o_gr = ucreatp.lrc_group;
-        oa->o_nlink = ucreatp.lrc_ost_idx;
+        oa->o_id = id;
+        oa->o_seq = seq;
+        oa->o_nlink = ost_idx;
         oa->o_flags |= OBD_FL_RECREATE_OBJS;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
-        obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-                        OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
+        obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
+                        OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
         memcpy(lsm2, lsm, lsm_size);
         rc = obd_create(exp, oa, &lsm2, &oti);
 
@@ -1240,6 +1242,41 @@ out:
         return rc;
 }
 
+/*
+ * Handler for the LL_IOC_RECREATE_OBJ ioctl.
+ *
+ * Copies a struct ll_recreate_obj from the user address \a arg and hands
+ * its lrc_id and lrc_ost_idx fields to ll_lov_recreate(); the sequence
+ * argument is always passed as 0.
+ *
+ * Returns -EPERM when the caller lacks CFS_CAP_SYS_ADMIN, -EFAULT when
+ * the user buffer cannot be read, otherwise the result of
+ * ll_lov_recreate().
+ */
+static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
+{
+        struct ll_recreate_obj req;
+        int rc;
+        ENTRY;
+
+        /* privileged operation */
+        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+                RETURN(-EPERM);
+
+        if (cfs_copy_from_user(&req, (struct ll_recreate_obj *)arg,
+                               sizeof(req)) != 0)
+                RETURN(-EFAULT);
+
+        rc = ll_lov_recreate(inode, req.lrc_id, 0, req.lrc_ost_idx);
+        RETURN(rc);
+}
+
+/*
+ * Handler for the LL_IOC_RECREATE_FID ioctl.
+ *
+ * Copies a struct lu_fid from the user address \a arg and derives the
+ * arguments for ll_lov_recreate() from it:
+ *  - object id: fid_oid() in the low 32 bits, with the low 16 bits of
+ *    fid_seq() placed at bits 32-47;
+ *  - OST index: bits 16-31 of fid_seq().
+ * The sequence argument is always passed as 0.
+ *
+ * Returns -EPERM when the caller lacks CFS_CAP_SYS_ADMIN, -EFAULT when
+ * the user buffer cannot be read, otherwise the result of
+ * ll_lov_recreate().
+ */
+static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
+{
+        struct lu_fid ufid;
+        obd_id objid;
+        obd_count idx;
+        ENTRY;
+
+        /* privileged operation */
+        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+                RETURN(-EPERM);
+
+        if (cfs_copy_from_user(&ufid, (struct lu_fid *)arg,
+                               sizeof(ufid)) != 0)
+                RETURN(-EFAULT);
+
+        idx = (fid_seq(&ufid) >> 16) & 0xffff;
+        objid = ((fid_seq(&ufid) & 0xffff) << 32) | fid_oid(&ufid);
+
+        RETURN(ll_lov_recreate(inode, objid, 0, idx));
+}
+
 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
                              int flags, struct lov_user_md *lum, int lum_size)
 {
@@ -1285,19 +1322,22 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
         struct mdt_body  *body;
         struct lov_mds_md *lmm = NULL;
         struct ptlrpc_request *req = NULL;
-        struct obd_capa *oc;
+        struct md_op_data *op_data;
         int rc, lmmsize;
 
         rc = ll_get_max_mdsize(sbi, &lmmsize);
         if (rc)
                 RETURN(rc);
 
-        oc = ll_mdscapa_get(inode);
-        rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
-                             oc, filename, strlen(filename) + 1,
-                             OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
-                             ll_i2suppgid(inode), &req);
-        capa_put(oc);
+        op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
+                                     strlen(filename), lmmsize,
+                                     LUSTRE_OPC_ANY, NULL);
+        if (op_data == NULL)
+                RETURN(-ENOMEM);
+
+        op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
+        rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
+        ll_finish_md_op_data(op_data);
         if (rc < 0) {
                 CDEBUG(D_INFO, "md_getattr_name failed "
                        "on %s: rc %d\n", filename, rc);
@@ -1557,6 +1597,19 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
         int rc;
         ENTRY;
 
+        /* Checks for fiemap flags */
+        if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
+                fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
+                return -EBADR;
+        }
+
+        /* Check for FIEMAP_FLAG_SYNC */
+        if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
+                rc = filemap_fdatawrite(inode->i_mapping);
+                if (rc)
+                        return rc;
+        }
+
         /* If the stripe_count > 1 and the application does not understand
          * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
          */
@@ -1565,12 +1618,11 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
                 return -EOPNOTSUPP;
 
         fm_key.oa.o_id = lsm->lsm_object_id;
-        fm_key.oa.o_gr = lsm->lsm_object_gr;
+        fm_key.oa.o_seq = lsm->lsm_object_seq;
         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
 
-        obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
+        obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
                         OBD_MD_FLSIZE);
-
         /* If filesize is 0, then there would be no objects for mapping */
         if (fm_key.oa.o_size == 0) {
                 fiemap->fm_mapped_extents = 0;
@@ -1622,6 +1674,58 @@ gf_free:
         RETURN(rc);
 }
 
+/*
+ * Handler for the FSFILT_IOC_FIEMAP ioctl.
+ *
+ * Reads the fiemap request header (and, when the caller supplied room for
+ * extents, the first extent) from the user address \a arg into a kernel
+ * buffer sized for fm_extent_count extents, runs ll_do_fiemap() on it and
+ * copies the header plus fm_mapped_extents extents back to user space.
+ *
+ * Returns 0 on success, -EFAULT when user memory cannot be accessed,
+ * -EINVAL when fm_extent_count is so large that the buffer size would
+ * overflow, -ENOMEM when the buffer cannot be allocated, or the error
+ * returned by ll_do_fiemap().  Nothing is copied back to user space on
+ * failure.
+ */
+static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
+{
+        struct ll_user_fiemap __user *ufiemap =
+                                (struct ll_user_fiemap __user *)arg;
+        struct ll_user_fiemap *fiemap_s;
+        size_t num_bytes, ret_bytes;
+        unsigned int extent_count;
+        int rc = 0;
+        ENTRY;
+
+        /* Get the extent count so we can calculate the size of
+         * required fiemap buffer */
+        if (get_user(extent_count, &ufiemap->fm_extent_count))
+                RETURN(-EFAULT);
+
+        /* The count comes straight from user space: refuse values for
+         * which the size computation below would wrap around and yield
+         * a buffer smaller than the extents it is supposed to hold. */
+        if (extent_count > ((size_t)-1 - sizeof(*fiemap_s)) /
+                           sizeof(struct ll_fiemap_extent))
+                RETURN(-EINVAL);
+
+        num_bytes = sizeof(*fiemap_s) + (extent_count *
+                                         sizeof(struct ll_fiemap_extent));
+
+        OBD_VMALLOC(fiemap_s, num_bytes);
+        if (fiemap_s == NULL)
+                RETURN(-ENOMEM);
+
+        /* get the fiemap value */
+        if (copy_from_user(fiemap_s, ufiemap, sizeof(*fiemap_s)))
+                GOTO(error, rc = -EFAULT);
+
+        /* The header was fetched from user space a second time and may
+         * have been modified since get_user(); force the count in the
+         * kernel copy to the value the buffer was actually sized for. */
+        fiemap_s->fm_extent_count = extent_count;
+
+        /* If fm_extent_count is non-zero, read the first extent since
+         * it is used to calculate end_offset and device from previous
+         * fiemap call. */
+        if (extent_count) {
+                if (copy_from_user(&fiemap_s->fm_extents[0],
+                    (char __user *)arg + sizeof(*fiemap_s),
+                    sizeof(struct ll_fiemap_extent)))
+                        GOTO(error, rc = -EFAULT);
+        }
+
+        rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
+        if (rc)
+                GOTO(error, rc);
+
+        /* Always return the header; return extents only when the caller
+         * provided room for them. */
+        ret_bytes = sizeof(struct ll_user_fiemap);
+
+        if (extent_count != 0)
+                ret_bytes += (fiemap_s->fm_mapped_extents *
+                                 sizeof(struct ll_fiemap_extent));
+
+        if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
+                rc = -EFAULT;
+
+error:
+        OBD_VFREE(fiemap_s, num_bytes);
+        RETURN(rc);
+}
+
 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                   unsigned long arg)
 {
@@ -1670,74 +1774,11 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
-                RETURN(ll_lov_recreate_obj(inode, file, arg));
-        case FSFILT_IOC_FIEMAP: {
-                struct ll_user_fiemap *fiemap_s;
-                size_t num_bytes, ret_bytes;
-                unsigned int extent_count;
-                int rc = 0;
-
-                /* Get the extent count so we can calculate the size of
-                 * required fiemap buffer */
-                if (get_user(extent_count,
-                    &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
-                        RETURN(-EFAULT);
-                num_bytes = sizeof(*fiemap_s) + (extent_count *
-                                                 sizeof(struct ll_fiemap_extent));
-                OBD_VMALLOC(fiemap_s, num_bytes);
-                if (fiemap_s == NULL)
-                        RETURN(-ENOMEM);
-
-                if (cfs_copy_from_user(fiemap_s,
-                                       (struct ll_user_fiemap __user *)arg,
-                                       sizeof(*fiemap_s)))
-                        GOTO(error, rc = -EFAULT);
-
-                if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
-                        fiemap_s->fm_flags = fiemap_s->fm_flags &
-                                                    ~LUSTRE_FIEMAP_FLAGS_COMPAT;
-                        if (cfs_copy_to_user((char *)arg, fiemap_s,
-                                             sizeof(*fiemap_s)))
-                                GOTO(error, rc = -EFAULT);
-
-                        GOTO(error, rc = -EBADR);
-                }
-
-                /* If fm_extent_count is non-zero, read the first extent since
-                 * it is used to calculate end_offset and device from previous
-                 * fiemap call. */
-                if (extent_count) {
-                        if (cfs_copy_from_user(&fiemap_s->fm_extents[0],
-                            (char __user *)arg + sizeof(*fiemap_s),
-                            sizeof(struct ll_fiemap_extent)))
-                                GOTO(error, rc = -EFAULT);
-                }
-
-                if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
-                        int rc;
-
-                        rc = filemap_fdatawrite(inode->i_mapping);
-                        if (rc)
-                                GOTO(error, rc);
-                }
-
-                rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
-                if (rc)
-                        GOTO(error, rc);
-
-                ret_bytes = sizeof(struct ll_user_fiemap);
-
-                if (extent_count != 0)
-                        ret_bytes += (fiemap_s->fm_mapped_extents *
-                                         sizeof(struct ll_fiemap_extent));
-
-                if (cfs_copy_to_user((void *)arg, fiemap_s, ret_bytes))
-                        rc = -EFAULT;
-
-error:
-                OBD_VFREE(fiemap_s, num_bytes);
-                RETURN(rc);
-        }
+                RETURN(ll_lov_recreate_obj(inode, arg));
+        case LL_IOC_RECREATE_FID:
+                RETURN(ll_lov_recreate_fid(inode, arg));
+        case FSFILT_IOC_FIEMAP:
+                RETURN(ll_ioctl_fiemap(inode, arg));
         case FSFILT_IOC_GETFLAGS:
         case FSFILT_IOC_SETFLAGS:
                 RETURN(ll_iocontrol(inode, file, cmd, arg));
@@ -1769,6 +1810,19 @@ error:
         case OBD_IOC_FID2PATH:
                 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
 
+        case LL_IOC_GET_MDTIDX: {
+                int mdtidx;
+
+                mdtidx = ll_get_mdt_idx(inode);
+                if (mdtidx < 0)
+                        RETURN(mdtidx);
+
+                if (put_user((int)mdtidx, (int*)arg))
+                        RETURN(-EFAULT);
+
+                RETURN(0);
+        }
+
         default: {
                 int err;
 
@@ -1804,9 +1858,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
                 if (rc != 0)
                         RETURN(rc);
 
-                ll_inode_size_lock(inode, 0);
                 offset += i_size_read(inode);
-                ll_inode_size_unlock(inode, 0);
         } else if (origin == 1) { /* SEEK_CUR */
                 offset += file->f_pos;
         }
@@ -1822,6 +1874,30 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
         RETURN(retval);
 }
 
+/*
+ * ->flush() file operation.
+ *
+ * Reports asynchronous write errors recorded for this file: the error
+ * saved in the inode info (lli_async_rc) and, when the file has a stripe
+ * descriptor, the one returned by lov_test_and_clear_async_rc().  The
+ * inode's saved error is reset and the stripe-level check is made even
+ * when an inode-level error was already found.
+ *
+ * Returns -EIO if either source reported an error, 0 otherwise.
+ */
+#ifdef HAVE_FLUSH_OWNER_ID
+int ll_flush(struct file *file, fl_owner_t id)
+#else
+int ll_flush(struct file *file)
+#endif
+{
+        struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        int async_rc;
+        int lov_rc;
+
+        /* pick up and reset the error recorded when async writeback
+         * failed for pages in this mapping */
+        async_rc = lli->lli_async_rc;
+        lli->lli_async_rc = 0;
+
+        if (lsm != NULL) {
+                lov_rc = lov_test_and_clear_async_rc(lsm);
+                if (async_rc == 0)
+                        async_rc = lov_rc;
+        }
+
+        if (async_rc != 0)
+                return -EIO;
+
+        return 0;
+}
+
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
 {
         struct inode *inode = dentry->d_inode;
@@ -1868,11 +1944,12 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
                         RETURN(rc ? rc : -ENOMEM);
 
                 oa->o_id = lsm->lsm_object_id;
-                oa->o_gr = lsm->lsm_object_gr;
+                oa->o_seq = lsm->lsm_object_seq;
                 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
-                obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-                                           OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-                                           OBD_MD_FLGROUP);
+                obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid,
+                                OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+                                OBD_MD_FLGROUP);
 
                 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
                 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
@@ -2125,10 +2202,9 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
 
                 ll_lookup_finish_locks(&oit, dentry);
         } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
-
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 obd_valid valid = OBD_MD_FLGETATTR;
-                struct obd_capa *oc;
+                struct md_op_data *op_data;
                 int ealen = 0;
 
                 if (S_ISREG(inode->i_mode)) {
@@ -2137,13 +2213,19 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
                                 RETURN(rc);
                         valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
                 }
+
+                op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
+                                             0, ealen, LUSTRE_OPC_ANY,
+                                             NULL);
+                if (op_data == NULL)
+                        RETURN(-ENOMEM);
+
+                op_data->op_valid = valid;
                 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
                  * capa for this inode. Because we only keep capas of dirs
                  * fresh. */
-                oc = ll_mdscapa_get(inode);
-                rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
-                                ealen, &req);
-                capa_put(oc);
+                rc = md_getattr(sbi->ll_md_exp, op_data, &req);
+                ll_finish_md_op_data(op_data);
                 if (rc) {
                         rc = ll_inode_revalidate_fini(inode, rc);
                         RETURN(rc);
@@ -2158,6 +2240,7 @@ out:
 
 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
 {
+        struct inode *inode = dentry->d_inode;
         int rc;
         ENTRY;
 
@@ -2165,14 +2248,18 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                                                   MDS_INODELOCK_LOOKUP);
 
         /* if object not yet allocated, don't validate size */
-        if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
+        if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
+                LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
+                LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
+                LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
                 RETURN(0);
+        }
 
         /* cl_glimpse_size will prefer locally cached writes if they extend
          * the file */
 
         if (rc == 0)
-                rc = cl_glimpse_size(dentry->d_inode);
+                rc = cl_glimpse_size(inode);
 
         RETURN(rc);
 }
@@ -2181,6 +2268,7 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                   struct lookup_intent *it, struct kstat *stat)
 {
         struct inode *inode = de->d_inode;
+        struct ll_inode_info *lli = ll_i2info(inode);
         int res = 0;
 
         res = ll_inode_revalidate_it(de, it);
@@ -2190,7 +2278,11 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                 return res;
 
         stat->dev = inode->i_sb->s_dev;
-        stat->ino = inode->i_ino;
+        if (ll_need_32bit_api(ll_i2sbi(inode)))
+                stat->ino = cl_fid_build_ino32(&lli->lli_fid);
+        else
+                stat->ino = inode->i_ino;
+
         stat->mode = inode->i_mode;
         stat->nlink = inode->i_nlink;
         stat->uid = inode->i_uid;
@@ -2205,10 +2297,8 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
         stat->blksize = 1 << inode->i_blkbits;
 #endif
 
-        ll_inode_size_lock(inode, 0);
         stat->size = i_size_read(inode);
         stat->blocks = inode->i_blocks;
-        ll_inode_size_unlock(inode, 0);
 
         return 0;
 }
@@ -2224,16 +2314,32 @@ int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 __u64 start, __u64 len)
 {
         int rc;
-        struct ll_user_fiemap *fiemap = (struct ll_user_fiemap*)(
-                fieinfo->fi_extents_start - sizeof(ll_user_fiemap));
+        size_t num_bytes;
+        struct ll_user_fiemap *fiemap;
+        unsigned int extent_count = fieinfo->fi_extents_max;
+
+        num_bytes = sizeof(*fiemap) + (extent_count *
+                                       sizeof(struct ll_fiemap_extent));
+        OBD_VMALLOC(fiemap, num_bytes);
+
+        if (fiemap == NULL)
+                RETURN(-ENOMEM);
+
+        fiemap->fm_flags = fieinfo->fi_flags;
+        fiemap->fm_extent_count = fieinfo->fi_extents_max;
+        fiemap->fm_start = start;
+        fiemap->fm_length = len;
+        memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
+               sizeof(struct ll_fiemap_extent));
 
-        rc = ll_do_fiemap(inode, fiemap, sizeof(*fiemap) +
-                          fiemap->fm_extent_count *
-                          sizeof(struct ll_fiemap_extent));
+        rc = ll_do_fiemap(inode, fiemap, num_bytes);
 
         fieinfo->fi_flags = fiemap->fm_flags;
         fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
+        memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
+               fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
 
+        OBD_VFREE(fiemap, num_bytes);
         return rc;
 }
 #endif
@@ -2379,6 +2485,7 @@ struct file_operations ll_file_operations = {
         .splice_read    = ll_file_splice_read,
 #endif
         .fsync          = ll_fsync,
+        .flush          = ll_flush
 };
 
 struct file_operations ll_file_operations_flock = {
@@ -2398,6 +2505,7 @@ struct file_operations ll_file_operations_flock = {
         .splice_read    = ll_file_splice_read,
 #endif
         .fsync          = ll_fsync,
+        .flush          = ll_flush,
 #ifdef HAVE_F_OP_FLOCK
         .flock          = ll_file_flock,
 #endif
@@ -2422,6 +2530,7 @@ struct file_operations ll_file_operations_noflock = {
         .splice_read    = ll_file_splice_read,
 #endif
         .fsync          = ll_fsync,
+        .flush          = ll_flush,
 #ifdef HAVE_F_OP_FLOCK
         .flock          = ll_file_noflock,
 #endif