+/*
+ * Declare the block quota that a later osd_fallocate() on \a dt over the
+ * byte range starting at \a start and ending at \a end will consume.
+ *
+ * Plain preallocation is accepted, optionally with FALLOC_FL_KEEP_SIZE.
+ * Any other mode bit, or a negative od_fallocate_zero_blocks setting on
+ * the device, is refused with -EOPNOTSUPP.
+ *
+ * Returns -EOPNOTSUPP as described above, otherwise whatever
+ * osd_declare_inode_qid() returns.
+ */
+static int osd_declare_fallocate(const struct lu_env *env,
+				 struct dt_object *dt, __u64 start, __u64 end,
+				 int mode, struct thandle *th)
+{
+	struct osd_object *obj = osd_dt_obj(dt);
+	struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super);
+	struct osd_device *osd = osd_obj2dev(obj);
+	struct inode *inode = obj->oo_inode;
+	/* deepest extent tree accounted for: inode + 4 index blocks */
+	const int max_depth = 5;
+	long long meta_bytes;
+	long long qblocks;
+
+	ENTRY;
+
+	/* anything beyond KEEP_SIZE is not implemented */
+	if ((mode & ~FALLOC_FL_KEEP_SIZE) != 0)
+		RETURN(-EOPNOTSUPP);
+
+	/* a negative setting switches fallocate off entirely */
+	if (osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks < 0)
+		RETURN(-EOPNOTSUPP);
+
+	LASSERT(th);
+	LASSERT(inode);
+
+	/*
+	 * Rough metadata estimate in bytes: one page plus one filesystem
+	 * block per tree level. An approximation is good enough here.
+	 */
+	meta_bytes = PAGE_SIZE + max_depth * LDISKFS_BLOCK_SIZE(osd_sb(osd));
+
+	/* quota is accounted in 1K blocks: metadata + data range + slack */
+	qblocks = toqb(meta_bytes) + toqb(end - start) +
+		  LDISKFS_META_TRANS_BLOCKS(inode->i_sb);
+
+	/*
+	 * No journal credits are reserved for the whole fallocate here,
+	 * only quota space is declared. Fallocate credits are extended
+	 * as required while the operation runs.
+	 */
+	RETURN(osd_declare_inode_qid(env, i_uid_read(inode),
+				     i_gid_read(inode), i_projid_read(inode),
+				     qblocks, oh, obj, NULL, OSD_QID_BLK));
+}
+
+/*
+ * Preallocate blocks for the byte range [start, end) of \a dt.
+ *
+ * The range is converted to filesystem blocks and handed to
+ * ldiskfs_map_blocks() in a loop, extending or restarting the running
+ * journal handle before every call. New blocks are created zeroed when
+ * the device's od_fallocate_zero_blocks is non-zero, and as unwritten
+ * extents otherwise. With FALLOC_FL_KEEP_SIZE the inode size is left
+ * alone; without it the size is advanced as blocks are mapped, but never
+ * beyond \a end. The inode lock is held for the whole operation.
+ *
+ * Returns -EOPNOTSUPP for inodes that are not extent-mapped, the error of
+ * inode_newsize_ok() if the new size is rejected, a non-zero result of
+ * osd_extend_restart_trans(), or a negative result of
+ * ldiskfs_map_blocks(). When none of these fail, the return value is that
+ * of the final osd_extend_restart_trans() call.
+ */
+static int osd_fallocate(const struct lu_env *env, struct dt_object *dt,
+			 __u64 start, __u64 end, int mode, struct thandle *th)
+{
+	struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super);
+	handle_t *handle = ldiskfs_journal_current_handle();
+	struct osd_object *obj = osd_dt_obj(dt);
+	struct inode *inode = obj->oo_inode;
+	struct ldiskfs_map_blocks map;
+	unsigned int save_credits;
+	unsigned int credits;
+	ldiskfs_lblk_t blen;
+	ldiskfs_lblk_t boff;
+	loff_t new_size = 0;
+	int depth = 0;
+	int flags;
+	int rc = 0;
+
+	ENTRY;
+
+	/*
+	 * oh is derived from th by container_of(), so th has to be checked
+	 * before anything is read through oh for the assertion to be useful.
+	 */
+	LASSERT(th);
+	LASSERT(dt_object_exists(dt));
+	LASSERT(osd_invariant(obj));
+	LASSERT(inode != NULL);
+
+	/* remembered so the handle can be re-extended to it on the way out */
+	save_credits = oh->ot_credits;
+
+	CDEBUG(D_INODE, "fallocate: inode #%lu: start %llu end %llu mode %d\n",
+	       inode->i_ino, start, end, mode);
+
+	dquot_initialize(inode);
+
+	/* first block of the range, and block count with end rounded up */
+	boff = start >> inode->i_blkbits;
+	blen = (ALIGN(end, 1 << inode->i_blkbits) >> inode->i_blkbits) - boff;
+
+	/* Create and mark new extents as either zero or unwritten */
+	flags = osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ?
+		LDISKFS_GET_BLOCKS_CREATE_ZERO :
+		LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT;
+	if (mode & FALLOC_FL_KEEP_SIZE)
+		flags |= LDISKFS_GET_BLOCKS_KEEP_SIZE;
+
+	inode_lock(inode);
+
+	/*
+	 * Preallocation is supported for extent-based files only.
+	 */
+	if (!(ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)))
+		GOTO(out, rc = -EOPNOTSUPP);
+
+	/*
+	 * The file grows only when KEEP_SIZE is not set and the range ends
+	 * past the in-memory or the on-disk size. new_size stays 0 otherwise
+	 * and doubles as the "size must be updated" marker in the loop below.
+	 */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && (end > i_size_read(inode) ||
+	    end > LDISKFS_I(inode)->i_disksize)) {
+		new_size = end;
+		rc = inode_newsize_ok(inode, new_size);
+		if (rc)
+			GOTO(out, rc);
+	}
+
+	inode_dio_wait(inode);
+
+	map.m_lblk = boff;
+	map.m_len = blen;
+
+	/* Don't normalize the request if it can fit in one extent so
+	 * that it doesn't get unnecessarily split into multiple extents.
+	 */
+	if (blen <= EXT_UNWRITTEN_MAX_LEN)
+		flags |= LDISKFS_GET_BLOCKS_NO_NORMALIZE;
+
+	/*
+	 * credits to insert 1 extent into extent tree.
+	 */
+	credits = osd_chunk_trans_blocks(inode, blen);
+	depth = ext_depth(inode);
+
+	while (rc >= 0 && blen) {
+		loff_t epos;
+
+		/*
+		 * Recalculate credits when extent tree depth changes.
+		 */
+		if (depth != ext_depth(inode)) {
+			credits = osd_chunk_trans_blocks(inode, blen);
+			depth = ext_depth(inode);
+		}
+
+		/* TODO: quota check */
+		rc = osd_extend_restart_trans(handle, credits, inode);
+		if (rc)
+			break;
+
+		/* a positive return is the number of blocks mapped */
+		rc = ldiskfs_map_blocks(handle, inode, &map, flags);
+		if (rc <= 0) {
+			CDEBUG(D_INODE,
+			       "inode #%lu: block %u: len %u: ldiskfs_map_blocks returned %d\n",
+			       inode->i_ino, map.m_lblk, map.m_len, rc);
+			ldiskfs_mark_inode_dirty(handle, inode);
+			break;
+		}
+
+		/* advance past the blocks just mapped */
+		map.m_lblk += rc;
+		map.m_len = blen = blen - rc;
+		epos = (loff_t)map.m_lblk << inode->i_blkbits;
+		inode->i_ctime = current_time(inode);
+		if (new_size) {
+			/* block rounding must not push the size beyond end */
+			if (epos > end)
+				epos = end;
+			if (ldiskfs_update_inode_size(inode, epos) & 0x1)
+				inode->i_mtime = inode->i_ctime;
+		} else {
+			/* size is kept: flag blocks allocated past i_size */
+			if (epos > inode->i_size)
+				ldiskfs_set_inode_flag(inode,
+						       LDISKFS_INODE_EOFBLOCKS);
+		}
+
+		ldiskfs_mark_inode_dirty(handle, inode);
+	}
+
+out:
+	/* extend credits if needed for operations such as attribute set */
+	if (rc >= 0)
+		rc = osd_extend_restart_trans(handle, save_credits, inode);
+
+	inode_unlock(inode);
+
+	RETURN(rc);
+}
+