+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lustre/lib/fsfilt_tmpfs.c
- * Lustre filesystem abstraction routines
- *
- * Copyright (C) 2002, 2003, 2004 Cluster File Systems, Inc.
- * Author: Yury Umanets <umka@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_FILTER
-
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/pagemap.h>
-#include <linux/version.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/module.h>
-#include <linux/shmem_fs.h>
-#include <linux/lustre_dir.h>
-#include <linux/lustre_fsfilt.h>
-
-/* prefix is needed because the tmpfs xattr patch does not support namespaces
- * yet. */
-#define XATTR_LUSTRE_MDS_LOV_EA "trusted.lov"
-
-/* transaction handle returned by fsfilt_tmpfs_mtd_start() and
- * fsfilt_tmpfs_io_start(). It carries nothing but the operation code for now;
- * it is kept as a structure because we will probably need to save something
- * while modifying an inode, and then this will be useful. */
-struct tmpfs_trans {
- int op; /* @op passed to mtd_start(); io_start() stores 0 here */
-};
-
-/* slab cache the transaction handles are allocated from. Created in
- * fsfilt_tmpfs_init(), destroyed in fsfilt_tmpfs_exit(). */
-static kmem_cache_t *trans_cache;
-/* number of handles handed out by the *_start() functions and not yet released
- * by the matching *_commit(); checked on module exit. */
-static atomic_t trans_count = ATOMIC_INIT(0);
-
-/* state shared between fsfilt_tmpfs_readpage() and its vfs_readdir() callback
- * fillent() while directory entries are packed into the caller's buffer. */
-struct fetch_hint {
- int count; /* entries stored by fillent() during the current pass */
- __u16 chunk; /* bytes still free in the buffer */
- void *dirent; /* where the next entry is going to be written */
- __u16 rec_len; /* record length of the entry stored last */
- struct file *file; /* directory being read */
-};
-
-typedef struct fetch_hint fetch_hint_t;
-
-/* starts a new transaction on tmpfs for metadata operations, that is for
- * creating a file, deleting it, etc. -- everything except reading/writing
- * data. There is not much to do for tmpfs: we only check that the filesystem
- * still has free blocks and allocate a handle remembering @op.
- *
- * Returns the handle to be passed to fsfilt_tmpfs_mtd_commit() later, or an
- * ERR_PTR() value on failure: the error of vfs_statfs(), -ENOSPC when no free
- * blocks are left, -ENOMEM when the handle cannot be allocated.
- * @desc_private is not used. */
-static void *
-fsfilt_tmpfs_mtd_start(struct inode *inode, int op, void *desc_private)
-{
-        int rc;
-        struct kstatfs sfs;
-        struct tmpfs_trans *trans;
-
-        CDEBUG(D_INFO, "Metadata operation 0x%x is started on "
-               "inode 0x%lx\n", op, inode->i_ino);
-
-        if ((rc = vfs_statfs(inode->i_sb, &sfs)))
-                return ERR_PTR(rc);
-
-        if (sfs.f_bfree == 0)
-                return ERR_PTR(-ENOSPC);
-
-        OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS,
-                       sizeof(*trans));
-
-        /* report allocation failure in the same way as the other failures
-         * above. A bare NULL would pass an IS_ERR() check in the caller and
-         * end up in fsfilt_tmpfs_mtd_commit() as if it were a valid handle. */
-        if (trans == NULL)
-                return ERR_PTR(-ENOMEM);
-
-        atomic_inc(&trans_count);
-
-        trans->op = op;
-        return trans;
-}
-
-/* finishes the metadata transaction @handle obtained from
- * fsfilt_tmpfs_mtd_start() for @inode. tmpfs has nothing to commit, so the
- * handle is simply released. @force_sync is not used. Always returns 0. */
-static int
-fsfilt_tmpfs_mtd_commit(struct inode *inode, void *handle, int force_sync)
-{
-        struct tmpfs_trans *trans;
-
-        trans = (struct tmpfs_trans *)handle;
-
-        /* the message reads trans->op, so it has to be printed before the
-         * handle goes back to the slab cache. */
-        CDEBUG(D_INFO, "Metadata operation 0x%x is "
-               "finished on inode 0x%lx\n", trans->op,
-               inode->i_ino);
-
-        OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans));
-        atomic_dec(&trans_count);
-
-        return 0;
-}
-
-/* starts a new transaction for read/write operations. As for metadata
- * operations, we only check that the filesystem holding the inode of
- * @fso->fso_dentry still has free blocks and allocate a handle (with op set
- * to 0).
- *
- * Returns the handle to be passed to fsfilt_tmpfs_io_commit() later, or an
- * ERR_PTR() value on failure: the error of vfs_statfs(), -ENOSPC when no free
- * blocks are left, -ENOMEM when the handle cannot be allocated.
- * @objcount, @niocount, @nb and @desc_private are not used. */
-static void *
-fsfilt_tmpfs_io_start(int objcount, struct fsfilt_objinfo *fso,
-                      int niocount, struct niobuf_local *nb,
-                      void *desc_private)
-{
-        int rc;
-        struct kstatfs sfs;
-        struct tmpfs_trans *trans;
-
-        ENTRY;
-
-        CDEBUG(D_INFO, "IO operation is started on inode 0x%lx\n",
-               fso->fso_dentry->d_inode->i_ino);
-
-        /* check if we still have free space on filesystem. */
-        if ((rc = vfs_statfs(fso->fso_dentry->d_inode->i_sb, &sfs)))
-                RETURN(ERR_PTR(rc));
-
-        if (sfs.f_bfree == 0)
-                RETURN(ERR_PTR(-ENOSPC));
-
-        OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS,
-                       sizeof(*trans));
-
-        /* report allocation failure in the same way as the other failures
-         * above. A bare NULL would pass an IS_ERR() check in the caller and
-         * end up in fsfilt_tmpfs_io_commit() as if it were a valid handle. */
-        if (trans == NULL)
-                RETURN(ERR_PTR(-ENOMEM));
-
-        atomic_inc(&trans_count);
-
-        trans->op = 0;
-        RETURN(trans);
-}
-
-/* finishes the IO transaction @handle for @inode. This is called from
- * direct_io() with the handle obtained from brw_start(). The handle is
- * released and *@wh is set to NULL, as tmpfs has no wait handle to give back.
- * Always returns 0. */
-static int fsfilt_tmpfs_io_commit(struct inode *inode, void *handle,
-                                  void **wh)
-{
-        struct tmpfs_trans *trans = handle;
-
-        OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans));
-        atomic_dec(&trans_count);
-
-        CDEBUG(D_INFO, "IO operation is finished on inode 0x%lx\n",
-               inode->i_ino);
-
-        /* nothing for fsfilt_tmpfs_commit_wait() to wait on. */
-        *wh = NULL;
-        return 0;
-}
-
-/* counterpart of fsfilt_tmpfs_io_commit(): would wait on wait handle @wh for
- * the transaction to be finished. There is nothing to wait for so far, so the
- * call is only logged. Always returns 0. */
-static int fsfilt_tmpfs_commit_wait(struct inode *inode, void *wh)
-{
-        CDEBUG(D_INFO, "commit wait is called\n");
-
-        return 0;
-}
-
-/* ioctl entry point. No additional ioctl functions are implemented here: the
- * request is forwarded to the ioctl method of @inode's file operations.
- * Returns what that method returns, or -ENOTTY if there is no such method. */
-static int fsfilt_tmpfs_iocontrol(struct inode * inode, struct file *file,
-                                  unsigned int cmd, unsigned long arg)
-{
-        int rc;
-
-        ENTRY;
-
-        if (inode->i_fop->ioctl == NULL)
-                rc = -ENOTTY;
-        else
-                rc = inode->i_fop->ioctl(inode, file, cmd, arg);
-
-        RETURN(rc);
-}
-
-/* fills @osfs with statfs info of tmpfs superblock @sb. The numbers are packed
- * as the filesystem reports them, no correction is done. Returns -ENOSYS if
- * the superblock has no statfs method, otherwise the result of that method. */
-static int fsfilt_tmpfs_statfs(struct super_block *sb,
-                               struct obd_statfs *osfs)
-{
-        struct kstatfs sfs;
-        int rc;
-
-        if (sb->s_op->statfs == NULL)
-                return -ENOSYS;
-
-        memset(&sfs, 0, sizeof(sfs));
-
-        /* sb->s_op->statfs() is called directly rather than through
-         * vfs_statfs() to be consistent with other parts of tmpfs filter. */
-        lock_kernel();
-        rc = sb->s_op->statfs(sb, &sfs);
-        unlock_kernel();
-
-        if (rc != 0)
-                return rc;
-
-        statfs_pack(osfs, &sfs);
-        return 0;
-}
-
-/* makes sure that all dirty buffers are stored onto device. In principle there
- * is nothing to do for tmpfs, but we do not try to be smarter than tmpfs is:
- * sb->s_op->sync_fs() is called if there is any and its result is returned.
- * Returns 0 otherwise. */
-static int fsfilt_tmpfs_sync(struct super_block *sb)
-{
-        if (sb->s_op->sync_fs == NULL)
-                return 0;
-
-        return sb->s_op->sync_fs(sb);
-}
-
-/* applies @iattr to the inode of @dentry, using the inode's own setattr method
- * if there is any and inode_change_ok() + inode_setattr() otherwise. Note that
- * @iattr itself is modified on the way. If @do_trunc is zero, a size change is
- * stored into the inode directly instead of being passed down. @handle is not
- * used. Returns the result of the setattr call. */
-static int fsfilt_tmpfs_setattr(struct dentry *dentry, void *handle,
-                                struct iattr *iattr, int do_trunc)
-{
-        struct inode *inode = dentry->d_inode;
-        int rc;
-
-        lock_kernel();
-
-        /* take the new size ourselves and hide it from the setattr call, so
-         * that vmtruncate() is not called on inode_setattr(). */
-        if (!do_trunc && (iattr->ia_valid & ATTR_SIZE)) {
-                inode->i_size = iattr->ia_size;
-                iattr->ia_valid &= ~ATTR_SIZE;
-        }
-
-        /* file type bits always come from the inode, the rest of the mode
-         * comes from the caller. */
-        iattr->ia_mode = (iattr->ia_mode & ~S_IFMT) |
-                         (inode->i_mode & S_IFMT);
-
-        iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
-
-        if (inode->i_op->setattr != NULL) {
-                rc = inode->i_op->setattr(dentry, iattr);
-        } else {
-                rc = inode_change_ok(inode, iattr);
-                if (rc == 0)
-                        rc = inode_setattr(inode, iattr);
-        }
-
-        unlock_kernel();
-
-        return rc;
-}
-
-/* filesystem setup hook. tmpfs needs no preparation, so @sb is left untouched
- * and 0 is returned. */
-static int fsfilt_tmpfs_setup(struct super_block *sb)
-{
-        return 0;
-}
-
-/* stores @lmm_size bytes of @lmm as the XATTR_LUSTRE_MDS_LOV_EA extended
- * attribute of @inode. The transaction @handle is not used. Returns the result
- * of shmem_xattr_set(); a non-zero result is also reported with CERROR(). */
-static int fsfilt_tmpfs_set_md(struct inode *inode, void *handle,
-                               void *lmm, int lmm_size)
-{
-        int rc;
-
-        lock_kernel();
-        rc = shmem_xattr_set(inode, XATTR_LUSTRE_MDS_LOV_EA, lmm,
-                             lmm_size, 0);
-        unlock_kernel();
-
-        if (rc != 0)
-                CERROR("error adding MD data to inode %lu: rc = %d\n",
-                       inode->i_ino, rc);
-
-        return rc;
-}
-
-/* reads the XATTR_LUSTRE_MDS_LOV_EA extended attribute of @inode into @lmm
- * (up to @lmm_size bytes). @lmm may be NULL. On a failed read a non-NULL @lmm
- * is zeroed. Returns the result of shmem_xattr_get(), except that -ENODATA is
- * turned into 0. */
-static int fsfilt_tmpfs_get_md(struct inode *inode, void *lmm,
-                               int lmm_size)
-{
-        int rc;
-
-        /* down_trylock() must fail here -- presumably because the caller is
-         * expected to hold inode->i_sem already. */
-        LASSERT(down_trylock(&inode->i_sem) != 0);
-
-        lock_kernel();
-        rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_LOV_EA, lmm,
-                             lmm_size);
-        unlock_kernel();
-
-        if (lmm != NULL && rc < 0) {
-                CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc = %d\n",
-                       XATTR_LUSTRE_MDS_LOV_EA, inode->i_ino, rc);
-
-                /* do not leave garbage in caller's buffer. */
-                memset(lmm, 0, lmm_size);
-        }
-
-        /* missing attribute is not an error for the caller. */
-        return (rc == -ENODATA) ? 0 : rc;
-}
-
-/* reads @count bytes at *@off from regular @file to @buf by means of the
- * file's own read method and returns its result. Returns -EINVAL if @file is
- * not a regular file. */
-static ssize_t fsfilt_tmpfs_read(struct file *file, char *buf,
-                                 size_t count, loff_t *off)
-{
-        if (S_ISREG(file->f_dentry->d_inode->i_mode))
-                return file->f_op->read(file, buf, count, off);
-
-        return -EINVAL;
-}
-
-/* writes @count bytes from @buf at *@off to regular @file by means of the
- * file's own write method and returns its result. Returns -EINVAL if @file is
- * not a regular file. */
-static ssize_t fsfilt_tmpfs_write(struct file *file, char *buf,
-                                  size_t count, loff_t *off)
-{
-        if (S_ISREG(file->f_dentry->d_inode->i_mode))
-                return file->f_op->write(file, buf, count, off);
-
-        return -EINVAL;
-}
-
-/* puts the contents of @page into the page cache of @inode: the shmem page
- * with the same index is obtained, filled with a copy of @page and marked
- * dirty. @lazy_cache is not used. Returns 0, or the error of
- * shmem_getpage_locked(). */
-static int fsfilt_tmpfs_putpage(struct inode *inode, struct page *page,
-                                int lazy_cache)
-{
-        struct shmem_inode_info *info = SHMEM_I(inode);
-        struct page *shmem_page;
-        void *dst;
-        void *src;
-
-        /* the page is taken from shmem because it may have to be read from
-         * swap. This is the reason why passed @page is not just added to the
-         * page cache. */
-        down(&info->sem);
-        shmem_page = shmem_getpage_locked(inode, page->index);
-        up(&info->sem);
-
-        if (IS_ERR(shmem_page))
-                return PTR_ERR(shmem_page);
-
-        dst = kmap(shmem_page);
-        src = kmap(page);
-        copy_page(dst, src);
-        kunmap(page);
-        kunmap(shmem_page);
-
-        /* taking care about possible cache aliasing. */
-        if (inode->i_mapping->i_mmap_shared != NULL)
-                flush_dcache_page(shmem_page);
-
-        SetPageDirty(shmem_page);
-        UnlockPage(shmem_page);
-        page_cache_release(shmem_page);
-
-        return 0;
-}
-
-/* returns the page of @inode with passed @index as obtained from
- * shmem_getpage_unlocked(); an error pointer from there is returned as is. */
-static struct page *fsfilt_tmpfs_getpage(struct inode *inode, long int index)
-{
-        struct page *page = shmem_getpage_unlocked(inode, index);
-
-        /* taking care about possible cache aliasing. */
-        if (!IS_ERR(page) && inode->i_mapping->i_mmap_shared != NULL)
-                flush_dcache_page(page);
-
-        return page;
-}
-
-/* vfs_readdir() callback used from fsfilt_tmpfs_readpage(). @buf is the
- * fetch_hint_t describing the output buffer. Entry @name/@ino is stored at
- * hint->dirent in ext2 directory entry format and the hint is advanced past
- * it. @offset and @d_type are not used, the file type of the entry is always
- * set to 0. Returns 0, or -ENOENT if the entry does not fit into the space
- * left in the buffer. */
-static int fillent(void *buf, const char *name, int namlen,
-                   loff_t offset, ino_t ino, unsigned int d_type)
-{
-        fetch_hint_t *hint = buf;
-        ext2_dirent_t *entry = hint->dirent;
-        __u16 rec_len = EXT2_ENT_LEN(namlen);
-
-        if (rec_len > hint->chunk)
-                return -ENOENT;
-
-        /* the entry itself. */
-        entry->inode = cpu_to_le32(ino);
-        entry->rec_len = cpu_to_le16(rec_len);
-        entry->name_len = namlen;
-        entry->file_type = 0;
-        memcpy(entry->name, name, namlen);
-
-        /* accounting for fsfilt_tmpfs_readpage(). */
-        hint->count++;
-        hint->chunk -= rec_len;
-        hint->rec_len = rec_len;
-        hint->dirent += rec_len;
-
-        return 0;
-}
-
-/* this should be the same as in tmpfs. Should it not be hardcoded? */
-#define BOGO_ENTRY_SIZE (20)
-
-/* mostly needed for reading directory from @file on MDS.
- *
- * For a regular file this is a plain read of @count bytes at *@off into @buf
- * through the file's read method. For a directory, @buf is filled page by
- * page with ext2 style directory entries produced by vfs_readdir(); reading
- * starts from entry number *@off / BOGO_ENTRY_SIZE and the last entry of
- * every page is extended to cover the unused rest of that page.
- *
- * Returns the number of bytes the buffer was filled for (the result of the
- * read method for a regular file), the error of vfs_readdir(), -ENOENT if the
- * directory cannot be positioned, or -EINVAL for other file types. */
-static ssize_t
-fsfilt_tmpfs_readpage(struct file *file, char *buf,
-                      size_t count, loff_t *off)
-{
-        int rc = 0;
-        struct inode *inode = file->f_dentry->d_inode;
-
-        if (S_ISREG(inode->i_mode)) {
-                rc = file->f_op->read(file, buf, count, off);
-        } else if (S_ISDIR(inode->i_mode)) {
-                int error;
-                loff_t offset;
-                fetch_hint_t hint;
-                ext2_dirent_t *dirent;
-
-                /* positioning to passed @off. The offset is narrowed to long
-                 * by value (not by reinterpreting the loff_t storage, which
-                 * picks the wrong half on 32-bit big-endian hosts), so that
-                 * the division stays a native one. */
-                offset = (long int)*off / BOGO_ENTRY_SIZE;
-
-                if (file->f_op->llseek(file, offset, 0) != offset)
-                        return -ENOENT;
-
-                /* reading @count bytes of data, one page at a time. */
-                while (count > 0) {
-                        /* never claim more than is left in @buf: @count is
-                         * unsigned and would wrap around if a whole page was
-                         * subtracted from a smaller remainder. */
-                        size_t chunk = count < PAGE_CACHE_SIZE ?
-                                       count : PAGE_CACHE_SIZE;
-
-                        /* NOTE(review): hint.chunk and hint.rec_len are __u16,
-                         * so this assumes a page is smaller than 64K. */
-                        hint.count = 0;
-                        hint.file = file;
-                        hint.dirent = buf;
-                        hint.chunk = chunk;
-                        hint.rec_len = chunk;
-
-                        if ((error = vfs_readdir(file, fillent, &hint)) < 0)
-                                return error;
-
-                        /* we should have something after vfs_readdir() is
-                         * finished. */
-                        LASSERT(hint.count != 0);
-
-                        /* last entry should be extended up to free page
-                         * size. */
-                        if (hint.chunk > 0) {
-                                __u16 rec_len;
-
-                                hint.dirent -= hint.rec_len;
-                                dirent = (ext2_dirent_t *)hint.dirent;
-
-                                rec_len = le16_to_cpu(dirent->rec_len);
-                                dirent->rec_len = cpu_to_le16(rec_len + hint.chunk);
-                        }
-
-                        /* next page goes after this one instead of on top of
-                         * it. */
-                        buf += chunk;
-                        count -= chunk;
-                        *off += chunk;
-                        rc += chunk;
-                }
-
-                UPDATE_ATIME(inode);
-        } else {
-                rc = -EINVAL;
-        }
-
-        return rc;
-}
-
-/* registers commit callback @cb_func for transaction @handle. The callback is
- * not deferred here: it is called right away with @obd, @last_rcvd, @cb_data
- * and 0 as arguments. @handle is not used. Always returns 0. */
-static int fsfilt_tmpfs_add_journal_cb(struct obd_device *obd,
-                                       __u64 last_rcvd, void *handle,
-                                       fsfilt_cb_t cb_func, void *cb_data)
-{
-        cb_func(obd, last_rcvd, cb_data, 0);
-
-        return 0;
-}
-
-/* prepares @inode for a SAN write. Block numbers and other stuff will not be
- * used for tmpfs, so only blocks[0] is set (to 0), and the inode size is
- * raised to @newsize if that is bigger. @nblocks is not used. Always
- * returns 0. */
-static int fsfilt_tmpfs_prep_san_write(struct inode *inode, long *blocks,
-                                       int nblocks, loff_t newsize)
-{
-        blocks[0] = 0;
-
-        if (inode->i_size < newsize)
-                inode->i_size = newsize;
-
-        return 0;
-}
-
-/* this is used for reading configuration: reads @size bytes at *@off from
- * @file to @buf through fsfilt_tmpfs_read(). A request crossing the end of the
- * file is shortened to what is there. Returns 0 on success (also when nothing
- * is left to read), -EIO if *@off is behind the end of the file, or the error
- * of fsfilt_tmpfs_read(). */
-static int fsfilt_tmpfs_read_record(struct file *file, void *buf,
-                                    int size, loff_t *off)
-{
-        struct inode *inode = file->f_dentry->d_inode;
-        int truncated;
-        int error;
-
-        /* file size is looked at under the kernel lock only. */
-        lock_kernel();
-        truncated = inode->i_size < *off + size;
-        if (truncated)
-                size = inode->i_size - *off;
-        unlock_kernel();
-
-        if (truncated) {
-                if (size < 0)
-                        return -EIO;
-                if (size == 0)
-                        return 0;
-        }
-
-        error = fsfilt_tmpfs_read(file, buf, size, off);
-        if (error < 0)
-                return error;
-
-        return 0;
-}
-
-/* this is used for writing configuration: writes @size bytes from @buf at
- * *@off to @file through fsfilt_tmpfs_write(). @sync is not used. Returns 0 on
- * success or the error of fsfilt_tmpfs_write(). */
-static int fsfilt_tmpfs_write_record(struct file *file, void *buf,
-                                     int size, loff_t *off, int sync)
-{
-        int error = fsfilt_tmpfs_write(file, buf, size, off);
-
-        if (error < 0)
-                return error;
-
-        return 0;
-}
-
-/* method table registered with fsfilt for filesystem type "tmpfs". */
-static struct fsfilt_operations fsfilt_tmpfs_ops = {
-        .fs_type           = "tmpfs",
-        .fs_owner          = THIS_MODULE,
-        .fs_start          = fsfilt_tmpfs_mtd_start,
-        .fs_commit         = fsfilt_tmpfs_mtd_commit,
-        .fs_brw_start      = fsfilt_tmpfs_io_start,
-        .fs_commit_async   = fsfilt_tmpfs_io_commit,
-        .fs_commit_wait    = fsfilt_tmpfs_commit_wait,
-        .fs_iocontrol      = fsfilt_tmpfs_iocontrol,
-        .fs_set_md         = fsfilt_tmpfs_set_md,
-        .fs_get_md         = fsfilt_tmpfs_get_md,
-        .fs_readpage       = fsfilt_tmpfs_readpage,
-        .fs_getpage        = fsfilt_tmpfs_getpage,
-        .fs_putpage        = fsfilt_tmpfs_putpage,
-        .fs_add_journal_cb = fsfilt_tmpfs_add_journal_cb,
-        .fs_statfs         = fsfilt_tmpfs_statfs,
-        .fs_sync           = fsfilt_tmpfs_sync,
-        .fs_prep_san_write = fsfilt_tmpfs_prep_san_write,
-        .fs_write_record   = fsfilt_tmpfs_write_record,
-        .fs_read_record    = fsfilt_tmpfs_read_record,
-        .fs_setattr        = fsfilt_tmpfs_setattr,
-        .fs_setup          = fsfilt_tmpfs_setup,
-};
-
-/* module init: creates the slab cache for transaction handles and registers
- * fsfilt_tmpfs_ops. The cache is destroyed again if the registration fails.
- * Returns 0, -ENOMEM if the cache cannot be created, or the error of
- * fsfilt_register_ops(). */
-static int __init fsfilt_tmpfs_init(void)
-{
-        int rc;
-
-        trans_cache = kmem_cache_create("fsfilt_tmpfs_trans",
-                                        sizeof(struct tmpfs_trans),
-                                        0, 0, NULL, NULL);
-        if (trans_cache == NULL) {
-                CERROR("error allocating fsfilt transaction handle cache\n");
-                GOTO(out, rc = -ENOMEM);
-        }
-
-        rc = fsfilt_register_ops(&fsfilt_tmpfs_ops);
-        if (rc != 0)
-                kmem_cache_destroy(trans_cache);
-out:
-        return rc;
-}
-
-/* module exit: unregisters fsfilt_tmpfs_ops and destroys the slab cache of
- * transaction handles. Complains if the cache cannot be destroyed or if some
- * handles are still accounted in trans_count. */
-static void __exit fsfilt_tmpfs_exit(void)
-{
-        int rc;
-
-        fsfilt_unregister_ops(&fsfilt_tmpfs_ops);
-        rc = kmem_cache_destroy(trans_cache);
-
-        if (rc == 0 && atomic_read(&trans_count) == 0)
-                return;
-
-        CERROR("can't free fsfilt trans cache: count %d, rc = %d\n",
-               atomic_read(&trans_count), rc);
-}
-
-/* module entry and exit points. */
-module_init(fsfilt_tmpfs_init);
-module_exit(fsfilt_tmpfs_exit);
-
-/* module description as seen by module tools. */
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre tmpfs Filesystem Helper v0.1");
-MODULE_LICENSE("GPL");