From: yury Date: Wed, 10 Mar 2004 07:50:30 +0000 (+0000) Subject: added tmpfs related stuff X-Git-Tag: v1_7_100~2483 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=65230291ac3140e29aeb5813d075825a348b45d6 added tmpfs related stuff --- diff --git a/lustre/lvfs/fsfilt_tmpfs.c b/lustre/lvfs/fsfilt_tmpfs.c new file mode 100644 index 0000000..808ee25 --- /dev/null +++ b/lustre/lvfs/fsfilt_tmpfs.c @@ -0,0 +1,654 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/lib/fsfilt_tmpfs.c + * Lustre filesystem abstraction routines + * + * Copyright (C) 2002, 2003, 2004 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* prefix is needed because tmpfs xattr patch deos not support namespaces + * yet. */ +#define XATTR_LUSTRE_MDS_LOV_EA "trusted.lov" +#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" + +/* structure instance of to be returned as a transaction handle. This is not + * needed for now, but probably we will need to save something during modifying + * an inode and this is useful for us. */ +struct tmpfs_trans { + int op; +}; + +static kmem_cache_t *trans_cache; +static atomic_t trans_count = ATOMIC_INIT(0); + +/* ext2 directory stuff. It is needed for fs_readpage(), which is used for + * reading directoris on MDS. Probably this should be moved to somewhere more + * convenient? */ +#define EXT2_NAME_LEN (255) + +struct ext2_dirent { + __u32 inode; + __u16 rec_len; + __u8 name_len; + __u8 file_type; + char name[0]; +}; + +typedef struct ext2_dirent ext2_dirent_t; + +struct fetch_hint { + int stop; + int count; + __u16 chunk; + void *dirent; + __u16 rec_len; + struct file *file; +}; + +typedef struct fetch_hint fetch_hint_t; + +#define EXT2_ENT_PAD 4 +#define EXT2_ENT_ROUND (EXT2_ENT_PAD - 1) +#define EXT2_ENT_LEN(len) (((len) + 8 + EXT2_ENT_ROUND) & ~EXT2_ENT_ROUND) + +/* starts new transaction on tmpfs for metadata operations. That if for create + * file, delete it, etc. That is everything except of read/write data. Returns + * pointer to transaction handle to be used later. What we have to do here? + * Seems nothing for a while. */ +static void * +fsfilt_tmpfs_mtd_start(struct inode *inode, int op, void *desc_private) +{ + int rc; + struct kstatfs sfs; + struct tmpfs_trans *trans; + + CDEBUG(D_INFO, "Metadata operation 0x%x is started on " + "inode 0x%lx\n", op, inode->i_ino); + + if ((rc = vfs_statfs(inode->i_sb, &sfs))) + return ERR_PTR(rc); + + if (sfs.f_bfree == 0) + return ERR_PTR(-ENOSPC); + + OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS, + sizeof(*trans)); + + if (trans == NULL) + return NULL; + + atomic_inc(&trans_count); + + trans->op = op; + return trans; +} + +/* commits changes on passed @inode using passed transaction @handle. Should we + * do something here? */ +static int +fsfilt_tmpfs_mtd_commit(struct inode *inode, void *handle, int force_sync) +{ + struct tmpfs_trans *trans; + + trans = (struct tmpfs_trans *)handle; + + OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans)); + atomic_dec(&trans_count); + + CDEBUG(D_INFO, "Metadata operation 0x%x is " + "finished on inode 0x%lx\n", trans->op, + inode->i_ino); + + return 0; +} + +/* starts new transaction for read/write operations. Seems, that here we do + * nothing also. */ +static void * +fsfilt_tmpfs_io_start(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_local *nb, + void *desc_private) +{ + int rc; + struct kstatfs sfs; + struct tmpfs_trans *trans; + + ENTRY; + + CDEBUG(D_INFO, "IO operation is started on inode 0x%lx\n", + fso->fso_dentry->d_inode->i_ino); + + /* check if we still have free space on filesystem. */ + if ((rc = vfs_statfs(fso->fso_dentry->d_inode->i_sb, &sfs))) + RETURN(ERR_PTR(rc)); + + if (sfs.f_bfree == 0) + RETURN(ERR_PTR(-ENOSPC)); + + OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS, + sizeof(*trans)); + + if (trans == NULL) + RETURN(NULL); + + atomic_inc(&trans_count); + + trans->op = 0; + RETURN(trans); +} + +/* commits changes on passed @inode using passed transaction @handle. This is + * called from direct_io() with handle obtained from brw_start(). */ +static int +fsfilt_tmpfs_io_commit(struct inode *inode, void *handle, void **wh) +{ + struct tmpfs_trans *trans; + + trans = (struct tmpfs_trans *)handle; + + OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans)); + atomic_dec(&trans_count); + + CDEBUG(D_INFO, "IO operation is finished on inode " + "0x%lx\n", inode->i_ino); + + /* wait handle is not used. */ + *wh = NULL; + + return 0; +} + +/* waits for transaction started by io_commit() to be finished on passed wait + * handle. What should we do here? Nothing so far. */ +static int +fsfilt_tmpfs_commit_wait(struct inode *inode, void *wh) +{ + CDEBUG(D_INFO, "commit wait is called\n"); + return 0; +} + +/* implements additional ioctl fucntions. Nothing do here. */ +static int +fsfilt_tmpfs_iocontrol(struct inode * inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int rc = -ENOTTY; + + ENTRY; + + if (inode->i_fop->ioctl) + rc = inode->i_fop->ioctl(inode, file, cmd, arg); + + RETURN(rc); +} + +/* fills @osfs by statfs info for tmpfs. Should we do some correcting + here? Probably later. */ +static int +fsfilt_tmpfs_statfs(struct super_block *sb, struct obd_statfs *osfs) +{ + int rc; + struct kstatfs sfs; + + if (!sb->s_op->statfs) + return -ENOSYS; + + memset(&sfs, 0, sizeof(sfs)); + + /* trying to be consistent with other parts of tmpfs filter and call + * sb->s_op->statfs() instead of using vfs_statfs(). */ + lock_kernel(); + rc = sb->s_op->statfs(sb, &sfs); + unlock_kernel(); + + if (rc == 0) + statfs_pack(osfs, &sfs); + + return rc; +} + +/* make sure, that all dirty buffers are stored onto device. This is nothing to + * do for tmpfs in principle, but we will not aim to be smarter than tmpfs is + * and call sb->s_op->sync_fs() is any. */ +static int +fsfilt_tmpfs_sync(struct super_block *sb) +{ + if (sb->s_op->sync_fs) + return sb->s_op->sync_fs(sb); + + return 0; +} + +/* uses inode setattr method if any, or does default actions otherwise. */ +static int fsfilt_tmpfs_setattr(struct dentry *dentry, void *handle, + struct iattr *iattr, int do_trunc) +{ + int rc; + struct inode *inode = dentry->d_inode; + + lock_kernel(); + + /* preventing vmtruncate() to be called on inode_setattr(). */ + if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { + iattr->ia_valid &= ~ATTR_SIZE; + inode->i_size = iattr->ia_size; + } + + iattr->ia_mode = (inode->i_mode & S_IFMT) | + (iattr->ia_mode & ~S_IFMT); + + iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); + + if (inode->i_op->setattr) { + rc = inode->i_op->setattr(dentry, iattr); + } else { + if (!(rc = inode_change_ok(inode, iattr))) + rc = inode_setattr(inode, iattr); + } + + unlock_kernel(); + + return rc; +} + +/* nothing to do here. */ +static int +fsfilt_tmpfs_setup(struct super_block *sb) +{ + return 0; +} + +/* sets lmm into inode xattrs using passed transaction @handle. */ +static int +fsfilt_tmpfs_set_md(struct inode *inode, void *handle, + void *lmm, int lmm_size) +{ + int rc; + + lock_kernel(); + + rc = shmem_xattr_set(inode, XATTR_LUSTRE_MDS_LOV_EA, + lmm, lmm_size, 0); + + unlock_kernel(); + + if (rc) { + CERROR("error adding MD data to inode %lu: rc = %d\n", + inode->i_ino, rc); + } + + return rc; +} + +/* gets lmm from inode xattrs. */ +static int +fsfilt_tmpfs_get_md(struct inode *inode, void *lmm, + int lmm_size) +{ + int rc; + + LASSERT(down_trylock(&inode->i_sem) != 0); + + lock_kernel(); + + /* getting new key first. */ + rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_LOV_EA, + lmm, lmm_size); + + /* check for old one. */ + if (rc == -ENODATA) { + rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_OBJID, + lmm, lmm_size); + } + + unlock_kernel(); + + if (lmm == NULL) + return (rc == -ENODATA) ? 0 : rc; + + if (rc < 0) { + CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc = %d\n", + XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); + + memset(lmm, 0, lmm_size); + return (rc == -ENODATA) ? 0 : rc; + } + + return rc; +} + +/* reads data from passed @file to @buf. */ +static ssize_t +fsfilt_tmpfs_read(struct file *file, char *buf, + size_t count, loff_t *off) +{ + struct inode *inode = file->f_dentry->d_inode; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + return file->f_op->read(file, buf, count, off); +} + +/* writes data to regular @file. */ +static ssize_t +fsfilt_tmpfs_write(struct file *file, char *buf, + size_t count, loff_t *off) +{ + struct inode *inode = file->f_dentry->d_inode; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + return file->f_op->write(file, buf, count, off); +} + +/* puts passed page to page cache. */ +static int +fsfilt_tmpfs_putpage(struct inode *inode, struct page *page, + int lazy_cache) +{ + struct page *shmem_page; + struct shmem_inode_info *info = SHMEM_I(inode); + + down(&info->sem); + + /* getting page from shmem. It may be read from swap. And this is the + * reason, why we do not just add passed @page to pacge cache. */ + shmem_page = shmem_getpage_locked(inode, page->index); + + if (IS_ERR(shmem_page)) { + up(&info->sem); + return PTR_ERR(shmem_page); + } + + up(&info->sem); + + copy_page(kmap(shmem_page), kmap(page)); + kunmap(page); kunmap(shmem_page); + + /* taking care about possible cache aliasing. */ + if (inode->i_mapping->i_mmap_shared != NULL) + flush_dcache_page(shmem_page); + + SetPageDirty(shmem_page); + UnlockPage(shmem_page); + page_cache_release(shmem_page); + + return 0; +} + +/* returns inode page by its @index. */ +static struct page * +fsfilt_tmpfs_getpage(struct inode *inode, long int index) +{ + struct page *page; + + page = shmem_getpage_unlocked(inode, index); + + if (IS_ERR(page)) + return page; + + /* taking care about possible cache aliasing. */ + if (inode->i_mapping->i_mmap_shared != NULL) + flush_dcache_page(page); + + return page; +} + +/* fills up passed @buf by entry data. Used from readdir(). */ +static int +fillent(void *buf, const char *name, int namlen, + loff_t offset, ino_t ino, unsigned int d_type) +{ + __u16 rec_len; + fetch_hint_t *hint = (fetch_hint_t *)buf; + ext2_dirent_t *entry = hint->dirent; + + rec_len = EXT2_ENT_LEN(namlen); + + if ((hint->stop = (hint->chunk < rec_len))) + return -ENOENT; + + entry->file_type = 0; + + hint->count++; + hint->chunk -= rec_len; + hint->rec_len = rec_len; + hint->dirent += rec_len; + + entry->name_len = namlen; + entry->inode = cpu_to_le32(ino); + memcpy(entry->name, name, namlen); + entry->rec_len = cpu_to_le16(rec_len); + + return 0; +} + +/* this should be the same as in tmpfs. Should it be not hardcoded? */ +#define BOGO_ENTRY_SIZE (20) + +/* mostly needed for reading directory from @file on MDS. */ +static ssize_t +fsfilt_tmpfs_readpage(struct file *file, char *buf, + size_t count, loff_t *off) +{ + int rc = 0; + struct inode *inode = file->f_dentry->d_inode; + + if (S_ISREG(inode->i_mode)) { + rc = file->f_op->read(file, buf, count, off); + } else if (S_ISDIR(inode->i_mode)) { + int error; + loff_t offset; + fetch_hint_t hint; + ext2_dirent_t *dirent; + + /* positioning to passed @off. */ + offset = *(long int *)off / BOGO_ENTRY_SIZE; + + if (file->f_op->llseek(file, offset, 0) != offset) + return -ENOENT; + + /* reading @count bytesof data. */ + while (count > 0) { + hint.count = 0; + hint.file = file; + hint.dirent = buf; + hint.chunk = count; + hint.rec_len = count; + + if ((error = vfs_readdir(file, fillent, &hint)) < 0) + return error; + + /* we should have something after vfs_readdir() is + * finished. */ + LASSERT(hint.count != 0); + + /* last entry should be extended up to free page + * size. */ + if (hint.chunk > 0) { + __u16 rec_len; + + hint.dirent -= hint.rec_len; + dirent = (ext2_dirent_t *)hint.dirent; + + rec_len = le16_to_cpu(dirent->rec_len); + dirent->rec_len = cpu_to_le16(rec_len + hint.chunk); + } + + count -= PAGE_CACHE_SIZE; + *off += PAGE_CACHE_SIZE; + rc += PAGE_CACHE_SIZE; + } + + UPDATE_ATIME(inode); + } else { + rc = -EINVAL; + } + + return rc; +} + +static int +fsfilt_tmpfs_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, + void *handle, fsfilt_cb_t cb_func, + void *cb_data) +{ + cb_func(obd, last_rcvd, cb_data, 0); + return 0; +} + +static int +fsfilt_tmpfs_prep_san_write(struct inode *inode, long *blocks, + int nblocks, loff_t newsize) +{ + /* we do not need block numbers and other stuff, as it will not be + * used. */ + blocks[0] = 0; + + if (newsize > inode->i_size) + inode->i_size = newsize; + + return 0; +} + +/* this is used for reading configuration */ +static int +fsfilt_tmpfs_read_record(struct file *file, void *buf, + int size, loff_t *off) +{ + int error; + struct inode *inode = file->f_dentry->d_inode; + + lock_kernel(); + + if (inode->i_size < *off + size) { + size = inode->i_size - *off; + unlock_kernel(); + + if (size < 0) { + return -EIO; + } else if (size == 0) { + return 0; + } + } else { + unlock_kernel(); + } + + if ((error = fsfilt_tmpfs_read(file, buf, size, off)) < 0) + return error; + + return 0; +} + +/* this is used for writing configuration */ +static int +fsfilt_tmpfs_write_record(struct file *file, void *buf, + int size, loff_t *off, int sync) +{ + int error; + + if ((error = fsfilt_tmpfs_write(file, buf, size, off)) < 0) + return error; + + return 0; +} + +static struct fsfilt_operations fsfilt_tmpfs_ops = { + fs_type: "tmpfs", + fs_owner: THIS_MODULE, + fs_start: fsfilt_tmpfs_mtd_start, + fs_commit: fsfilt_tmpfs_mtd_commit, + fs_brw_start: fsfilt_tmpfs_io_start, + fs_commit_async: fsfilt_tmpfs_io_commit, + fs_commit_wait: fsfilt_tmpfs_commit_wait, + fs_iocontrol: fsfilt_tmpfs_iocontrol, + fs_set_md: fsfilt_tmpfs_set_md, + fs_get_md: fsfilt_tmpfs_get_md, + fs_readpage: fsfilt_tmpfs_readpage, + fs_getpage: fsfilt_tmpfs_getpage, + fs_putpage: fsfilt_tmpfs_putpage, + fs_add_journal_cb: fsfilt_tmpfs_add_journal_cb, + fs_statfs: fsfilt_tmpfs_statfs, + fs_sync: fsfilt_tmpfs_sync, + fs_prep_san_write: fsfilt_tmpfs_prep_san_write, + fs_write_record: fsfilt_tmpfs_write_record, + fs_read_record: fsfilt_tmpfs_read_record, + fs_setattr: fsfilt_tmpfs_setattr, + fs_setup: fsfilt_tmpfs_setup, +}; + +static int __init +fsfilt_tmpfs_init(void) +{ + int rc; + + trans_cache = kmem_cache_create("fsfilt_tmpfs_trans", + sizeof(struct tmpfs_trans), + 0, 0, NULL, NULL); + if (!trans_cache) { + CERROR("error allocating fsfilt transaction handle cache\n"); + GOTO(out, rc = -ENOMEM); + } + + if ((rc = fsfilt_register_ops(&fsfilt_tmpfs_ops))) + kmem_cache_destroy(trans_cache); +out: + return rc; +} + +static void __exit +fsfilt_tmpfs_exit(void) +{ + int rc; + + fsfilt_unregister_ops(&fsfilt_tmpfs_ops); + rc = kmem_cache_destroy(trans_cache); + + if (rc || atomic_read(&trans_count)) { + CERROR("can't free fsfilt trans cache: count %d, rc = %d\n", + atomic_read(&trans_count), rc); + } +} + +module_init(fsfilt_tmpfs_init); +module_exit(fsfilt_tmpfs_exit); + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre tmpfs Filesystem Helper v0.1"); +MODULE_LICENSE("GPL"); diff --git a/lustre/tests/ltmpfs-sanity.sh b/lustre/tests/ltmpfs-sanity.sh new file mode 100755 index 0000000..a6d921e --- /dev/null +++ b/lustre/tests/ltmpfs-sanity.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Currently the following tests do not pass: +# 47 - due to unresolvable symbol in UML local libc +# 52a, 52b - due to not implemented ioctl() in tmpfs +# 57a - due to inability to be supplied to tmpfs +# 56 - due to some unknown reason yet. + +NAME=local FSTYPE=tmpfs MDSDEV=tmpfs OSTDEV=tmpfs sh llmount.sh && \ +START=: CLEAN=: EXCEPT="47 52a 52b 56 57a" sh sanity.sh diff --git a/lustre/tests/ltmpfs.sh b/lustre/tests/ltmpfs.sh new file mode 100755 index 0000000..c561572 --- /dev/null +++ b/lustre/tests/ltmpfs.sh @@ -0,0 +1 @@ +NAME=local FSTYPE=tmpfs MDSDEV=tmpfs OSTDEV=tmpfs sh llmount.sh