lustre/mds/mds_ext3.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  *  lustre/mds/mds_ext3.c
   5  *  Lustre Metadata Server (mds) journal abstraction routines
   6  *
   7  *  Copyright (C) 2002  Cluster File Systems, Inc.
   8  *   Author: Andreas Dilger <adilger@clusterfs.com>
   9  *
  10  *   This file is part of Lustre, http://www.lustre.org.
  11  *
  12  *   Lustre is free software; you can redistribute it and/or
  13  *   modify it under the terms of version 2 of the GNU General Public
  14  *   License as published by the Free Software Foundation.
  15  *
  16  *   Lustre is distributed in the hope that it will be useful,
  17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  *   GNU General Public License for more details.
  20  *
  21  *   You should have received a copy of the GNU General Public License
  22  *   along with Lustre; if not, write to the Free Software
  23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24  *
  25  */
  26
  27 #define DEBUG_SUBSYSTEM S_MDS
  28
  29 #include <linux/fs.h>
  30 #include <linux/jbd.h>
  31 #include <linux/ext3_fs.h>
  32 #include <linux/ext3_jbd.h>
  33 #include <linux/lustre_mds.h>
  34 #include <linux/module.h>
  35
/* Forward declaration: mds_ext3_delete_inode() reads the cl_delete_inode
 * member of this table before the initialized definition further down. */
static struct mds_fs_operations mds_ext3_fs_ops;
/* Slab cache that struct mds_cb_data objects are allocated from; created in
 * mds_ext3_init() and destroyed in mds_ext3_exit(). */
static kmem_cache_t *mcb_cache;
/* Count of mds_cb_data objects allocated and not yet freed; incremented in
 * mds_ext3_set_last_rcvd(), decremented in mds_ext3_callback_status(), and
 * reported at module exit if non-zero.  Updated without any locking visible
 * in this file. */
static int mcb_cache_count;
  39
/*
 * Record handed to the journal for each last_rcvd update.
 *
 * This file casts a struct mds_cb_data * to struct journal_callback * and
 * back again, so cb_jcb must remain the first member.
 */
struct mds_cb_data {
        struct journal_callback cb_jcb; /* embedded journal callback, passed to journal_callback_set() */
        struct mds_obd *cb_mds;         /* MDS whose mds_last_committed the callback updates */
        __u64 cb_last_rcvd;             /* mds_last_rcvd value captured when the callback was set */
};
  45
  46 /*
  47  * We don't currently need any additional blocks for rmdir and
  48  * unlink transactions because we are storing the OST oa_id inside
  49  * the inode (which we will be changing anyways as part of this
  50  * transaction).
  51  */
  52 static void *mds_ext3_start(struct inode *inode, int op)
  53 {
  54         /* For updates to the last recieved file */
  55         int nblocks = EXT3_DATA_TRANS_BLOCKS;
  56         void *handle;
  57
  58         switch(op) {
  59         case MDS_FSOP_RMDIR:
  60         case MDS_FSOP_UNLINK:
  61                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
  62                 break;
  63         case MDS_FSOP_RENAME:
  64                 /* We may be modifying two directories */
  65                 nblocks += EXT3_DATA_TRANS_BLOCKS;
  66         case MDS_FSOP_SYMLINK:
  67                 /* Possible new block + block bitmap + GDT for long symlink */
  68                 nblocks += 3;
  69         case MDS_FSOP_CREATE:
  70         case MDS_FSOP_MKDIR:
  71         case MDS_FSOP_MKNOD:
  72                 /* New inode + block bitmap + GDT for new file */
  73                 nblocks += 3;
  74         case MDS_FSOP_LINK:
  75                 /* Change parent directory */
  76                 nblocks += EXT3_DATA_TRANS_BLOCKS;
  77                 break;
  78         case MDS_FSOP_SETATTR:
  79                 /* Setattr on inode */
  80                 nblocks += 1;
  81                 break;
  82         default: CERROR("unknown transaction start op %d\n", op);
  83                  LBUG();
  84         }
  85
  86         lock_kernel();
  87         handle = journal_start(EXT3_JOURNAL(inode), nblocks);
  88         unlock_kernel();
  89
  90         return handle;
  91 }
  92
  93 static int mds_ext3_commit(struct inode *inode, void *handle)
  94 {
  95         int rc;
  96
  97         lock_kernel();
  98         rc = journal_stop((handle_t *)handle);
  99         unlock_kernel();
 100
 101         return rc;
 102 }
 103
 104 static int mds_ext3_setattr(struct dentry *dentry, void *handle,
 105                             struct iattr *iattr)
 106 {
 107         struct inode *inode = dentry->d_inode;
 108         int rc;
 109
 110         lock_kernel();
 111
 112         /* a _really_ horrible hack to avoid removing the data stored
 113            in the block pointers; this data is the object id
 114            this will go into an extended attribute at some point.
 115         */
 116         if (iattr->ia_valid & ATTR_SIZE) {
 117                 /* ATTR_SIZE would invoke truncate: clear it */
 118                 iattr->ia_valid &= ~ATTR_SIZE;
 119                 inode->i_size = iattr->ia_size;
 120
 121                 /* an _even_more_ horrible hack to make this hack work with
 122                  * ext3.  This is because ext3 keeps a separate inode size
 123                  * until the inode is committed to ensure consistency.  This
 124                  * will also go away with the move to EAs.
 125                  */
 126                 EXT3_I(inode)->i_disksize = inode->i_size;
 127
 128                 /* make sure _something_ gets set - so new inode
 129                    goes to disk (probably won't work over XFS */
 130                 if (!iattr->ia_valid & ATTR_MODE) {
 131                         iattr->ia_valid |= ATTR_MODE;
 132                         iattr->ia_mode = inode->i_mode;
 133                 }
 134         }
 135
 136         if (inode->i_op->setattr)
 137                 rc =  inode->i_op->setattr(dentry, iattr);
 138         else
 139                 rc = inode_setattr(inode, iattr);
 140
 141         unlock_kernel();
 142
 143         return rc;
 144 }
 145
 146 /*
 147  * FIXME: nasty hack - store the object id in the first two
 148  *        direct block spots.  This should be done with EAs...
 149  *        Note also that this does not currently mark the inode
 150  *        dirty (it currently is used with other operations that
 151  *        subsequently also mark the inode dirty).
 152  */
 153 static int mds_ext3_set_md(struct inode *inode, void *handle,
 154                            void *obd_md, int len)
 155 {
 156         *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id);
 157         return 0;
 158 }
 159
 160 static int mds_ext3_get_objid(struct inode *inode, obd_id *id)
 161 {
 162         *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data));
 163
 164         return 0;
 165 }
 166
 167 static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
 168                                  loff_t *offset)
 169 {
 170         struct inode *inode = file->f_dentry->d_inode;
 171         int rc = 0;
 172
 173         if (S_ISREG(inode->i_mode))
 174                 rc = file->f_op->read(file, buf, count, offset);
 175         else {
 176                 struct buffer_head *bh;
 177
 178                 /* FIXME: this assumes the blocksize == count, but the calling
 179                  *        function will detect this as an error for now */
 180                 bh = ext3_bread(NULL, inode,
 181                                 *offset >> inode->i_sb->s_blocksize_bits,
 182                                 0, &rc);
 183
 184                 if (bh) {
 185                         memcpy(buf, bh->b_data, inode->i_blksize);
 186                         brelse(bh);
 187                         rc = inode->i_blksize;
 188                 }
 189         }
 190
 191         return rc;
 192 }
 193
 194 static void mds_ext3_delete_inode(struct inode *inode)
 195 {
 196         if (S_ISREG(inode->i_mode)) {
 197                 void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK);
 198
 199                 if (IS_ERR(handle)) {
 200                         CERROR("unable to start transaction");
 201                         EXIT;
 202                         return;
 203                 }
 204                 if (mds_ext3_set_objid(inode, handle, 0))
 205                         CERROR("error clearing objid on %ld\n", inode->i_ino);
 206
 207                 if (mds_ext3_fs_ops.cl_delete_inode)
 208                         mds_ext3_fs_ops.cl_delete_inode(inode);
 209
 210                 if (mds_ext3_commit(inode, handle))
 211                         CERROR("error closing handle on %ld\n", inode->i_ino);
 212         } else
 213                 mds_ext3_fs_ops.cl_delete_inode(inode);
 214 }
 215
 216 static void mds_ext3_callback_status(struct journal_callback *jcb, int error)
 217 {
 218         struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
 219
 220         CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n",
 221                mcb->cb_last_rcvd, error);
 222         if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
 223                 mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
 224
 225         kmem_cache_free(mcb_cache, mcb);
 226         --mcb_cache_count;
 227 }
 228
 229 static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
 230 {
 231         struct mds_cb_data *mcb;
 232
 233         mcb = kmem_cache_alloc(mcb_cache, GFP_NOFS);
 234         if (!mcb)
 235                 RETURN(-ENOMEM);
 236
 237         ++mcb_cache_count;
 238         mcb->cb_mds = mds;
 239         mcb->cb_last_rcvd = mds->mds_last_rcvd;
 240
 241 #ifdef HAVE_JOURNAL_CALLBACK_STATUS
 242         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n",
 243                mcb->cb_last_rcvd);
 244         lock_kernel();
 245         /* Note that an "incompatible pointer" warning here is OK for now */
 246         journal_callback_set(handle, mds_ext3_callback_status,
 247                              (struct journal_callback *)mcb);
 248         unlock_kernel();
 249 #else
 250 #warning "no journal callback kernel patch, faking it..."
 251         {
 252         static long next = 0;
 253
 254         if (time_after(jiffies, next)) {
 255                 CERROR("no journal callback kernel patch, faking it...\n");
 256                 next = jiffies + 300 * HZ;
 257         }
 258         }
 259         mds_ext3_callback_status((struct journal_callback *)mcb, 0);
 260 #endif
 261
 262         return 0;
 263 }
 264
 265 static int mds_ext3_journal_data(struct file *filp)
 266 {
 267         struct inode *inode = filp->f_dentry->d_inode;
 268
 269         EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
 270
 271         return 0;
 272 }
 273
 274 static struct mds_fs_operations mds_ext3_fs_ops = {
 275         fs_owner:               THIS_MODULE,
 276         fs_start:               mds_ext3_start,
 277         fs_commit:              mds_ext3_commit,
 278         fs_setattr:             mds_ext3_setattr,
 279         fs_set_objid:           mds_ext3_set_objid,
 280         fs_get_objid:           mds_ext3_get_objid,
 281         fs_readpage:            mds_ext3_readpage,
 282         fs_delete_inode:        mds_ext3_delete_inode,
 283         cl_delete_inode:        clear_inode,
 284         fs_journal_data:        mds_ext3_journal_data,
 285         fs_set_last_rcvd:       mds_ext3_set_last_rcvd,
 286 };
 287
 288 static int __init mds_ext3_init(void)
 289 {
 290         int rc;
 291
 292         mcb_cache = kmem_cache_create("mds_ext3_mcb",
 293                                       sizeof(struct mds_cb_data), 0,
 294                                       0, NULL, NULL);
 295         if (!mcb_cache) {
 296                 CERROR("error allocating MDS journal callback cache\n");
 297                 GOTO(out, rc = -ENOMEM);
 298         }
 299
 300         rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3");
 301
 302         if (rc)
 303                 kmem_cache_destroy(mcb_cache);
 304 out:
 305         return rc;
 306 }
 307
 308 static void __exit mds_ext3_exit(void)
 309 {
 310         int rc;
 311
 312         mds_unregister_fs_type("ext3");
 313         rc = kmem_cache_destroy(mcb_cache);
 314
 315         if (rc || mcb_cache_count) {
 316                 CERROR("can't free MDS callback cache: count %d, rc = %d\n",
 317                        mcb_cache_count, rc);
 318         }
 319 }
 320
/* Module metadata */
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1");
MODULE_LICENSE("GPL");

/* Entry points: mds_ext3_init() on load, mds_ext3_exit() on unload */
module_init(mds_ext3_init);
module_exit(mds_ext3_exit);