lustre/mds/mds_ext3.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  *  linux/mds/mds_ext3.c
   5  *
   6  *  Lustre Metadata Server (mds) journal abstraction routines
   7  *
   8  *  Copyright (C) 2002  Cluster File Systems, Inc.
   9  *  author: Andreas Dilger <adilger@clusterfs.com>
  10  *
  11  *  This code is issued under the GNU General Public License.
  12  *  See the file COPYING in this distribution
  13  *
  14  */
  15
  16 #define DEBUG_SUBSYSTEM S_MDS
  17
  18 #include <linux/fs.h>
  19 #include <linux/jbd.h>
  20 #include <linux/ext3_fs.h>
  21 #include <linux/ext3_jbd.h>
  22 #include <linux/lustre_mds.h>
  23 #include <linux/module.h>
  24
  25 static struct mds_fs_operations mds_ext3_fs_ops;
  26 static kmem_cache_t *jcb_cache;
  27 static int jcb_cache_count;
  28
  29 struct mds_cb_data {
  30         struct journal_callback cb_jcb;
  31         struct mds_obd *cb_mds;
  32         __u64 cb_last_rcvd;
  33 };
  34
  35 /*
  36  * We don't currently need any additional blocks for rmdir and
  37  * unlink transactions because we are storing the OST oa_id inside
  38  * the inode (which we will be changing anyways as part of this
  39  * transaction).
  40  */
  41 static void *mds_ext3_start(struct inode *inode, int op)
  42 {
  43         /* For updates to the last recieved file */
  44         int nblocks = EXT3_DATA_TRANS_BLOCKS;
  45
  46         switch(op) {
  47         case MDS_FSOP_RMDIR:
  48         case MDS_FSOP_UNLINK:
  49                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
  50                 break;
  51         case MDS_FSOP_RENAME:
  52                 /* We may be modifying two directories */
  53                 nblocks += EXT3_DATA_TRANS_BLOCKS;
  54         case MDS_FSOP_SYMLINK:
  55                 /* Possible new block + block bitmap + GDT for long symlink */
  56                 nblocks += 3;
  57         case MDS_FSOP_CREATE:
  58         case MDS_FSOP_MKDIR:
  59         case MDS_FSOP_MKNOD:
  60                 /* New inode + block bitmap + GDT for new file */
  61                 nblocks += 3;
  62         case MDS_FSOP_LINK:
  63                 /* Change parent directory */
  64                 nblocks += EXT3_DATA_TRANS_BLOCKS;
  65                 break;
  66         case MDS_FSOP_SETATTR:
  67                 /* Setattr on inode */
  68                 nblocks += 1;
  69                 break;
  70         default: CERROR("unknown transaction start op %d\n", op);
  71                  LBUG();
  72         }
  73
  74         return journal_start(EXT3_JOURNAL(inode), nblocks);
  75 }
  76
  77 static int mds_ext3_commit(struct inode *inode, void *handle)
  78 {
  79         return journal_stop((handle_t *)handle);
  80 }
  81
  82 static int mds_ext3_setattr(struct dentry *dentry, void *handle,
  83                             struct iattr *iattr)
  84 {
  85         struct inode *inode = dentry->d_inode;
  86
  87         /* a _really_ horrible hack to avoid removing the data stored
  88            in the block pointers; this data is the object id
  89            this will go into an extended attribute at some point.
  90         */
  91         if (iattr->ia_valid & ATTR_SIZE) {
  92                 /* ATTR_SIZE would invoke truncate: clear it */
  93                 iattr->ia_valid &= ~ATTR_SIZE;
  94                 inode->i_size = iattr->ia_size;
  95
  96                 /* an _even_more_ horrible hack to make this hack work with
  97                  * ext3.  This is because ext3 keeps a separate inode size
  98                  * until the inode is committed to ensure consistency.  This
  99                  * will also go away with the move to EAs.
 100                  */
 101                 EXT3_I(inode)->i_disksize = inode->i_size;
 102
 103                 /* make sure _something_ gets set - so new inode
 104                    goes to disk (probably won't work over XFS */
 105                 if (!iattr->ia_valid & ATTR_MODE) {
 106                         iattr->ia_valid |= ATTR_MODE;
 107                         iattr->ia_mode = inode->i_mode;
 108                 }
 109         }
 110
 111         if (inode->i_op->setattr)
 112                 return inode->i_op->setattr(dentry, iattr);
 113         else
 114                 return inode_setattr(inode, iattr);
 115 }
 116
 117 /*
 118  * FIXME: nasty hack - store the object id in the first two
 119  *        direct block spots.  This should be done with EAs...
 120  *        Note also that this does not currently mark the inode
 121  *        dirty (it currently is used with other operations that
 122  *        subsequently also mark the inode dirty).
 123  */
 124 static int mds_ext3_set_objid(struct inode *inode, void *handle, obd_id id)
 125 {
 126         *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id);
 127         return 0;
 128 }
 129
 130 static int mds_ext3_get_objid(struct inode *inode, obd_id *id)
 131 {
 132         *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data));
 133
 134         return 0;
 135 }
 136
 137 static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
 138                                  loff_t *offset)
 139 {
 140         struct inode *inode = file->f_dentry->d_inode;
 141         int rc = 0;
 142
 143         if (S_ISREG(inode->i_mode))
 144                 rc = file->f_op->read(file, buf, count, offset);
 145         else {
 146                 struct buffer_head *bh;
 147
 148                 /* FIXME: this assumes the blocksize == count, but the calling
 149                  *        function will detect this as an error for now */
 150                 bh = ext3_bread(NULL, inode,
 151                                 *offset >> inode->i_sb->s_blocksize_bits,
 152                                 0, &rc);
 153
 154                 if (bh) {
 155                         memcpy(buf, bh->b_data, inode->i_blksize);
 156                         brelse(bh);
 157                         rc = inode->i_blksize;
 158                 }
 159         }
 160
 161         return rc;
 162 }
 163
 164 static void mds_ext3_delete_inode(struct inode *inode)
 165 {
 166         if (S_ISREG(inode->i_mode)) {
 167                 void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK);
 168
 169                 if (IS_ERR(handle)) {
 170                         CERROR("unable to start transaction");
 171                         EXIT;
 172                         return;
 173                 }
 174                 if (mds_ext3_set_objid(inode, handle, 0))
 175                         CERROR("error clearing objid on %ld\n", inode->i_ino);
 176
 177                 if (mds_ext3_fs_ops.cl_delete_inode)
 178                         mds_ext3_fs_ops.cl_delete_inode(inode);
 179
 180                 if (mds_ext3_commit(inode, handle))
 181                         CERROR("error closing handle on %ld\n", inode->i_ino);
 182         } else
 183                 mds_ext3_fs_ops.cl_delete_inode(inode);
 184 }
 185
 186
 187 static void mds_ext3_callback_status(void *jcb, int error)
 188 {
 189         struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
 190
 191         CDEBUG(D_EXT2, "got callback for last_rcvd %Ld: rc = %d\n",
 192                mcb->cb_last_rcvd, error);
 193         if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
 194                 mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
 195
 196         kmem_cache_free(jcb_cache, mcb);
 197         --jcb_cache_count;
 198 }
 199
 #ifdef HAVE_JOURNAL_CALLBACK
 /*
  * Adapter for the callback variant that carries no status argument:
  * forwards @cb_data to mds_ext3_callback_status() with an error of 0.
  */
 static void mds_ext3_callback_func(void *cb_data)
 {
         const int no_error = 0;

         mds_ext3_callback_status(cb_data, no_error);
 }
 #endif
 206
 207 static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
 208 {
 209         struct mds_cb_data *mcb;
 210
 211         mcb = kmem_cache_alloc(jcb_cache, GFP_NOFS);
 212         if (!mcb)
 213                 RETURN(-ENOMEM);
 214
 215         ++jcb_cache_count;
 216         mcb->cb_mds = mds;
 217         mcb->cb_last_rcvd = mds->mds_last_rcvd;
 218
 219 #ifdef HAVE_JOURNAL_CALLBACK_STATUS
 220         CDEBUG(D_EXT2, "set callback for last_rcvd: %Ld\n",
 221                (unsigned long long)mcb->cb_last_rcvd);
 222         journal_callback_set(handle, mds_ext3_callback_status,
 223                              (void *)mcb);
 224 #elif defined(HAVE_JOURNAL_CALLBACK)
 225         /* XXX original patch version - remove soon */
 226 #warning "using old journal callback kernel patch, please update"
 227         CDEBUG(D_EXT2, "set callback for last_rcvd: %Ld\n",
 228                (unsigned long long)mcb->cb_last_rcvd);
 229         journal_callback_set(handle, mds_ext3_callback_func, mcb);
 230 #else
 231 #warning "no journal callback kernel patch, faking it..."
 232         {
 233         static long next = 0;
 234
 235         if (time_after(jiffies, next)) {
 236                 CERROR("no journal callback kernel patch, faking it...\n");
 237                 next = jiffies + 300 * HZ;
 238         }
 239         }
 240         mds_ext3_callback_status((struct journal_callback *)mcb, 0);
 241 #endif
 242
 243         return 0;
 244 }
 245
 246 static int mds_ext3_journal_data(struct file *filp)
 247 {
 248         struct inode *inode = filp->f_dentry->d_inode;
 249
 250         EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
 251
 252         return 0;
 253 }
 254
 255 static struct mds_fs_operations mds_ext3_fs_ops = {
 256         fs_owner:               THIS_MODULE,
 257         fs_start:               mds_ext3_start,
 258         fs_commit:              mds_ext3_commit,
 259         fs_setattr:             mds_ext3_setattr,
 260         fs_set_objid:           mds_ext3_set_objid,
 261         fs_get_objid:           mds_ext3_get_objid,
 262         fs_readpage:            mds_ext3_readpage,
 263         fs_delete_inode:        mds_ext3_delete_inode,
 264         cl_delete_inode:        clear_inode,
 265         fs_journal_data:        mds_ext3_journal_data,
 266         fs_set_last_rcvd:       mds_ext3_set_last_rcvd,
 267 };
 268
 269 static int __init mds_ext3_init(void)
 270 {
 271         int rc;
 272
 273         jcb_cache = kmem_cache_create("mds_ext3_jcb",
 274                                       sizeof(struct mds_cb_data), 0,
 275                                       0, NULL, NULL);
 276         if (!jcb_cache) {
 277                 CERROR("error allocating MDS journal callback cache\n");
 278                 GOTO(out, rc = -ENOMEM);
 279         }
 280
 281         rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3");
 282
 283         if (rc)
 284                 kmem_cache_destroy(jcb_cache);
 285 out:
 286         return rc;
 287 }
 288
 289 static void __exit mds_ext3_exit(void)
 290 {
 291         int rc = 0;
 292
 293         mds_unregister_fs_type("ext3");
 294         rc = kmem_cache_destroy(jcb_cache);
 295
 296         if (rc || jcb_cache_count) {
 297                 CERROR("can't free MDS callback cache: count %d, rc = %d\n",
 298                        jcb_cache_count, rc);
 299         }
 300 }
 301
 302 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 303 MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1");
 304 MODULE_LICENSE("GPL");
 305
 306 module_init(mds_ext3_init);
 307 module_exit(mds_ext3_exit);