lustre/mds/mds_ext3.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  *  linux/mds/mds_ext3.c
   5  *
   6  *  Lustre Metadata Server (mds) journal abstraction routines
   7  *
   8  *  Copyright (C) 2002  Cluster File Systems, Inc.
   9  *  author: Andreas Dilger <adilger@clusterfs.com>
  10  *
  11  *  This code is issued under the GNU General Public License.
  12  *  See the file COPYING in this distribution
  13  *
  14  */
  15
  16 #define DEBUG_SUBSYSTEM S_MDS
  17
  18 #include <linux/fs.h>
  19 #include <linux/jbd.h>
  20 #include <linux/ext3_fs.h>
  21 #include <linux/ext3_jbd.h>
  22 #include <linux/lustre_mds.h>
  23 #include <linux/module.h>
  24
   25 static struct mds_fs_operations mds_ext3_fs_ops; /* forward declaration: used by mds_ext3_delete_inode() before the table is defined below */
   26 static kmem_cache_t *jcb_cache; /* slab cache for struct mds_cb_data; created in mds_ext3_init(), destroyed in mds_ext3_exit() */
   27 static int jcb_cache_count; /* callback records allocated but not yet freed; checked at module exit */
  28
   29 struct mds_cb_data { /* per-transaction record handed to the journal commit callback */
   30         struct journal_callback cb_jcb; /* keep first: this file casts between the callback pointer and struct mds_cb_data * */
   31         struct mds_obd *cb_mds; /* MDS whose mds_last_committed is advanced by the callback */
   32         __u64 cb_last_rcvd; /* copy of mds->mds_last_rcvd taken when the callback was set */
   33 };
  34
  35 /*
  36  * We don't currently need any additional blocks for rmdir and
  37  * unlink transactions because we are storing the OST oa_id inside
  38  * the inode (which we will be changing anyways as part of this
  39  * transaction).
  40  */
  41 static void *mds_ext3_start(struct inode *inode, int op)
  42 {
  43         /* For updates to the last recieved file */
  44         int nblocks = EXT3_DATA_TRANS_BLOCKS;
  45
  46         switch(op) {
  47         case MDS_FSOP_RMDIR:
  48         case MDS_FSOP_UNLINK:
  49                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
  50                 break;
  51         case MDS_FSOP_RENAME:
  52                 /* We may be modifying two directories */
  53                 nblocks += EXT3_DATA_TRANS_BLOCKS;
  54         case MDS_FSOP_SYMLINK:
  55                 /* Possible new block + block bitmap + GDT for long symlink */
  56                 nblocks += 3;
  57         case MDS_FSOP_CREATE:
  58         case MDS_FSOP_MKDIR:
  59         case MDS_FSOP_MKNOD:
  60                 /* New inode + block bitmap + GDT for new file */
  61                 nblocks += 3;
  62         case MDS_FSOP_LINK:
  63                 /* Change parent directory */
  64                 nblocks += EXT3_DATA_TRANS_BLOCKS;
  65                 break;
  66         case MDS_FSOP_SETATTR:
  67                 /* Setattr on inode */
  68                 nblocks += 1;
  69                 break;
  70         default: CERROR("unknown transaction start op %d\n", op);
  71                  LBUG();
  72         }
  73
  74         return journal_start(EXT3_JOURNAL(inode), nblocks);
  75 }
  76
  77 static int mds_ext3_commit(struct inode *inode, void *handle)
  78 {
  79         return journal_stop((handle_t *)handle);
  80 }
  81
  82 static int mds_ext3_setattr(struct dentry *dentry, void *handle,
  83                             struct iattr *iattr)
  84 {
  85         struct inode *inode = dentry->d_inode;
  86
  87         /* a _really_ horrible hack to avoid removing the data stored
  88            in the block pointers; this data is the object id
  89            this will go into an extended attribute at some point.
  90         */
  91         if (iattr->ia_valid & ATTR_SIZE) {
  92                 /* ATTR_SIZE would invoke truncate: clear it */
  93                 iattr->ia_valid &= ~ATTR_SIZE;
  94                 inode->i_size = iattr->ia_size;
  95
  96                 /* an _even_more_ horrible hack to make this hack work with
  97                  * ext3.  This is because ext3 keeps a separate inode size
  98                  * until the inode is committed to ensure consistency.  This
  99                  * will also go away with the move to EAs.
 100                  */
 101                 EXT3_I(inode)->i_disksize = inode->i_size;
 102
 103                 /* make sure _something_ gets set - so new inode
 104                    goes to disk (probably won't work over XFS */
 105                 if (!iattr->ia_valid & ATTR_MODE) {
 106                         iattr->ia_valid |= ATTR_MODE;
 107                         iattr->ia_mode = inode->i_mode;
 108                 }
 109         }
 110
 111         if (inode->i_op->setattr)
 112                 return inode->i_op->setattr(dentry, iattr);
 113         else
 114                 return inode_setattr(inode, iattr);
 115 }
 116
 117 /*
 118  * FIXME: nasty hack - store the object id in the first two
 119  *        direct block spots.  This should be done with EAs...
 120  *        Note also that this does not currently mark the inode
 121  *        dirty (it currently is used with other operations that
 122  *        subsequently also mark the inode dirty).
 123  */
 124 static int mds_ext3_set_objid(struct inode *inode, void *handle, obd_id id)
 125 {
 126         *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id);
 127         return 0;
 128 }
 129
 130 static int mds_ext3_get_objid(struct inode *inode, obd_id *id)
 131 {
 132         *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data));
 133
 134         return 0;
 135 }
 136
 137 static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
 138                                  loff_t *offset)
 139 {
 140         struct inode *inode = file->f_dentry->d_inode;
 141         int rc = 0;
 142
 143         if (S_ISREG(inode->i_mode))
 144                 rc = file->f_op->read(file, buf, count, offset);
 145         else {
 146                 struct buffer_head *bh;
 147
 148                 /* FIXME: this assumes the blocksize == count, but the calling
 149                  *        function will detect this as an error for now */
 150                 bh = ext3_bread(NULL, inode,
 151                                 *offset >> inode->i_sb->s_blocksize_bits,
 152                                 0, &rc);
 153
 154                 if (bh) {
 155                         memcpy(buf, bh->b_data, inode->i_blksize);
 156                         brelse(bh);
 157                         rc = inode->i_blksize;
 158                 }
 159         }
 160
 161         return rc;
 162 }
 163
 164 static void mds_ext3_delete_inode(struct inode *inode)
 165 {
 166         if (S_ISREG(inode->i_mode)) {
 167                 void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK);
 168
 169                 if (IS_ERR(handle)) {
 170                         CERROR("unable to start transaction");
 171                         EXIT;
 172                         return;
 173                 }
 174                 if (mds_ext3_set_objid(inode, handle, 0))
 175                         CERROR("error clearing objid on %ld\n", inode->i_ino);
 176
 177                 if (mds_ext3_fs_ops.cl_delete_inode)
 178                         mds_ext3_fs_ops.cl_delete_inode(inode);
 179
 180                 if (mds_ext3_commit(inode, handle))
 181                         CERROR("error closing handle on %ld\n", inode->i_ino);
 182         } else
 183                 mds_ext3_fs_ops.cl_delete_inode(inode);
 184 }
 185
 186 static void mds_ext3_callback_status(void *jcb, int error)
 187 {
 188         struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
 189
 190         CDEBUG(D_EXT2, "got callback for last_rcvd %Ld: rc = %d\n",
 191                mcb->cb_last_rcvd, error);
 192         if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
 193                 mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
 194
 195         kmem_cache_free(jcb_cache, mcb);
 196         --jcb_cache_count;
 197 }
 198
 199 #ifdef HAVE_JOURNAL_CALLBACK
 200 static void mds_ext3_callback_func(void *cb_data)
 201 {
 202         mds_ext3_callback_status(cb_data, 0);
 203 }
 204 #endif
 205
 206 static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
 207 {
 208         struct mds_cb_data *mcb;
 209
 210         mcb = kmem_cache_alloc(jcb_cache, GFP_NOFS);
 211         if (!mcb)
 212                 RETURN(-ENOMEM);
 213
 214         ++jcb_cache_count;
 215         mcb->cb_mds = mds;
 216         mcb->cb_last_rcvd = mds->mds_last_rcvd;
 217
 218 #ifdef HAVE_JOURNAL_CALLBACK_STATUS
 219         CDEBUG(D_EXT2, "set callback for last_rcvd: %Ld\n",
 220                (unsigned long long)mcb->cb_last_rcvd);
 221         journal_callback_set(handle, mds_ext3_callback_status,
 222                              (void *)mcb);
 223 #elif defined(HAVE_JOURNAL_CALLBACK)
 224         /* XXX original patch version - remove soon */
 225 #warning "using old journal callback kernel patch, please update"
 226         CDEBUG(D_EXT2, "set callback for last_rcvd: %Ld\n",
 227                (unsigned long long)mcb->cb_last_rcvd);
 228         journal_callback_set(handle, mds_ext3_callback_func, mcb);
 229 #else
 230 #warning "no journal callback kernel patch, faking it..."
 231         {
 232         static long next = 0;
 233
 234         if (time_after(jiffies, next)) {
 235                 CERROR("no journal callback kernel patch, faking it...\n");
 236                 next = jiffies + 300 * HZ;
 237         }
 238         }
 239         mds_ext3_callback_status((struct journal_callback *)mcb, 0);
 240 #endif
 241
 242         return 0;
 243 }
 244
 245 static int mds_ext3_journal_data(struct file *filp)
 246 {
 247         struct inode *inode = filp->f_dentry->d_inode;
 248
 249         EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
 250
 251         return 0;
 252 }
 253
 254 static struct mds_fs_operations mds_ext3_fs_ops = {
 255         fs_start:       mds_ext3_start,
 256         fs_commit:      mds_ext3_commit,
 257         fs_setattr:     mds_ext3_setattr,
 258         fs_set_objid:   mds_ext3_set_objid,
 259         fs_get_objid:   mds_ext3_get_objid,
 260         fs_readpage:    mds_ext3_readpage,
 261         fs_delete_inode:mds_ext3_delete_inode,
 262         cl_delete_inode:clear_inode,
 263         fs_journal_data:mds_ext3_journal_data,
 264         fs_set_last_rcvd:mds_ext3_set_last_rcvd,
 265 };
 266
 267 static int __init mds_ext3_init(void)
 268 {
 269         int rc;
 270
 271         jcb_cache = kmem_cache_create("mds_ext3_jcb",
 272                                       sizeof(struct mds_cb_data), 0,
 273                                       0, NULL, NULL);
 274         if (!jcb_cache) {
 275                 CERROR("error allocating MDS journal callback cache\n");
 276                 GOTO(out, rc = -ENOMEM);
 277         }
 278
 279         rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3");
 280
 281         if (rc)
 282                 kmem_cache_destroy(jcb_cache);
 283 out:
 284         return rc;
 285 }
 286
 287 static void __exit mds_ext3_exit(void)
 288 {
 289         int rc = 0;
 290
 291         mds_unregister_fs_type("ext3");
 292         rc = kmem_cache_destroy(jcb_cache);
 293
 294         if (rc || jcb_cache_count) {
 295                 CERROR("can't free MDS callback cache: count %d, rc = %d\n",
 296                        jcb_cache_count, rc);
 297         }
 298 }
 299
  300 MODULE_AUTHOR("Cluster File Systems, Inc. <adilger@clusterfs.com>");
  301 MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1");
  302 MODULE_LICENSE("GPL");
  303
  304 module_init(mds_ext3_init); /* creates the callback cache and registers the "ext3" MDS fs type */
  305 module_exit(mds_ext3_exit); /* unregisters "ext3" and destroys the callback cache */