lustre/mds/mds_extN.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  *  lustre/mds/mds_extN.c
   5  *  Lustre Metadata Server (mds) journal abstraction routines
   6  *
   7  *  Copyright (c) 2002 Cluster File Systems, Inc.
   8  *   Author: Andreas Dilger <adilger@clusterfs.com>
   9  *
  10  *   This file is part of Lustre, http://www.lustre.org.
  11  *
  12  *   Lustre is free software; you can redistribute it and/or
  13  *   modify it under the terms of version 2 of the GNU General Public
  14  *   License as published by the Free Software Foundation.
  15  *
  16  *   Lustre is distributed in the hope that it will be useful,
  17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  *   GNU General Public License for more details.
  20  *
  21  *   You should have received a copy of the GNU General Public License
  22  *   along with Lustre; if not, write to the Free Software
  23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24  */
  25
  26 #define DEBUG_SUBSYSTEM S_MDS
  27
  28 #include <linux/fs.h>
  29 #include <linux/jbd.h>
  30 #include <linux/extN_fs.h>
  31 #include <linux/extN_jbd.h>
  32 #include <linux/extN_xattr.h>
  33 #include <linux/lustre_mds.h>
  34 #include <linux/module.h>
  35 #include <linux/obd_lov.h>
  36
/* Operations table handed to mds_register_fs_type(); initialized later in
 * this file. */
static struct mds_fs_operations mds_extN_fs_ops;
/* Slab cache from which struct mds_cb_data objects are allocated. */
static kmem_cache_t *jcb_cache;
/* Number of mds_cb_data objects currently outstanding: incremented when one
 * is allocated and decremented when the commit callback frees it.
 * NOTE(review): updated without any visible locking - confirm the callers
 * serialize access. */
static int jcb_cache_count;

/*
 * Per-transaction journal callback record.
 * NOTE(review): the callback code casts between this struct and
 * struct journal_callback, so cb_jcb presumably has to stay the first
 * member - confirm before reordering.
 */
struct mds_cb_data {
        struct journal_callback cb_jcb;
        struct mds_obd *cb_mds;         /* MDS whose mds_last_committed is updated */
        __u64 cb_last_rcvd;             /* mds_last_rcvd captured when the callback was set */
};

/* Name index and attribute name passed to extN_xattr_set()/extN_xattr_get(). */
#define EXTN_XATTR_INDEX_LUSTRE         5
#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"

/* Magic stored little-endian in lmd_magic and verified when the EA is read. */
#define XATTR_MDS_MO_MAGIC              0xEA0BD047
  51
  52 /*
  53  * We don't currently need any additional blocks for rmdir and
  54  * unlink transactions because we are storing the OST oa_id inside
  55  * the inode (which we will be changing anyways as part of this
  56  * transaction).
  57  */
  58 static void *mds_extN_start(struct inode *inode, int op)
  59 {
  60         /* For updates to the last recieved file */
  61         int nblocks = EXTN_DATA_TRANS_BLOCKS;
  62
  63         switch(op) {
  64         case MDS_FSOP_RMDIR:
  65         case MDS_FSOP_UNLINK:
  66                 nblocks += EXTN_DELETE_TRANS_BLOCKS;
  67                 break;
  68         case MDS_FSOP_RENAME:
  69                 /* We may be modifying two directories */
  70                 nblocks += EXTN_DATA_TRANS_BLOCKS;
  71         case MDS_FSOP_SYMLINK:
  72                 /* Possible new block + block bitmap + GDT for long symlink */
  73                 nblocks += 3;
  74         case MDS_FSOP_CREATE:
  75         case MDS_FSOP_MKDIR:
  76         case MDS_FSOP_MKNOD:
  77                 /* New inode + block bitmap + GDT for new file */
  78                 nblocks += 3;
  79         case MDS_FSOP_LINK:
  80                 /* Change parent directory */
  81                 nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
  82                 break;
  83         case MDS_FSOP_SETATTR:
  84                 /* Setattr on inode */
  85                 nblocks += 1;
  86                 break;
  87         default: CERROR("unknown transaction start op %d\n", op);
  88                  LBUG();
  89         }
  90
  91         return journal_start(EXTN_JOURNAL(inode), nblocks);
  92 }
  93
  94 static int mds_extN_commit(struct inode *inode, void *handle)
  95 {
  96         return journal_stop((handle_t *)handle);
  97 }
  98
  99 /* Assumes BKL is held */
 100 static int mds_extN_setattr(struct dentry *dentry, void *handle,
 101                             struct iattr *iattr)
 102 {
 103         struct inode *inode = dentry->d_inode;
 104
 105         if (inode->i_op->setattr)
 106                 return inode->i_op->setattr(dentry, iattr);
 107         else
 108                 return inode_setattr(inode, iattr);
 109 }
 110
 111 static int mds_extN_set_md(struct inode *inode, void *handle,
 112                            struct lov_mds_md *md)
 113 {
 114         int rc;
 115
 116         down(&inode->i_sem);
 117         lock_kernel();
 118         if (md == NULL)
 119                 rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
 120                                     XATTR_LUSTRE_MDS_OBJID, NULL, 0, 0);
 121         else {
 122                 md->lmd_magic = cpu_to_le32(XATTR_MDS_MO_MAGIC);
 123                 rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
 124                                     XATTR_LUSTRE_MDS_OBJID, md,
 125                                     md->lmd_easize, XATTR_CREATE);
 126         }
 127         unlock_kernel();
 128         up(&inode->i_sem);
 129
 130         if (rc) {
 131                 CERROR("error adding objectid %Ld to inode %ld: %d\n",
 132                        (unsigned long long)md->lmd_object_id, inode->i_ino, rc);
 133                 LBUG();
 134         }
 135         return rc;
 136 }
 137
 138 static int mds_extN_get_md(struct inode *inode, struct lov_mds_md *md)
 139 {
 140         int rc;
 141         int size = md->lmd_easize;
 142
 143         down(&inode->i_sem);
 144         lock_kernel();
 145         rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE,
 146                             XATTR_LUSTRE_MDS_OBJID, md, size);
 147         unlock_kernel();
 148         up(&inode->i_sem);
 149
 150         if (rc < 0) {
 151                 CDEBUG(D_INFO, "error getting EA %s from MDS inode %ld: "
 152                        "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
 153                 memset(md, 0, size);
 154                 return rc;
 155         } else if (md == NULL)
 156                 return rc;
 157
 158         if (md->lmd_magic != cpu_to_le32(XATTR_MDS_MO_MAGIC)) {
 159                 CERROR("MDS striping md for ino %ld has bad magic\n",
 160                        inode->i_ino);
 161                 rc = -EINVAL;
 162         } else {
 163                 /* This field is byteswapped because it appears in the
 164                  * catalogue.  All others are opaque to the MDS */
 165                 md->lmd_object_id = le64_to_cpu(md->lmd_object_id);
 166         }
 167
 168         return rc;
 169 }
 170
 171 static ssize_t mds_extN_readpage(struct file *file, char *buf, size_t count,
 172                                  loff_t *offset)
 173 {
 174         struct inode *inode = file->f_dentry->d_inode;
 175         int rc = 0;
 176
 177         if (S_ISREG(inode->i_mode))
 178                 rc = file->f_op->read(file, buf, count, offset);
 179         else {
 180                 struct buffer_head *bh;
 181
 182                 /* FIXME: this assumes the blocksize == count, but the calling
 183                  *        function will detect this as an error for now */
 184                 bh = extN_bread(NULL, inode,
 185                                 *offset >> inode->i_sb->s_blocksize_bits,
 186                                 0, &rc);
 187
 188                 if (bh) {
 189                         memcpy(buf, bh->b_data, inode->i_blksize);
 190                         brelse(bh);
 191                         rc = inode->i_blksize;
 192                 }
 193         }
 194
 195         return rc;
 196 }
 197
 198 static void mds_extN_delete_inode(struct inode *inode)
 199 {
 200         if (S_ISREG(inode->i_mode)) {
 201                 void *handle = mds_extN_start(inode, MDS_FSOP_UNLINK);
 202
 203                 if (IS_ERR(handle)) {
 204                         CERROR("unable to start transaction");
 205                         EXIT;
 206                         return;
 207                 }
 208                 if (mds_extN_set_md(inode, handle, NULL))
 209                         CERROR("error clearing obdo on %ld\n", inode->i_ino);
 210
 211                 if (mds_extN_fs_ops.cl_delete_inode)
 212                         mds_extN_fs_ops.cl_delete_inode(inode);
 213
 214                 if (mds_extN_commit(inode, handle))
 215                         CERROR("error closing handle on %ld\n", inode->i_ino);
 216         } else
 217                 mds_extN_fs_ops.cl_delete_inode(inode);
 218 }
 219
 220 static void mds_extN_callback_status(void *jcb, int error)
 221 {
 222         struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
 223
 224         CDEBUG(D_EXT2, "got callback for last_rcvd %Ld: rc = %d\n",
 225                mcb->cb_last_rcvd, error);
 226         if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
 227                 mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
 228
 229         kmem_cache_free(jcb_cache, jcb);
 230         --jcb_cache_count;
 231 }
 232
 233 static int mds_extN_set_last_rcvd(struct mds_obd *mds, void *handle)
 234 {
 235         struct mds_cb_data *mcb;
 236
 237         mcb = kmem_cache_alloc(jcb_cache, GFP_NOFS);
 238         if (!mcb)
 239                 RETURN(-ENOMEM);
 240
 241         ++jcb_cache_count;
 242         mcb->cb_mds = mds;
 243         mcb->cb_last_rcvd = mds->mds_last_rcvd;
 244
 245 #ifdef HAVE_JOURNAL_CALLBACK_STATUS
 246         CDEBUG(D_EXT2, "set callback for last_rcvd: %Ld\n",
 247                (unsigned long long)mcb->cb_last_rcvd);
 248         journal_callback_set(handle, mds_extN_callback_status,
 249                              (void *)mcb);
 250 #else
 251 #warning "no journal callback kernel patch, faking it..."
 252         {
 253         static long next = 0;
 254
 255         if (time_after(jiffies, next)) {
 256                 CERROR("no journal callback kernel patch, faking it...\n");
 257                 next = jiffies + 300 * HZ;
 258         }
 259         }
 260         mds_extN_callback_status((struct journal_callback *)mcb, 0);
 261 #endif
 262
 263         return 0;
 264 }
 265
 266 static int mds_extN_journal_data(struct file *filp)
 267 {
 268         struct inode *inode = filp->f_dentry->d_inode;
 269
 270         EXTN_I(inode)->i_flags |= EXTN_JOURNAL_DATA_FL;
 271
 272         return 0;
 273 }
 274
 275 /*
 276  * We need to hack the return value for the free inode counts because
 277  * the current EA code requires one filesystem block per inode with EAs,
 278  * so it is possible to run out of blocks before we run out of inodes.
 279  *
 280  * This can be removed when the extN EA code is fixed.
 281  */
 282 static int mds_extN_statfs(struct super_block *sb, struct statfs *sfs)
 283 {
 284         int rc = vfs_statfs(sb, sfs);
 285
 286         if (!rc && sfs->f_bfree < sfs->f_ffree)
 287                 sfs->f_ffree = sfs->f_bfree;
 288
 289         return rc;
 290 }
 291
 292 static struct mds_fs_operations mds_extN_fs_ops = {
 293         fs_owner:               THIS_MODULE,
 294         fs_start:               mds_extN_start,
 295         fs_commit:              mds_extN_commit,
 296         fs_setattr:             mds_extN_setattr,
 297         fs_set_md:              mds_extN_set_md,
 298         fs_get_md:              mds_extN_get_md,
 299         fs_readpage:            mds_extN_readpage,
 300         fs_delete_inode:        mds_extN_delete_inode,
 301         cl_delete_inode:        clear_inode,
 302         fs_journal_data:        mds_extN_journal_data,
 303         fs_set_last_rcvd:       mds_extN_set_last_rcvd,
 304         fs_statfs:              mds_extN_statfs,
 305 };
 306
 307 static int __init mds_extN_init(void)
 308 {
 309         int rc;
 310
 311         //rc = extN_xattr_register();
 312         jcb_cache = kmem_cache_create("mds_extN_jcb",
 313                                       sizeof(struct mds_cb_data), 0,
 314                                       0, NULL, NULL);
 315         if (!jcb_cache) {
 316                 CERROR("error allocating MDS journal callback cache\n");
 317                 GOTO(out, rc = -ENOMEM);
 318         }
 319         rc = mds_register_fs_type(&mds_extN_fs_ops, "extN");
 320
 321         if (rc)
 322                 kmem_cache_destroy(jcb_cache);
 323 out:
 324         return rc;
 325 }
 326
 327 static void __exit mds_extN_exit(void)
 328 {
 329         int rc;
 330
 331         mds_unregister_fs_type("extN");
 332         rc = kmem_cache_destroy(jcb_cache);
 333
 334         if (rc || jcb_cache_count) {
 335                 CERROR("can't free MDS callback cache: count %d, rc = %d\n",
 336                        jcb_cache_count, rc);
 337         }
 338
 339         //rc = extN_xattr_unregister();
 340 }
 341
/* Module metadata. */
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre MDS extN Filesystem Helper v0.1");
MODULE_LICENSE("GPL");

/* Entry points run at module load and unload. */
module_init(mds_extN_init);
module_exit(mds_extN_exit);