1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mds/mds_ext3.c
5 * Lustre Metadata Server (mds) journal abstraction routines
7 * Copyright (C) 2002 Cluster File Systems, Inc.
8 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #define DEBUG_SUBSYSTEM S_MDS
29 #include <linux/jbd.h>
30 #include <linux/slab.h>
31 #include <linux/init.h>
32 #include <linux/ext3_fs.h>
33 #include <linux/ext3_jbd.h>
34 #include <../fs/ext3/xattr.h>
35 #include <linux/kp30.h>
36 #include <linux/lustre_mds.h>
37 #include <linux/obd.h>
38 #include <linux/module.h>
39 #include <linux/obd_lov.h>
/* Forward declaration of the ext3 operations table: mds_ext3_delete_inode()
 * reads its cl_delete_inode member before the table is defined below. */
41 static struct mds_fs_operations mds_ext3_fs_ops;
/* Slab cache that journal callback records (struct mds_cb_data) are
 * allocated from and freed back to. */
42 static kmem_cache_t *mcb_cache;
/* Counter printed by mds_ext3_exit() when the cache cannot be released;
 * presumably the number of outstanding callback records -- TODO confirm
 * where it is updated. */
43 static int mcb_cache_count;
/* Journal callback header.  The code casts between struct journal_callback *
 * and struct mds_cb_data *, which relies on this being the first member --
 * NOTE(review): confirm it stays first. */
46 struct journal_callback cb_jcb;
/* MDS device whose mds_last_committed is advanced by the commit callback. */
47 struct mds_obd *cb_mds;
/* Extended-attribute name index passed to ext3_xattr_set()/ext3_xattr_get()
 * for the Lustre MDS attribute. */
51 #define EXT3_XATTR_INDEX_LUSTRE 5
/* Name of the extended attribute that holds the struct lov_mds_md written
 * by mds_ext3_set_md() and read back by mds_ext3_get_md(). */
52 #define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid"
/* NOTE(review): magic value that is not referenced by any other code
 * visible in this file -- confirm who consumes it. */
54 #define XATTR_MDS_MO_MAGIC 0xEA0BD047
57 * We don't currently need any additional blocks for rmdir and
58 * unlink transactions because we are storing the OST oa_id inside
59 * the inode (which we will be changing anyway as part of this
/*
 * Open a journal transaction on the ext3 journal of @inode for the MDS
 * operation @op.
 *
 * The credit count starts at EXT3_DATA_TRANS_BLOCKS and is increased
 * depending on @op before it is passed to journal_start().  An @op that
 * matches no case is reported through CERROR.  The handle comes from
 * journal_start(); callers in this file test the result with IS_ERR().
 */
62 static void *mds_ext3_start(struct inode *inode, int op)
64 /* For updates to the last received file */
65 int nblocks = EXT3_DATA_TRANS_BLOCKS;
71 nblocks += EXT3_DELETE_TRANS_BLOCKS;
74 /* We may be modifying two directories */
75 nblocks += EXT3_DATA_TRANS_BLOCKS;
76 case MDS_FSOP_SYMLINK:
77 /* Possible new block + block bitmap + GDT for long symlink */
82 /* New inode + block bitmap + GDT for new file */
85 /* Change parent directory */
86 nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
88 case MDS_FSOP_SETATTR:
89 /* Setattr on inode */
92 default: CERROR("unknown transaction start op %d\n", op);
/* Reserve the accumulated number of journal blocks for this handle. */
97 handle = journal_start(EXT3_JOURNAL(inode), nblocks);
/*
 * Close the transaction @handle (as obtained from mds_ext3_start()) by
 * calling journal_stop() on it.  @inode is not referenced by the statements
 * shown here.  The journal_stop() result is stored in rc; presumably that
 * is what the function returns -- TODO confirm.
 */
103 static int mds_ext3_commit(struct inode *inode, void *handle)
108 rc = journal_stop((handle_t *)handle);
/*
 * Apply the attribute changes described by iattr to the inode behind
 * @dentry.  The inode's own ->setattr method is used when it has one;
 * inode_setattr() is the generic path (presumably the else branch --
 * TODO confirm).  @handle is not referenced by the statements shown here.
 */
114 static int mds_ext3_setattr(struct dentry *dentry, void *handle,
117 struct inode *inode = dentry->d_inode;
121 if (inode->i_op->setattr)
122 rc = inode->i_op->setattr(dentry, iattr);
124 rc = inode_setattr(inode, iattr);
/*
 * Store @lmm in the XATTR_LUSTRE_MDS_OBJID extended attribute of @inode
 * under transaction @handle.
 *
 * A NULL @lmm is passed through to ext3_xattr_set() with a length of 0;
 * mds_ext3_delete_inode() uses that form to clear the attribute.
 */
131 static int mds_ext3_set_md(struct inode *inode, void *handle,
132 struct lov_mds_md *lmm)
/* Value length is lmm->lmm_easize, or 0 when no MD is supplied. */
138 rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
139 XATTR_LUSTRE_MDS_OBJID, lmm,
140 lmm ? lmm->lmm_easize : 0, 0);
/* NOTE(review): lmm is dereferenced in this message although the call
 * above allows it to be NULL -- confirm the error path cannot be reached
 * with lmm == NULL (e.g. from mds_ext3_delete_inode()). */
145 CERROR("error adding objectid "LPX64" to inode %ld: %d\n",
146 lmm->lmm_object_id, inode->i_ino, rc);
/* Any failure other than running out of space is treated as fatal. */
147 if (rc != -ENOSPC) LBUG();
/*
 * Read the XATTR_LUSTRE_MDS_OBJID extended attribute of @inode into @lmm.
 *
 * lmm->lmm_easize is sampled before the read and used as the buffer size
 * handed to ext3_xattr_get(), so it must hold the capacity of @lmm on
 * entry.
 */
152 static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm)
155 int size = lmm->lmm_easize;
159 rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE,
160 XATTR_LUSTRE_MDS_OBJID, lmm, size);
164 /* This gives us the MD size */
/* Failure path: log at D_INFO level and hand back a zero-filled buffer. */
169 CDEBUG(D_INFO, "error getting EA %s from MDS inode %ld: "
170 "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
171 memset(lmm, 0, size);
175 /* This field is byteswapped because it appears in the
176 * catalogue. All others are opaque to the MDS */
177 lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id);
/*
 * Read data from @file at *offset into @buf.
 *
 * Regular files are read through the file's own ->read method with the
 * caller's @count.  The other path fetches a single block with ext3_bread()
 * (block number = *offset >> s_blocksize_bits), copies inode->i_blksize
 * bytes into @buf and reports i_blksize as the result.
 */
182 static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
185 struct inode *inode = file->f_dentry->d_inode;
188 if (S_ISREG(inode->i_mode))
189 rc = file->f_op->read(file, buf, count, offset);
191 struct buffer_head *bh;
193 /* FIXME: this assumes the blocksize == count, but the calling
194 * function will detect this as an error for now */
195 bh = ext3_bread(NULL, inode,
196 *offset >> inode->i_sb->s_blocksize_bits,
/* NOTE(review): the copy length is i_blksize, not @count, so @buf must be
 * at least i_blksize bytes (see the FIXME above). */
200 memcpy(buf, bh->b_data, inode->i_blksize);
202 rc = inode->i_blksize;
/*
 * delete_inode hook installed through mds_ext3_fs_ops.fs_delete_inode.
 *
 * For a regular file it opens an MDS_FSOP_UNLINK transaction, clears the
 * objid extended attribute by calling mds_ext3_set_md() with a NULL MD,
 * chains to cl_delete_inode when that pointer is set, and then closes the
 * handle.  Failures of the individual steps are only logged.
 */
209 static void mds_ext3_delete_inode(struct inode *inode)
211 if (S_ISREG(inode->i_mode)) {
212 void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK);
214 if (IS_ERR(handle)) {
/* NOTE(review): this message has no trailing newline, unlike the other
 * CERROR calls in this file. */
215 CERROR("unable to start transaction");
219 if (mds_ext3_set_md(inode, handle, NULL))
220 CERROR("error clearing objid on %ld\n", inode->i_ino);
222 if (mds_ext3_fs_ops.cl_delete_inode)
223 mds_ext3_fs_ops.cl_delete_inode(inode);
225 if (mds_ext3_commit(inode, handle))
226 CERROR("error closing handle on %ld\n", inode->i_ino);
/* NOTE(review): presumably the path for non-regular files; here
 * cl_delete_inode is called without the NULL check used above --
 * confirm it can never be NULL on this path. */
228 mds_ext3_fs_ops.cl_delete_inode(inode);
/*
 * Journal callback for a record set up by mds_ext3_set_last_rcvd().
 *
 * @jcb is reinterpreted as the enclosing struct mds_cb_data.  When @error
 * is zero and the recorded cb_last_rcvd is newer than the device's
 * mds_last_committed, mds_last_committed is advanced to it.  The record is
 * then returned to mcb_cache.
 */
231 static void mds_ext3_callback_status(struct journal_callback *jcb, int error)
233 struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
235 CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n",
236 mcb->cb_last_rcvd, error);
/* Only move the committed marker forward, and only on success. */
237 if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
238 mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
240 kmem_cache_free(mcb_cache, mcb);
/*
 * Arrange for mds_ext3_callback_status() to run for the current
 * mds->mds_last_rcvd value in the context of transaction @handle.
 *
 * A callback record is allocated from mcb_cache and stamped with
 * mds->mds_last_rcvd.  With HAVE_JOURNAL_CALLBACK_STATUS the record is
 * registered on @handle via journal_callback_set().  The fallback code
 * (which emits a build-time #warning) invokes the callback immediately
 * with error 0.
 */
244 static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
246 struct mds_cb_data *mcb;
/* GFP_NOFS: the allocation flag used for the callback record. */
248 mcb = kmem_cache_alloc(mcb_cache, GFP_NOFS);
254 mcb->cb_last_rcvd = mds->mds_last_rcvd;
256 #ifdef HAVE_JOURNAL_CALLBACK_STATUS
257 CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n",
260 /* Note that an "incompatible pointer" warning here is OK for now */
261 journal_callback_set(handle, mds_ext3_callback_status,
262 (struct journal_callback *)mcb);
265 #warning "no journal callback kernel patch, faking it..."
/* Earliest jiffies value at which the runtime warning may be printed again. */
267 static long next = 0;
269 if (time_after(jiffies, next)) {
270 CERROR("no journal callback kernel patch, faking it...\n");
/* Re-arm the warning 300 seconds from now. */
271 next = jiffies + 300 * HZ;
/* No real commit notification is available: report success right away. */
274 mds_ext3_callback_status((struct journal_callback *)mcb, 0);
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3 in-core flags of the inode behind
 * @filp; presumably this switches the file to data journaling -- TODO
 * confirm against the ext3 sources.
 */
280 static int mds_ext3_journal_data(struct file *filp)
282 struct inode *inode = filp->f_dentry->d_inode;
284 EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
290 * We need to hack the return value for the free inode counts because
291 * the current EA code requires one filesystem block per inode with EAs,
292 * so it is possible to run out of blocks before we run out of inodes.
294 * This can be removed when the ext3 EA code is fixed.
/*
 * statfs wrapper: calls vfs_statfs() and, when it succeeds, caps the
 * reported free inode count (f_ffree) at the free block count (f_bfree).
 */
296 static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs)
298 int rc = vfs_statfs(sb, sfs);
/* Never report more free inodes than free blocks. */
300 if (!rc && sfs->f_bfree < sfs->f_ffree)
301 sfs->f_ffree = sfs->f_bfree;
/*
 * Operations table that mds_ext3_init() registers under the name "ext3".
 * Each fs_* slot points at the corresponding mds_ext3_* helper in this
 * file.  cl_delete_inode is the function mds_ext3_delete_inode() chains
 * to, initialised here to clear_inode.
 */
306 static struct mds_fs_operations mds_ext3_fs_ops = {
307 fs_owner: THIS_MODULE,
308 fs_start: mds_ext3_start,
309 fs_commit: mds_ext3_commit,
310 fs_setattr: mds_ext3_setattr,
311 fs_set_md: mds_ext3_set_md,
312 fs_get_md: mds_ext3_get_md,
313 fs_readpage: mds_ext3_readpage,
314 fs_delete_inode: mds_ext3_delete_inode,
315 cl_delete_inode: clear_inode,
316 fs_journal_data: mds_ext3_journal_data,
317 fs_set_last_rcvd: mds_ext3_set_last_rcvd,
318 fs_statfs: mds_ext3_statfs,
/*
 * Module initialisation: create the "mds_ext3_mcb" slab cache for journal
 * callback records, then register mds_ext3_fs_ops as the "ext3" MDS
 * filesystem type.  Failure to create the cache yields -ENOMEM.
 */
321 static int __init mds_ext3_init(void)
325 //rc = ext3_xattr_register();
326 mcb_cache = kmem_cache_create("mds_ext3_mcb",
327 sizeof(struct mds_cb_data), 0,
330 CERROR("error allocating MDS journal callback cache\n");
331 GOTO(out, rc = -ENOMEM);
334 rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3");
/* Presumably reached only when registration fails, so the cache does not
 * outlive a failed init -- TODO confirm the guarding condition. */
337 kmem_cache_destroy(mcb_cache);
/*
 * Module teardown: unregister the "ext3" MDS filesystem type, then destroy
 * the callback record cache.  A non-zero result from kmem_cache_destroy()
 * or a non-zero mcb_cache_count is reported through CERROR.
 */
342 static void __exit mds_ext3_exit(void)
346 mds_unregister_fs_type("ext3");
347 rc = kmem_cache_destroy(mcb_cache);
349 if (rc || mcb_cache_count) {
350 CERROR("can't free MDS callback cache: count %d, rc = %d\n",
351 mcb_cache_count, rc);
354 //rc = ext3_xattr_unregister();
/* Module metadata. */
357 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
358 MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1");
359 MODULE_LICENSE("GPL");
/* Entry points run when the module is loaded and unloaded. */
361 module_init(mds_ext3_init);
362 module_exit(mds_ext3_exit);