1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mds/mds_ext3.c
5 * Lustre Metadata Server (mds) journal abstraction routines
7 * Copyright (C) 2002 Cluster File Systems, Inc.
8 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #define DEBUG_SUBSYSTEM S_MDS
30 #include <linux/jbd.h>
31 #include <linux/ext3_fs.h>
32 #include <linux/ext3_jbd.h>
33 #include <linux/lustre_mds.h>
34 #include <linux/module.h>
/* Forward declaration of the operations table that is initialized later in this file. */
36 static struct mds_fs_operations mds_ext3_fs_ops;
/* Cache from which the struct mds_cb_data journal callback records are allocated and freed. */
37 static kmem_cache_t *mcb_cache;
/* Checked at module exit, where a non-zero value is reported with CERROR. */
38 static int mcb_cache_count;
/*
 * Members of the journal callback record.  Elsewhere in this file a
 * struct journal_callback pointer is cast to struct mds_cb_data and back.
 * NOTE(review): the struct header and the cb_last_rcvd member that other
 * functions use are not visible in these lines -- confirm against the full file.
 */
/* Presumably kept first so the pointer casts described above are valid -- TODO confirm. */
41 struct journal_callback cb_jcb;
/* MDS whose mds_last_committed is updated from the callback. */
42 struct mds_obd *cb_mds;
47 * We don't currently need any additional blocks for rmdir and
48 * unlink transactions because we are storing the OST oa_id inside
49 * the inode (which we will be changing anyways as part of this
/*
 * Start a journal transaction on the journal of inode for the MDS
 * operation op.  The block reservation begins at EXT3_DATA_TRANS_BLOCKS and
 * is increased depending on op; an unrecognized op is logged with CERROR.
 * The reservation is then passed to journal_start().
 * NOTE(review): the switch skeleton and the return statement are not visible
 * in these lines; presumably the handle from journal_start() is what gets
 * returned -- confirm against the full file.
 */
52 static void *mds_ext3_start(struct inode *inode, int op)
54 /* For updates to the last received file */
55 int nblocks = EXT3_DATA_TRANS_BLOCKS;
61 nblocks += EXT3_DELETE_TRANS_BLOCKS;
64 /* We may be modifying two directories */
65 nblocks += EXT3_DATA_TRANS_BLOCKS;
66 case MDS_FSOP_SYMLINK:
67 /* Possible new block + block bitmap + GDT for long symlink */
72 /* New inode + block bitmap + GDT for new file */
75 /* Change parent directory */
76 nblocks += EXT3_DATA_TRANS_BLOCKS;
78 case MDS_FSOP_SETATTR:
79 /* Setattr on inode */
82 default: CERROR("unknown transaction start op %d\n", op);
87 handle = journal_start(EXT3_JOURNAL(inode), nblocks);
/*
 * Finish a transaction: handle is cast back to handle_t and handed to
 * journal_stop(), whose result is stored in rc.
 * NOTE(review): the return statement is not visible in these lines.
 */
93 static int mds_ext3_commit(struct inode *inode, void *handle)
98 rc = journal_stop((handle_t *)handle);
/*
 * Apply the attribute changes described by iattr to the inode behind dentry.
 *
 * When ATTR_SIZE is requested the flag is cleared and the size is written by
 * hand into i_size and the ext3 i_disksize, so that the setattr call further
 * down does not truncate the object id stored in the block pointers.  In
 * that case ATTR_MODE is also forced on (with the current mode) when the
 * caller did not ask for a mode change, so that something is still set.
 * The request is then passed to inode->i_op->setattr() when the inode has
 * one; inode_setattr() is the other path (presumably the else branch, whose
 * keyword is not visible in these lines).
 *
 * Fix: the ATTR_MODE test used to read (!iattr->ia_valid & ATTR_MODE).
 * Unary ! binds tighter than &, so it evaluated (!ia_valid) & ATTR_MODE,
 * which depends on the numeric value of ATTR_MODE instead of testing the
 * ATTR_MODE bit.  The bit test is now parenthesized.
 */
104 static int mds_ext3_setattr(struct dentry *dentry, void *handle,
107 struct inode *inode = dentry->d_inode;
112 /* a _really_ horrible hack to avoid removing the data stored
113 in the block pointers; this data is the object id
114 this will go into an extended attribute at some point.
116 if (iattr->ia_valid & ATTR_SIZE) {
117 /* ATTR_SIZE would invoke truncate: clear it */
118 iattr->ia_valid &= ~ATTR_SIZE;
119 inode->i_size = iattr->ia_size;
121 /* an _even_more_ horrible hack to make this hack work with
122 * ext3. This is because ext3 keeps a separate inode size
123 * until the inode is committed to ensure consistency. This
124 * will also go away with the move to EAs.
126 EXT3_I(inode)->i_disksize = inode->i_size;
128 /* make sure _something_ gets set - so new inode
129 goes to disk (probably won't work over XFS */
130 if (!(iattr->ia_valid & ATTR_MODE)) {
131 iattr->ia_valid |= ATTR_MODE;
132 iattr->ia_mode = inode->i_mode;
136 if (inode->i_op->setattr)
137 rc = inode->i_op->setattr(dentry, iattr);
139 rc = inode_setattr(inode, iattr);
147 * FIXME: nasty hack - store the object id in the first two
148 * direct block spots. This should be done with EAs...
149 * Note also that this does not currently mark the inode
150 * dirty (it currently is used with other operations that
151 * subsequently also mark the inode dirty).
/*
 * Store the object id at the start of EXT3_I(inode)->i_data, converted to
 * little-endian with cpu_to_le64().  handle is not used by the visible
 * statement.
 *
 * Fix: the definition was named mds_ext3_set_md and took (obd_md, len), but
 * its body stores a variable called id, the fs_set_objid slot of
 * mds_ext3_fs_ops points at mds_ext3_set_objid, and
 * mds_ext3_delete_inode() calls mds_ext3_set_objid(inode, handle, 0).
 * The name and the obd_id parameter are restored so that all of these agree.
 * NOTE(review): confirm that no other file expects the set_md signature.
 */
153 static int mds_ext3_set_objid(struct inode *inode, void *handle,
154 obd_id id)
156 *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id);
/*
 * Read the 64-bit value at the start of EXT3_I(inode)->i_data, convert it
 * from little-endian to CPU byte order and store it in *id.
 */
160 static int mds_ext3_get_objid(struct inode *inode, obd_id *id)
162 *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data));
/*
 * Read data for file into buf.
 *
 * Regular files are read through file->f_op->read().  On the other path
 * (its branch keyword is not visible in these lines) one block is fetched
 * with ext3_bread() at block index *offset >> s_blocksize_bits, then
 * inode->i_blksize bytes are copied into buf and that byte count becomes rc.
 * NOTE(review): the block index is derived from the superblock block size
 * while the copy length is inode->i_blksize -- confirm the two always agree.
 * NOTE(review): any check of the ext3_bread() result is not visible here.
 */
167 static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
170 struct inode *inode = file->f_dentry->d_inode;
173 if (S_ISREG(inode->i_mode))
174 rc = file->f_op->read(file, buf, count, offset);
176 struct buffer_head *bh;
178 /* FIXME: this assumes the blocksize == count, but the calling
179 * function will detect this as an error for now */
180 bh = ext3_bread(NULL, inode,
181 *offset >> inode->i_sb->s_blocksize_bits,
185 memcpy(buf, bh->b_data, inode->i_blksize);
187 rc = inode->i_blksize;
/*
 * Inode deletion hook.
 *
 * For a regular file an MDS_FSOP_UNLINK transaction is started, the stored
 * object id is cleared by calling mds_ext3_set_objid() with 0, the
 * cl_delete_inode callback from mds_ext3_fs_ops is invoked when it is set,
 * and the transaction is committed.  Failures are logged with CERROR.
 * The final cl_delete_inode call is presumably the branch for inodes that
 * are not regular files (the else keyword is not visible in these lines).
 *
 * Fix: the message logged when the transaction cannot be started now ends
 * with a newline, like every other CERROR message in this file.
 */
194 static void mds_ext3_delete_inode(struct inode *inode)
196 if (S_ISREG(inode->i_mode)) {
197 void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK);
199 if (IS_ERR(handle)) {
200 CERROR("unable to start transaction\n");
204 if (mds_ext3_set_objid(inode, handle, 0))
205 CERROR("error clearing objid on %ld\n", inode->i_ino);
207 if (mds_ext3_fs_ops.cl_delete_inode)
208 mds_ext3_fs_ops.cl_delete_inode(inode);
210 if (mds_ext3_commit(inode, handle))
211 CERROR("error closing handle on %ld\n", inode->i_ino);
213 mds_ext3_fs_ops.cl_delete_inode(inode);
/*
 * Journal callback.  jcb is cast back to the struct mds_cb_data it belongs
 * to.  When error is zero and the recorded cb_last_rcvd is larger than the
 * mds_last_committed of the owning MDS, mds_last_committed is advanced to
 * it.  The record is then returned to mcb_cache in every case.
 */
216 static void mds_ext3_callback_status(struct journal_callback *jcb, int error)
218 struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
220 CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n",
221 mcb->cb_last_rcvd, error);
222 if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
223 mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
225 kmem_cache_free(mcb_cache, mcb);
/*
 * Allocate a callback record from mcb_cache (GFP_NOFS), store the current
 * mds->mds_last_rcvd in it and arrange for mds_ext3_callback_status() to
 * run for it.
 *
 * With HAVE_JOURNAL_CALLBACK_STATUS the record is registered on handle via
 * journal_callback_set().  On the other preprocessor path the callback is
 * invoked directly with error 0, and a CERROR notice is emitted at most once
 * per 300 * HZ jiffies.
 * NOTE(review): the allocation failure check, the cb_mds assignment and the
 * return value are not visible in these lines.
 */
229 static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
231 struct mds_cb_data *mcb;
233 mcb = kmem_cache_alloc(mcb_cache, GFP_NOFS);
239 mcb->cb_last_rcvd = mds->mds_last_rcvd;
241 #ifdef HAVE_JOURNAL_CALLBACK_STATUS
242 CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n",
245 /* Note that an "incompatible pointer" warning here is OK for now */
246 journal_callback_set(handle, mds_ext3_callback_status,
247 (struct journal_callback *)mcb);
250 #warning "no journal callback kernel patch, faking it..."
/* Earliest jiffies value at which the notice below may be logged again. */
252 static long next = 0;
254 if (time_after(jiffies, next)) {
255 CERROR("no journal callback kernel patch, faking it...\n");
256 next = jiffies + 300 * HZ;
259 mds_ext3_callback_status((struct journal_callback *)mcb, 0);
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3 inode flags of the inode behind filp.
 * NOTE(review): the return statement is not visible in these lines.
 */
265 static int mds_ext3_journal_data(struct file *filp)
267 struct inode *inode = filp->f_dentry->d_inode;
269 EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
/*
 * Operations table for ext3, registered under the name ext3 by
 * mds_ext3_init().  Every slot points at a helper from this file except
 * cl_delete_inode, which is set to clear_inode.
 */
274 static struct mds_fs_operations mds_ext3_fs_ops = {
275 fs_owner: THIS_MODULE,
276 fs_start: mds_ext3_start,
277 fs_commit: mds_ext3_commit,
278 fs_setattr: mds_ext3_setattr,
279 fs_set_objid: mds_ext3_set_objid,
280 fs_get_objid: mds_ext3_get_objid,
281 fs_readpage: mds_ext3_readpage,
282 fs_delete_inode: mds_ext3_delete_inode,
283 cl_delete_inode: clear_inode,
284 fs_journal_data: mds_ext3_journal_data,
285 fs_set_last_rcvd: mds_ext3_set_last_rcvd,
/*
 * Module init: create the mds_ext3_mcb cache sized for struct mds_cb_data,
 * then register mds_ext3_fs_ops under the name ext3.  A cache creation
 * failure is logged and leaves through GOTO with rc = -ENOMEM.
 * NOTE(review): the condition guarding the kmem_cache_destroy() call is not
 * visible in these lines; presumably it runs when registration fails.
 */
288 static int __init mds_ext3_init(void)
292 mcb_cache = kmem_cache_create("mds_ext3_mcb",
293 sizeof(struct mds_cb_data), 0,
296 CERROR("error allocating MDS journal callback cache\n");
297 GOTO(out, rc = -ENOMEM);
300 rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3");
303 kmem_cache_destroy(mcb_cache);
/*
 * Module exit: unregister the ext3 operations, then destroy mcb_cache.
 * A non-zero destroy result or a non-zero mcb_cache_count is reported
 * with CERROR.
 */
308 static void __exit mds_ext3_exit(void)
312 mds_unregister_fs_type("ext3");
313 rc = kmem_cache_destroy(mcb_cache);
315 if (rc || mcb_cache_count) {
316 CERROR("can't free MDS callback cache: count %d, rc = %d\n",
317 mcb_cache_count, rc);
/* Module metadata, followed by the init and exit hooks defined above. */
321 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
322 MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1");
323 MODULE_LICENSE("GPL");
325 module_init(mds_ext3_init);
326 module_exit(mds_ext3_exit);