Whamcloud - gitweb
remove the lustre book-building bits from Makefile.am, in preparation for
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  lustre/lvfs/lvfs_linux.c
5  *  Lustre filesystem abstraction routines
6  *
7  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8  *   Author: Andreas Dilger <adilger@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef EXPORT_SYMTAB
27 # define EXPORT_SYMTAB
28 #endif
29
30 #define DEBUG_SUBSYSTEM S_FILTER
31
32 #include <linux/version.h>
33 #include <linux/fs.h>
34 #include <asm/unistd.h>
35 #include <linux/slab.h>
36 #include <linux/pagemap.h>
37 #include <linux/quotaops.h>
38 #include <linux/version.h>
39 #include <libcfs/kp30.h>
40 #include <linux/lustre_fsfilt.h>
41 #include <linux/obd.h>
42 #include <linux/obd_class.h>
43 #include <linux/module.h>
44 #include <linux/init.h>
45 #include <linux/lustre_compat25.h>
46 #include <linux/lvfs.h>
47 #include "lvfs_internal.h"
48
49 #include <linux/obd.h>
50 #include <linux/lustre_lib.h>
51
52 atomic_t obd_memory;
53 int obd_memmax;
54
55
/*
 * Context sanity checks, only needed during development.  They expand to
 * real assertions when OBD_CTXT_DEBUG is defined and to empty statements
 * otherwise.  The msg argument of the *_KERNEL_CTXT variants is accepted
 * but not used by either expansion.
 */
#if defined(OBD_CTXT_DEBUG)
# define ASSERT_CTXT_MAGIC(magic)      LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
# define ASSERT_KERNEL_CTXT(msg)       LASSERT(segment_eq(get_fs(), get_ds()))
# define ASSERT_NOT_KERNEL_CTXT(msg)   LASSERT(!segment_eq(get_fs(), get_ds()))
#else
# define ASSERT_CTXT_MAGIC(magic)      do { } while (0)
# define ASSERT_KERNEL_CTXT(msg)       do { } while (0)
# define ASSERT_NOT_KERNEL_CTXT(msg)   do { } while (0)
#endif
66
67 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4))
68 #define current_ngroups current->group_info->ngroups
69 #define current_groups current->group_info->small_block
70 #else
71 #define current_ngroups current->ngroups
72 #define current_groups current->groups
73 #endif
74
75 /* push / pop to root of obd store */
76 void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
77                struct obd_ucred *uc)
78 {
79         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
80         ASSERT_CTXT_MAGIC(new_ctx->magic);
81         OBD_SET_CTXT_MAGIC(save);
82
83         /*
84         CDEBUG(D_INFO,
85                "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
86                save, current, current->fs, current->fs->pwd,
87                atomic_read(&current->fs->pwd->d_count),
88                atomic_read(&current->fs->pwd->d_inode->i_count),
89                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
90                current->fs->pwdmnt,
91                atomic_read(&current->fs->pwdmnt->mnt_count));
92         */
93
94         save->fs = get_fs();
95         LASSERT(atomic_read(&current->fs->pwd->d_count));
96         LASSERT(atomic_read(&new_ctx->pwd->d_count));
97         save->pwd = dget(current->fs->pwd);
98         save->pwdmnt = mntget(current->fs->pwdmnt);
99         save->ngroups = current_ngroups;
100         save->ouc.ouc_umask = current->fs->umask;
101
102         LASSERT(save->pwd);
103         LASSERT(save->pwdmnt);
104         LASSERT(new_ctx->pwd);
105         LASSERT(new_ctx->pwdmnt);
106
107         if (uc) {
108                 save->ouc.ouc_fsuid = current->fsuid;
109                 save->ouc.ouc_fsgid = current->fsgid;
110                 save->ouc.ouc_cap = current->cap_effective;
111                 save->ouc.ouc_suppgid1 = current_groups[0];
112                 save->ouc.ouc_suppgid2 = current_groups[1];
113
114                 current->fsuid = uc->ouc_fsuid;
115                 current->fsgid = uc->ouc_fsgid;
116                 current->cap_effective = uc->ouc_cap;
117                 current_ngroups = 0;
118
119                 if (uc->ouc_suppgid1 != -1)
120                         current_groups[current_ngroups++] = uc->ouc_suppgid1;
121                 if (uc->ouc_suppgid2 != -1)
122                         current_groups[current_ngroups++] = uc->ouc_suppgid2;
123 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
124                 if (uc->ouc_suppgid1 != -1 && uc->ouc_suppgid2 != -1 &&
125                     (uc->ouc_suppgid1 > uc->ouc_suppgid2)) {
126                         current_groups[0] = uc->ouc_suppgid2;
127                         current_groups[1] = uc->ouc_suppgid1;
128                 }
129 #endif
130         }
131         current->fs->umask = 0; /* umask already applied on client */
132         set_fs(new_ctx->fs);
133         set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
134
135         /*
136         CDEBUG(D_INFO,
137                "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
138                new_ctx, current, current->fs, current->fs->pwd,
139                atomic_read(&current->fs->pwd->d_count),
140                atomic_read(&current->fs->pwd->d_inode->i_count),
141                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
142                current->fs->pwdmnt,
143                atomic_read(&current->fs->pwdmnt->mnt_count));
144         */
145 }
146 EXPORT_SYMBOL(push_ctxt);
147
148 void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
149               struct obd_ucred *uc)
150 {
151         //printk("pc0");
152         ASSERT_CTXT_MAGIC(saved->magic);
153         //printk("pc1");
154         ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
155
156         /*
157         CDEBUG(D_INFO,
158                " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
159                new_ctx, current, current->fs, current->fs->pwd,
160                atomic_read(&current->fs->pwd->d_count),
161                atomic_read(&current->fs->pwd->d_inode->i_count),
162                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
163                current->fs->pwdmnt,
164                atomic_read(&current->fs->pwdmnt->mnt_count));
165         */
166
167         LASSERT(current->fs->pwd == new_ctx->pwd);
168         LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
169
170         set_fs(saved->fs);
171         set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
172
173         dput(saved->pwd);
174         mntput(saved->pwdmnt);
175         current->fs->umask = saved->ouc.ouc_umask;
176         if (uc) {
177                 current->fsuid = saved->ouc.ouc_fsuid;
178                 current->fsgid = saved->ouc.ouc_fsgid;
179                 current->cap_effective = saved->ouc.ouc_cap;
180                 current_ngroups = saved->ngroups;
181                 current_groups[0] = saved->ouc.ouc_suppgid1;
182                 current_groups[1] = saved->ouc.ouc_suppgid2;
183         }
184
185         /*
186         CDEBUG(D_INFO,
187                "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
188                saved, current, current->fs, current->fs->pwd,
189                atomic_read(&current->fs->pwd->d_count),
190                atomic_read(&current->fs->pwd->d_inode->i_count),
191                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
192                current->fs->pwdmnt,
193                atomic_read(&current->fs->pwdmnt->mnt_count));
194         */
195 }
196 EXPORT_SYMBOL(pop_ctxt);
197
198 /* utility to make a file */
199 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
200 {
201         struct dentry *dchild;
202         int err = 0;
203         ENTRY;
204
205         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
206         CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
207
208         dchild = ll_lookup_one_len(name, dir, strlen(name));
209         if (IS_ERR(dchild))
210                 GOTO(out_up, dchild);
211
212         if (dchild->d_inode) {
213                 int old_mode = dchild->d_inode->i_mode;
214                 if (!S_ISREG(old_mode))
215                         GOTO(out_err, err = -EEXIST);
216
217                 /* Fixup file permissions if necessary */
218                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
219                         CWARN("fixing permissions on %s from %o to %o\n",
220                               name, old_mode, mode);
221                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
222                                                   (old_mode & ~S_IALLUGO);
223                         mark_inode_dirty(dchild->d_inode);
224                 }
225                 GOTO(out_up, dchild);
226         }
227
228         err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
229                             NULL);
230         if (err)
231                 GOTO(out_err, err);
232
233         RETURN(dchild);
234
235 out_err:
236         dput(dchild);
237         dchild = ERR_PTR(err);
238 out_up:
239         return dchild;
240 }
241 EXPORT_SYMBOL(simple_mknod);
242
243 /* utility to make a directory */
244 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
245 {
246         struct dentry *dchild;
247         int err = 0;
248         ENTRY;
249
250         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
251         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
252         dchild = ll_lookup_one_len(name, dir, strlen(name));
253         if (IS_ERR(dchild))
254                 GOTO(out_up, dchild);
255
256         if (dchild->d_inode) {
257                 int old_mode = dchild->d_inode->i_mode;
258                 if (!S_ISDIR(old_mode)) {
259                         CERROR("found %s (%lu/%u) is mode %o\n", name,
260                                dchild->d_inode->i_ino,
261                                dchild->d_inode->i_generation, old_mode);
262                         GOTO(out_err, err = -ENOTDIR);
263                 }
264
265                 /* Fixup directory permissions if necessary */
266                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
267                         CWARN("fixing permissions on %s from %o to %o\n",
268                               name, old_mode, mode);
269                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
270                                                   (old_mode & ~S_IALLUGO);
271                         mark_inode_dirty(dchild->d_inode);
272                 }
273                 GOTO(out_up, dchild);
274         }
275
276         err = vfs_mkdir(dir->d_inode, dchild, mode);
277         if (err)
278                 GOTO(out_err, err);
279
280         RETURN(dchild);
281
282 out_err:
283         dput(dchild);
284         dchild = ERR_PTR(err);
285 out_up:
286         return dchild;
287 }
288 EXPORT_SYMBOL(simple_mkdir);
289
290 /*
291  * Read a file from within kernel context.  Prior to calling this
292  * function we should already have done a push_ctxt().
293  */
294 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
295 {
296         ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
297         if (!file || !file->f_op || !file->f_op->read || !off)
298                 RETURN(-ENOSYS);
299
300         return file->f_op->read(file, buf, len, off);
301 }
302 EXPORT_SYMBOL(lustre_fread);
303
304 /*
305  * Write a file from within kernel context.  Prior to calling this
306  * function we should already have done a push_ctxt().
307  */
308 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
309 {
310         ENTRY;
311         ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
312         if (!file)
313                 RETURN(-ENOENT);
314         if (!file->f_op)
315                 RETURN(-ENOSYS);
316         if (!off)
317                 RETURN(-EINVAL);
318
319         if (!file->f_op->write)
320                 RETURN(-EROFS);
321
322         RETURN(file->f_op->write(file, buf, len, off));
323 }
324 EXPORT_SYMBOL(lustre_fwrite);
325
326 /*
327  * Sync a file from within kernel context.  Prior to calling this
328  * function we should already have done a push_ctxt().
329  */
330 int lustre_fsync(struct file *file)
331 {
332         ENTRY;
333         ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
334         if (!file || !file->f_op || !file->f_op->fsync)
335                 RETURN(-ENOSYS);
336
337         RETURN(file->f_op->fsync(file, file->f_dentry, 0));
338 }
339 EXPORT_SYMBOL(lustre_fsync);
340
341 struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de,
342                              int flags)
343 {
344         mntget(ctxt->pwdmnt);
345         return dentry_open(de, ctxt->pwdmnt, flags);
346 }
347 EXPORT_SYMBOL(l_dentry_open);
348
349 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
350                      ino_t ino, unsigned int d_type)
351 {
352         struct l_linux_dirent *dirent;
353         struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
354         
355         dirent = buf->lrc_dirent;
356         if (dirent)
357                dirent->lld_off = offset; 
358
359         OBD_ALLOC(dirent, sizeof(*dirent));
360
361         list_add_tail(&dirent->lld_list, buf->lrc_list);
362
363         buf->lrc_dirent = dirent;
364         dirent->lld_ino = ino;
365         LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
366         memcpy(dirent->lld_name, name, namlen);
367
368         return 0;
369 }
370
371 long l_readdir(struct file *file, struct list_head *dentry_list)
372 {
373         struct l_linux_dirent *lastdirent;
374         struct l_readdir_callback buf;
375         int error;
376
377         buf.lrc_dirent = NULL;
378         buf.lrc_list = dentry_list; 
379
380         error = vfs_readdir(file, l_filldir, &buf);
381         if (error < 0)
382                 return error;
383
384         lastdirent = buf.lrc_dirent;
385         if (lastdirent)
386                 lastdirent->lld_off = file->f_pos;
387
388         return 0; 
389 }
390 EXPORT_SYMBOL(l_readdir);
391 EXPORT_SYMBOL(obd_memory);
392 EXPORT_SYMBOL(obd_memmax);
393
394 static int __init lvfs_linux_init(void)
395 {
396         RETURN(0);
397 }
398
399 static void __exit lvfs_linux_exit(void)
400 {
401         int leaked;
402         ENTRY;
403
404         leaked = atomic_read(&obd_memory);
405         CDEBUG(leaked ? D_ERROR : D_INFO,
406                "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
407
408         return;
409 }
410
411 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
412 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
413 MODULE_LICENSE("GPL");
414
415 module_init(lvfs_linux_init);
416 module_exit(lvfs_linux_exit);