Whamcloud - gitweb
land b_groups onto HEAD:
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  lustre/lvfs/lvfs_linux.c
5  *  Lustre filesystem abstraction routines
6  *
7  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8  *   Author: Andreas Dilger <adilger@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef EXPORT_SYMTAB
27 # define EXPORT_SYMTAB
28 #endif
29
30 #define DEBUG_SUBSYSTEM S_FILTER
31
32 #include <linux/version.h>
33 #include <linux/fs.h>
34 #include <asm/unistd.h>
35 #include <linux/jbd.h>
36 #include <linux/slab.h>
37 #include <linux/pagemap.h>
38 #include <linux/quotaops.h>
39 #include <linux/version.h>
40 #include <linux/kp30.h>
41 #include <linux/lustre_fsfilt.h>
42 #include <linux/obd.h>
43 #include <linux/obd_class.h>
44 #include <linux/module.h>
45 #include <linux/init.h>
46 #include <linux/lustre_compat25.h>
47 #include <linux/lvfs.h>
48 #include "lvfs_internal.h"
49
50 #include <linux/obd.h>
51 #include <linux/lustre_lib.h>
52 #include <linux/lustre_mds.h>   /* for mds_grp_hash_entry */
53
54 atomic_t obd_memory;
55 int obd_memmax;
56
57
58 /* Debugging check only needed during development */
59 #ifdef OBD_CTXT_DEBUG
60 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
61 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds()))
62 # define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds()))
63 #else
64 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
65 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
66 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
67 #endif
68
69 static void push_group_info(struct lvfs_run_ctxt *save,
70                             struct group_info *ginfo)
71 {
72         if (!ginfo) {
73                 save->ngroups = current_ngroups;
74                 current_ngroups = 0;
75         } else {
76 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
77                 task_lock(current);
78                 save->group_info = current->group_info;
79                 current->group_info = group_info;
80                 task_unlock(current);
81 #else
82                 LASSERT(ginfo->ngroups <= NGROUPS);
83                 /* save old */
84                 save->group_info.ngroups = current->ngroups;
85                 if (current->ngroups)
86                         memcpy(save->group_info.small_block, current->groups,
87                                current->ngroups);
88                 /* push new */
89                 current->ngroups = ginfo->ngroups;
90                 if (ginfo->ngroups)
91                         memcpy(current->groups, ginfo->small_block,
92                                current->ngroups);
93 #endif
94         }
95 }
96
97 static void pop_group_info(struct lvfs_run_ctxt *save,
98                            struct group_info *ginfo)
99 {
100         if (!ginfo) {
101                 current_ngroups = save->ngroups;
102         } else {
103 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
104                 task_lock(current);
105                 current->group_info = save->group_info;
106                 task_unlock(current);
107 #else
108                 current->ngroups = ginfo->ngroups;
109                 if (current->ngroups)
110                         memcpy(current->groups, save->group_info.small_block,
111                                current->ngroups);
112 #endif
113         }
114 }
115
116 /* push / pop to root of obd store */
117 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
118                struct lvfs_ucred *uc)
119 {
120         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
121         ASSERT_CTXT_MAGIC(new_ctx->magic);
122         LASSERT(save->magic != OBD_RUN_CTXT_MAGIC || save->pid != current->pid);
123         OBD_SET_CTXT_MAGIC(save);
124         save->pid = current->pid;
125
126         /*
127         CDEBUG(D_INFO,
128                "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
129                save, current, current->fs, current->fs->pwd,
130                atomic_read(&current->fs->pwd->d_count),
131                atomic_read(&current->fs->pwd->d_inode->i_count),
132                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
133                current->fs->pwdmnt,
134                atomic_read(&current->fs->pwdmnt->mnt_count));
135         */
136
137         save->fs = get_fs();
138         LASSERT(atomic_read(&current->fs->pwd->d_count));
139         LASSERT(atomic_read(&new_ctx->pwd->d_count));
140         save->pwd = dget(current->fs->pwd);
141         save->pwdmnt = mntget(current->fs->pwdmnt);
142         save->ngroups = current_ngroups;
143
144         LASSERT(save->pwd);
145         LASSERT(save->pwdmnt);
146         LASSERT(new_ctx->pwd);
147         LASSERT(new_ctx->pwdmnt);
148
149         if (uc) {
150                 save->luc.luc_fsuid = current->fsuid;
151                 save->luc.luc_fsgid = current->fsgid;
152                 save->luc.luc_cap = current->cap_effective;
153
154                 current->fsuid = uc->luc_fsuid;
155                 current->fsgid = uc->luc_fsgid;
156                 current->cap_effective = uc->luc_cap;
157
158                 push_group_info(save, uc->luc_ginfo);
159         }
160         set_fs(new_ctx->fs);
161         set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
162
163         /*
164         CDEBUG(D_INFO,
165                "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
166                new_ctx, current, current->fs, current->fs->pwd,
167                atomic_read(&current->fs->pwd->d_count),
168                atomic_read(&current->fs->pwd->d_inode->i_count),
169                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
170                current->fs->pwdmnt,
171                atomic_read(&current->fs->pwdmnt->mnt_count));
172         */
173 }
174 EXPORT_SYMBOL(push_ctxt);
175
176 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
177               struct lvfs_ucred *uc)
178 {
179         //printk("pc0");
180         ASSERT_CTXT_MAGIC(saved->magic);
181         LASSERT(saved->pid == current->pid);
182         saved->magic = 0;
183         saved->pid = 0;
184         //printk("pc1");
185         ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
186
187         /*
188         CDEBUG(D_INFO,
189                " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
190                new_ctx, current, current->fs, current->fs->pwd,
191                atomic_read(&current->fs->pwd->d_count),
192                atomic_read(&current->fs->pwd->d_inode->i_count),
193                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
194                current->fs->pwdmnt,
195                atomic_read(&current->fs->pwdmnt->mnt_count));
196         */
197
198         LASSERT(current->fs->pwd == new_ctx->pwd);
199         LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
200
201         set_fs(saved->fs);
202         set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
203
204         dput(saved->pwd);
205         mntput(saved->pwdmnt);
206         if (uc) {
207                 current->fsuid = saved->luc.luc_fsuid;
208                 current->fsgid = saved->luc.luc_fsgid;
209                 current->cap_effective = saved->luc.luc_cap;
210
211                 pop_group_info(saved, uc->luc_ginfo);
212         }
213
214         /*
215         CDEBUG(D_INFO,
216                "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
217                saved, current, current->fs, current->fs->pwd,
218                atomic_read(&current->fs->pwd->d_count),
219                atomic_read(&current->fs->pwd->d_inode->i_count),
220                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
221                current->fs->pwdmnt,
222                atomic_read(&current->fs->pwdmnt->mnt_count));
223         */
224 }
225 EXPORT_SYMBOL(pop_ctxt);
226
227 /* utility to make a file */
228 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
229 {
230         struct dentry *dchild;
231         int err = 0;
232         ENTRY;
233
234         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
235         CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
236
237         dchild = ll_lookup_one_len(name, dir, strlen(name));
238         if (IS_ERR(dchild))
239                 GOTO(out_up, dchild);
240
241         if (dchild->d_inode) {
242                 int old_mode = dchild->d_inode->i_mode;
243                 if (!S_ISREG(old_mode))
244                         GOTO(out_err, err = -EEXIST);
245
246                 /* Fixup file permissions if necessary */
247                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
248                         CWARN("fixing permissions on %s from %o to %o\n",
249                               name, old_mode, mode);
250                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
251                                                   (old_mode & ~S_IALLUGO);
252                         mark_inode_dirty(dchild->d_inode);
253                 }
254                 GOTO(out_up, dchild);
255         }
256
257         err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
258                             NULL);
259         if (err)
260                 GOTO(out_err, err);
261
262         RETURN(dchild);
263
264 out_err:
265         dput(dchild);
266         dchild = ERR_PTR(err);
267 out_up:
268         return dchild;
269 }
270 EXPORT_SYMBOL(simple_mknod);
271
272 /* utility to make a directory */
273 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
274 {
275         struct dentry *dchild;
276         int err = 0;
277         ENTRY;
278
279         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
280         CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
281         dchild = ll_lookup_one_len(name, dir, strlen(name));
282         if (IS_ERR(dchild))
283                 GOTO(out_up, dchild);
284
285         if (dchild->d_inode) {
286                 int old_mode = dchild->d_inode->i_mode;
287                 if (!S_ISDIR(old_mode))
288                         GOTO(out_err, err = -ENOTDIR);
289
290                 /* Fixup directory permissions if necessary */
291                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
292                         CWARN("fixing permissions on %s from %o to %o\n",
293                               name, old_mode, mode);
294                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
295                                                   (old_mode & ~S_IALLUGO);
296                         mark_inode_dirty(dchild->d_inode);
297                 }
298                 GOTO(out_up, dchild);
299         }
300
301         err = vfs_mkdir(dir->d_inode, dchild, mode);
302         if (err)
303                 GOTO(out_err, err);
304
305         RETURN(dchild);
306
307 out_err:
308         dput(dchild);
309         dchild = ERR_PTR(err);
310 out_up:
311         return dchild;
312 }
313 EXPORT_SYMBOL(simple_mkdir);
314
315 /*
316  * Read a file from within kernel context.  Prior to calling this
317  * function we should already have done a push_ctxt().
318  */
319 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
320 {
321         ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
322         if (!file || !file->f_op || !file->f_op->read || !off)
323                 RETURN(-ENOSYS);
324
325         return file->f_op->read(file, buf, len, off);
326 }
327 EXPORT_SYMBOL(lustre_fread);
328
329 /*
330  * Write a file from within kernel context.  Prior to calling this
331  * function we should already have done a push_ctxt().
332  */
333 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
334 {
335         ENTRY;
336         ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
337         if (!file)
338                 RETURN(-ENOENT);
339         if (!file->f_op)
340                 RETURN(-ENOSYS);
341         if (!off)
342                 RETURN(-EINVAL);
343
344         if (!file->f_op->write)
345                 RETURN(-EROFS);
346
347         RETURN(file->f_op->write(file, buf, len, off));
348 }
349 EXPORT_SYMBOL(lustre_fwrite);
350
351 /*
352  * Sync a file from within kernel context.  Prior to calling this
353  * function we should already have done a push_ctxt().
354  */
355 int lustre_fsync(struct file *file)
356 {
357         ENTRY;
358         ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
359         if (!file || !file->f_op || !file->f_op->fsync)
360                 RETURN(-ENOSYS);
361
362         RETURN(file->f_op->fsync(file, file->f_dentry, 0));
363 }
364 EXPORT_SYMBOL(lustre_fsync);
365
366 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
367                              int flags)
368 {
369         mntget(ctxt->pwdmnt);
370         return dentry_open(de, ctxt->pwdmnt, flags);
371 }
372 EXPORT_SYMBOL(l_dentry_open);
373
374 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
375                      ino_t ino, unsigned int d_type)
376 {
377         struct l_linux_dirent *dirent;
378         struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
379         
380         dirent = buf->lrc_dirent;
381         if (dirent)
382                dirent->lld_off = offset; 
383
384         OBD_ALLOC(dirent, sizeof(*dirent));
385
386         list_add_tail(&dirent->lld_list, buf->lrc_list);
387
388         buf->lrc_dirent = dirent;
389         dirent->lld_ino = ino;
390         LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
391         memcpy(dirent->lld_name, name, namlen);
392
393         return 0;
394 }
395
396 long l_readdir(struct file *file, struct list_head *dentry_list)
397 {
398         struct l_linux_dirent *lastdirent;
399         struct l_readdir_callback buf;
400         int error;
401
402         buf.lrc_dirent = NULL;
403         buf.lrc_list = dentry_list; 
404
405         error = vfs_readdir(file, l_filldir, &buf);
406         if (error < 0)
407                 return error;
408
409         lastdirent = buf.lrc_dirent;
410         if (lastdirent)
411                 lastdirent->lld_off = file->f_pos;
412
413         return 0; 
414 }
415 EXPORT_SYMBOL(l_readdir);
416 EXPORT_SYMBOL(obd_memory);
417 EXPORT_SYMBOL(obd_memmax);
418
419 static int __init lvfs_linux_init(void)
420 {
421         RETURN(0);
422 }
423
424 static void __exit lvfs_linux_exit(void)
425 {
426         int leaked;
427         ENTRY;
428
429         leaked = atomic_read(&obd_memory);
430         CDEBUG(leaked ? D_ERROR : D_INFO,
431                "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
432
433         return;
434 }
435
436 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
437 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
438 MODULE_LICENSE("GPL");
439
440 module_init(lvfs_linux_init);
441 module_exit(lvfs_linux_exit);