Whamcloud - gitweb
- landing of b_hd_cleanup_merge to HEAD.
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  lustre/lvfs/lvfs_linux.c
5  *  Lustre filesystem abstraction routines
6  *
7  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8  *   Author: Andreas Dilger <adilger@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef EXPORT_SYMTAB
27 # define EXPORT_SYMTAB
28 #endif
29
30 #define DEBUG_SUBSYSTEM S_FILTER
31
32 #include <linux/version.h>
33 #include <linux/fs.h>
34 #include <asm/unistd.h>
35 #include <linux/slab.h>
36 #include <linux/pagemap.h>
37 #include <linux/quotaops.h>
38 #include <linux/version.h>
39 #include <linux/kp30.h>
40 #include <linux/lustre_fsfilt.h>
41 #include <linux/obd.h>
42 #include <linux/obd_class.h>
43 #include <linux/module.h>
44 #include <linux/init.h>
45 #include <linux/lustre_compat25.h>
46 #include <linux/lvfs.h>
47 #include "lvfs_internal.h"
48
49 #include <linux/obd.h>
50 #include <linux/lustre_lib.h>
51 #include <linux/lustre_mds.h>   /* for mds_grp_hash_entry */
52
53 atomic_t obd_memory;
54 int obd_memmax;
55
56
/*
 * Run-context sanity checks.  They are development aids: unless
 * OBD_CTXT_DEBUG is defined every one of them expands to an empty statement.
 */
#ifndef OBD_CTXT_DEBUG

# define ASSERT_CTXT_MAGIC(magic) do {} while(0)
# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
# define ASSERT_KERNEL_CTXT(msg) do {} while(0)

#else /* OBD_CTXT_DEBUG */

/* the context structure must carry the run-context magic */
# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
/* the address limit (get_fs()) must differ from the kernel segment */
# define ASSERT_NOT_KERNEL_CTXT(msg) \
        LASSERTF(!segment_eq(get_fs(), get_ds()), msg)
/* the address limit (get_fs()) must equal the kernel segment */
# define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)

#endif /* OBD_CTXT_DEBUG */
69
70 static void push_group_info(struct lvfs_run_ctxt *save,
71                             struct group_info *ginfo)
72 {
73         if (!ginfo) {
74                 save->ngroups = current_ngroups;
75                 current_ngroups = 0;
76         } else {
77 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
78                 task_lock(current);
79                 save->group_info = current->group_info;
80                 current->group_info = ginfo;
81                 task_unlock(current);
82 #else
83                 LASSERT(ginfo->ngroups <= NGROUPS);
84                 /* save old */
85                 save->group_info.ngroups = current->ngroups;
86                 if (current->ngroups)
87                         memcpy(save->group_info.small_block, current->groups,
88                                current->ngroups);
89                 /* push new */
90                 current->ngroups = ginfo->ngroups;
91                 if (ginfo->ngroups)
92                         memcpy(current->groups, ginfo->small_block,
93                                current->ngroups);
94 #endif
95         }
96 }
97
98 static void pop_group_info(struct lvfs_run_ctxt *save,
99                            struct group_info *ginfo)
100 {
101         if (!ginfo) {
102                 current_ngroups = save->ngroups;
103         } else {
104 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
105                 task_lock(current);
106                 current->group_info = save->group_info;
107                 task_unlock(current);
108 #else
109                 current->ngroups = ginfo->ngroups;
110                 if (current->ngroups)
111                         memcpy(current->groups, save->group_info.small_block,
112                                current->ngroups);
113 #endif
114         }
115 }
116
117 /* push / pop to root of obd store */
118 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
119                struct lvfs_ucred *uc)
120 {
121         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
122         ASSERT_CTXT_MAGIC(new_ctx->magic);
123         LASSERT(save->magic != OBD_RUN_CTXT_MAGIC || save->pid != current->pid);
124         OBD_SET_CTXT_MAGIC(save);
125         save->pid = current->pid;
126
127         /*
128         CDEBUG(D_INFO,
129                "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
130                save, current, current->fs, current->fs->pwd,
131                atomic_read(&current->fs->pwd->d_count),
132                atomic_read(&current->fs->pwd->d_inode->i_count),
133                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
134                current->fs->pwdmnt,
135                atomic_read(&current->fs->pwdmnt->mnt_count));
136         */
137
138         save->fs = get_fs();
139         LASSERT(atomic_read(&current->fs->pwd->d_count));
140         LASSERT(atomic_read(&new_ctx->pwd->d_count));
141         save->pwd = dget(current->fs->pwd);
142         save->pwdmnt = mntget(current->fs->pwdmnt);
143         save->ngroups = current_ngroups;
144         save->luc.luc_umask = current->fs->umask;
145
146         LASSERT(save->pwd);
147         LASSERT(save->pwdmnt);
148         LASSERT(new_ctx->pwd);
149         LASSERT(new_ctx->pwdmnt);
150
151         if (uc) {
152                 save->luc.luc_fsuid = current->fsuid;
153                 save->luc.luc_fsgid = current->fsgid;
154                 save->luc.luc_cap = current->cap_effective;
155
156                 current->fsuid = uc->luc_fsuid;
157                 current->fsgid = uc->luc_fsgid;
158                 current->cap_effective = uc->luc_cap;
159
160                 push_group_info(save, uc->luc_ginfo);
161         }
162         current->fs->umask = 0; /* umask already applied on client */
163         set_fs(new_ctx->fs);
164         set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
165
166         /*
167         CDEBUG(D_INFO,
168                "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
169                new_ctx, current, current->fs, current->fs->pwd,
170                atomic_read(&current->fs->pwd->d_count),
171                atomic_read(&current->fs->pwd->d_inode->i_count),
172                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
173                current->fs->pwdmnt,
174                atomic_read(&current->fs->pwdmnt->mnt_count));
175         */
176 }
177 EXPORT_SYMBOL(push_ctxt);
178
179 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
180               struct lvfs_ucred *uc)
181 {
182         //printk("pc0");
183         ASSERT_CTXT_MAGIC(saved->magic);
184         LASSERT(saved->pid == current->pid);
185         saved->magic = 0;
186         saved->pid = 0;
187         //printk("pc1");
188         ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
189
190         /*
191         CDEBUG(D_INFO,
192                " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
193                new_ctx, current, current->fs, current->fs->pwd,
194                atomic_read(&current->fs->pwd->d_count),
195                atomic_read(&current->fs->pwd->d_inode->i_count),
196                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
197                current->fs->pwdmnt,
198                atomic_read(&current->fs->pwdmnt->mnt_count));
199         */
200
201         LASSERT(current->fs->pwd == new_ctx->pwd);
202         LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
203
204         set_fs(saved->fs);
205         set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
206
207         dput(saved->pwd);
208         mntput(saved->pwdmnt);
209         current->fs->umask = saved->luc.luc_umask;
210         if (uc) {
211                 current->fsuid = saved->luc.luc_fsuid;
212                 current->fsgid = saved->luc.luc_fsgid;
213                 current->cap_effective = saved->luc.luc_cap;
214
215                 pop_group_info(saved, uc->luc_ginfo);
216         }
217
218         /*
219         CDEBUG(D_INFO,
220                "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
221                saved, current, current->fs, current->fs->pwd,
222                atomic_read(&current->fs->pwd->d_count),
223                atomic_read(&current->fs->pwd->d_inode->i_count),
224                current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
225                current->fs->pwdmnt,
226                atomic_read(&current->fs->pwdmnt->mnt_count));
227         */
228 }
229 EXPORT_SYMBOL(pop_ctxt);
230
231 /* utility to make a file */
232 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
233 {
234         struct dentry *dchild;
235         int err = 0;
236         ENTRY;
237
238         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
239         CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
240
241         dchild = ll_lookup_one_len(name, dir, strlen(name));
242         if (IS_ERR(dchild))
243                 GOTO(out_up, dchild);
244
245         if (dchild->d_inode) {
246                 int old_mode = dchild->d_inode->i_mode;
247                 if (!S_ISREG(old_mode))
248                         GOTO(out_err, err = -EEXIST);
249
250                 /* Fixup file permissions if necessary */
251                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
252                         CWARN("fixing permissions on %s from %o to %o\n",
253                               name, old_mode, mode);
254                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
255                                                   (old_mode & ~S_IALLUGO);
256                         mark_inode_dirty(dchild->d_inode);
257                 }
258                 GOTO(out_up, dchild);
259         }
260
261         err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
262                             NULL);
263         if (err)
264                 GOTO(out_err, err);
265
266         RETURN(dchild);
267
268 out_err:
269         dput(dchild);
270         dchild = ERR_PTR(err);
271 out_up:
272         return dchild;
273 }
274 EXPORT_SYMBOL(simple_mknod);
275
276 /* utility to make a directory */
277 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
278 {
279         struct dentry *dchild;
280         int err = 0;
281         ENTRY;
282
283         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
284         CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
285         dchild = ll_lookup_one_len(name, dir, strlen(name));
286         if (IS_ERR(dchild))
287                 GOTO(out_up, dchild);
288
289         if (dchild->d_inode) {
290                 int old_mode = dchild->d_inode->i_mode;
291                 if (!S_ISDIR(old_mode))
292                         GOTO(out_err, err = -ENOTDIR);
293
294                 /* Fixup directory permissions if necessary */
295                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
296                         CWARN("fixing permissions on %s from %o to %o\n",
297                               name, old_mode, mode);
298                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
299                                                   (old_mode & ~S_IALLUGO);
300                         mark_inode_dirty(dchild->d_inode);
301                 }
302                 GOTO(out_up, dchild);
303         }
304
305         err = vfs_mkdir(dir->d_inode, dchild, mode);
306         if (err)
307                 GOTO(out_err, err);
308
309         RETURN(dchild);
310
311 out_err:
312         dput(dchild);
313         dchild = ERR_PTR(err);
314 out_up:
315         return dchild;
316 }
317 EXPORT_SYMBOL(simple_mkdir);
318
319 /*
320  * Read a file from within kernel context.  Prior to calling this
321  * function we should already have done a push_ctxt().
322  */
323 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
324 {
325         ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
326         if (!file || !file->f_op || !file->f_op->read || !off)
327                 RETURN(-ENOSYS);
328
329         return file->f_op->read(file, buf, len, off);
330 }
331 EXPORT_SYMBOL(lustre_fread);
332
333 /*
334  * Write a file from within kernel context.  Prior to calling this
335  * function we should already have done a push_ctxt().
336  */
337 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
338 {
339         ENTRY;
340         ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
341         if (!file)
342                 RETURN(-ENOENT);
343         if (!file->f_op)
344                 RETURN(-ENOSYS);
345         if (!off)
346                 RETURN(-EINVAL);
347
348         if (!file->f_op->write)
349                 RETURN(-EROFS);
350
351         RETURN(file->f_op->write(file, buf, len, off));
352 }
353 EXPORT_SYMBOL(lustre_fwrite);
354
355 /*
356  * Sync a file from within kernel context.  Prior to calling this
357  * function we should already have done a push_ctxt().
358  */
359 int lustre_fsync(struct file *file)
360 {
361         ENTRY;
362         ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
363         if (!file || !file->f_op || !file->f_op->fsync)
364                 RETURN(-ENOSYS);
365
366         RETURN(file->f_op->fsync(file, file->f_dentry, 0));
367 }
368 EXPORT_SYMBOL(lustre_fsync);
369
370 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
371                              int flags)
372 {
373         mntget(ctxt->pwdmnt);
374         return dentry_open(de, ctxt->pwdmnt, flags);
375 }
376 EXPORT_SYMBOL(l_dentry_open);
377
378 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
379                      ino_t ino, unsigned int d_type)
380 {
381         struct l_linux_dirent *dirent;
382         struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
383         
384         dirent = buf->lrc_dirent;
385         if (dirent)
386                dirent->lld_off = offset; 
387
388         OBD_ALLOC(dirent, sizeof(*dirent));
389
390         list_add_tail(&dirent->lld_list, buf->lrc_list);
391
392         buf->lrc_dirent = dirent;
393         dirent->lld_ino = ino;
394         LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
395         memcpy(dirent->lld_name, name, namlen);
396
397         return 0;
398 }
399
400 long l_readdir(struct file *file, struct list_head *dentry_list)
401 {
402         struct l_linux_dirent *lastdirent;
403         struct l_readdir_callback buf;
404         int error;
405
406         buf.lrc_dirent = NULL;
407         buf.lrc_list = dentry_list; 
408
409         error = vfs_readdir(file, l_filldir, &buf);
410         if (error < 0)
411                 return error;
412
413         lastdirent = buf.lrc_dirent;
414         if (lastdirent)
415                 lastdirent->lld_off = file->f_pos;
416
417         return 0; 
418 }
419 EXPORT_SYMBOL(l_readdir);
420 EXPORT_SYMBOL(obd_memory);
421 EXPORT_SYMBOL(obd_memmax);
422
423 static int __init lvfs_linux_init(void)
424 {
425         RETURN(0);
426 }
427
428 static void __exit lvfs_linux_exit(void)
429 {
430         int leaked;
431         ENTRY;
432
433         leaked = atomic_read(&obd_memory);
434         CDEBUG(leaked ? D_ERROR : D_INFO,
435                "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
436
437         return;
438 }
439
440 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
441 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
442 MODULE_LICENSE("GPL");
443
444 module_init(lvfs_linux_init);
445 module_exit(lvfs_linux_exit);