1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/lib/lvfs_linux.c
5 * Lustre filesystem abstraction routines
7 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Defines EXPORT_SYMTAB and sets DEBUG_SUBSYSTEM to S_FILTER for this file.
 * NOTE(review): the embedded line numbers jump around these defines, so any
 * surrounding preprocessor guard is not visible here -- confirm. */
27 # define EXPORT_SYMTAB
30 #define DEBUG_SUBSYSTEM S_FILTER
32 #include <linux/version.h>
34 #include <asm/unistd.h>
35 #include <linux/slab.h>
36 #include <linux/pagemap.h>
37 #include <linux/quotaops.h>
38 #include <linux/version.h>
39 #include <libcfs/kp30.h>
40 #include <linux/lustre_fsfilt.h>
41 #include <linux/obd.h>
42 #include <linux/obd_class.h>
43 #include <linux/module.h>
44 #include <linux/init.h>
45 #include <linux/lustre_compat25.h>
46 #include <linux/lvfs.h>
47 #include "lvfs_internal.h"
49 #include <linux/obd.h>
50 #include <linux/lustre_lib.h>
51 #include <linux/lustre_mds.h> /* for mds_grp_hash_entry */
/* Context-checking assertion macros.  Each macro appears twice below: once
 * built on LASSERT/LASSERTF and once expanding to an empty statement.
 * NOTE(review): the preprocessor conditional that selects between the two
 * sets, and the continuation line of ASSERT_NOT_KERNEL_CTXT, are not visible
 * in this listing -- confirm against the full file. */
56 /* Debugging check only needed during development */
58 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
59 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
61 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
64 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
65 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
66 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/* Switch the current task's supplementary groups to @ginfo, remembering the
 * previous group state in @save so pop_group_info() can restore it.
 *
 * save:  run context that receives the saved group state.
 * ginfo: group info to install on the current task.
 *
 * On kernels >= 2.6.4 the task's group_info pointer is swapped; on older
 * kernels the group count and group array are copied through
 * save->group_info.small_block instead.
 * NOTE(review): the branch structure (including the condition under which
 * only current_ngroups is saved) is on lines missing from this listing. */
69 static void push_group_info(struct lvfs_run_ctxt *save,
70 struct group_info *ginfo)
73 save->ngroups = current_ngroups;
/* Newer kernels: groups live in a struct group_info hung off the task. */
76 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
78 save->group_info = current->group_info;
79 current->group_info = ginfo;
/* Older kernels: the new group list must fit in the task's fixed array. */
82 LASSERT(ginfo->ngroups <= NGROUPS);
84 save->group_info.ngroups = current->ngroups;
86 memcpy(save->group_info.small_block, current->groups,
89 current->ngroups = ginfo->ngroups;
91 memcpy(current->groups, ginfo->small_block,
/* Restore the current task's supplementary groups from @save, undoing a
 * previous push_group_info().
 *
 * save:  run context holding the group state saved at push time.
 * ginfo: group info that was installed by the matching push.
 *
 * On kernels >= 2.6.4 the saved group_info pointer is put back on the task;
 * on older kernels the saved group count and group array are copied back
 * from save->group_info.
 * NOTE(review): the branch structure is on lines missing from this listing. */
97 static void pop_group_info(struct lvfs_run_ctxt *save,
98 struct group_info *ginfo)
101 current_ngroups = save->ngroups;
103 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
105 current->group_info = save->group_info;
106 task_unlock(current);
/* Restore the count that push_group_info() stored in save->group_info, not
 * the count of the groups being popped (ginfo); the array contents below are
 * likewise restored from save->group_info.small_block. */
108 current->ngroups = save->group_info.ngroups;
109 if (current->ngroups)
110 memcpy(current->groups, save->group_info.small_block,
116 /* push / pop to root of obd store */
/* Enter the run context @new_ctx on the current task.
 *
 * save:    receives the state being replaced: pid, pwd/pwdmnt (extra
 *          references taken with dget()/mntget()), umask, and the task's
 *          uid/gid/fsuid/fsgid/effective capabilities/user nid.
 * new_ctx: context whose pwd/pwdmnt become the task's working directory.
 * uc:      credentials to install on the task (ids, capabilities, nid and
 *          supplementary groups via push_group_info()).
 *
 * The task umask is forced to 0 while the context is pushed.
 * NOTE(review): several lines of this function (braces, debug-call openers
 * and any condition guarding the use of @uc) are missing from this listing.
 * The `&current` arguments to atomic_read() below were restored from the
 * mis-encoded text `¤t`. */
117 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
118 struct lvfs_ucred *uc)
120 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
121 ASSERT_CTXT_MAGIC(new_ctx->magic);
/* Refuse a nested push into the same save area by the same task. */
122 LASSERT(save->magic != OBD_RUN_CTXT_MAGIC || save->pid != current->pid);
123 OBD_SET_CTXT_MAGIC(save);
124 save->pid = current->pid;
128 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
129 save, current, current->fs, current->fs->pwd,
130 atomic_read(&current->fs->pwd->d_count),
131 atomic_read(&current->fs->pwd->d_inode->i_count),
132 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
134 atomic_read(&current->fs->pwdmnt->mnt_count));
138 LASSERT(atomic_read(&current->fs->pwd->d_count));
139 LASSERT(atomic_read(&new_ctx->pwd->d_count));
/* Pin the old working directory and mount until pop_ctxt() restores them. */
140 save->pwd = dget(current->fs->pwd);
141 save->pwdmnt = mntget(current->fs->pwdmnt);
142 save->ngroups = current_ngroups;
143 save->luc.luc_umask = current->fs->umask;
146 LASSERT(save->pwdmnt);
147 LASSERT(new_ctx->pwd);
148 LASSERT(new_ctx->pwdmnt);
/* Save the task's identity ... */
151 save->luc.luc_uid = current->uid;
152 save->luc.luc_gid = current->gid;
153 save->luc.luc_fsuid = current->fsuid;
154 save->luc.luc_fsgid = current->fsgid;
155 save->luc.luc_cap = current->cap_effective;
156 save->luc.luc_nid = current->user->nid;
/* ... and replace it with the caller-supplied credentials. */
158 current->uid = uc->luc_uid;
159 current->gid = uc->luc_gid;
160 current->fsuid = uc->luc_fsuid;
161 current->fsgid = uc->luc_fsgid;
162 current->cap_effective = uc->luc_cap;
163 current->user->nid = uc->luc_nid;
165 push_group_info(save, uc->luc_ginfo);
167 current->fs->umask = 0; /* umask already applied on client */
169 set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
173 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
174 new_ctx, current, current->fs, current->fs->pwd,
175 atomic_read(&current->fs->pwd->d_count),
176 atomic_read(&current->fs->pwd->d_inode->i_count),
177 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
179 atomic_read(&current->fs->pwdmnt->mnt_count));
182 EXPORT_SYMBOL(push_ctxt);
/* Leave the run context @new_ctx and restore the state captured in @saved by
 * the matching push_ctxt().
 *
 * saved:   state recorded at push time; must belong to the current task
 *          (asserted via saved->pid).
 * new_ctx: the context being left; the task's pwd/pwdmnt are asserted to
 *          still match it.
 * uc:      credentials used for the push; its group info is handed to
 *          pop_group_info().
 *
 * Restores pwd/pwdmnt via set_fs_pwd(), drops the mount reference held in
 * @saved, and puts back umask, uid/gid/fsuid/fsgid, effective capabilities
 * and user nid.
 * NOTE(review): several lines of this function (braces, debug-call openers
 * and any condition guarding the use of @uc) are missing from this listing.
 * The `&current` arguments to atomic_read() below were restored from the
 * mis-encoded text `¤t`. */
184 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
185 struct lvfs_ucred *uc)
188 ASSERT_CTXT_MAGIC(saved->magic);
189 LASSERT(saved->pid == current->pid);
193 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
197 " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
198 new_ctx, current, current->fs, current->fs->pwd,
199 atomic_read(&current->fs->pwd->d_count),
200 atomic_read(&current->fs->pwd->d_inode->i_count),
201 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
203 atomic_read(&current->fs->pwdmnt->mnt_count));
/* The task must still be sitting in the context it is popping. */
206 LASSERT(current->fs->pwd == new_ctx->pwd);
207 LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
210 set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
213 mntput(saved->pwdmnt);
214 current->fs->umask = saved->luc.luc_umask;
216 current->uid = saved->luc.luc_uid;
217 current->gid = saved->luc.luc_gid;
218 current->fsuid = saved->luc.luc_fsuid;
219 current->fsgid = saved->luc.luc_fsgid;
220 current->cap_effective = saved->luc.luc_cap;
221 current->user->nid = saved->luc.luc_nid;
222 pop_group_info(saved, uc->luc_ginfo);
227 "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
228 saved, current, current->fs, current->fs->pwd,
229 atomic_read(&current->fs->pwd->d_count),
230 atomic_read(&current->fs->pwd->d_inode->i_count),
231 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
233 atomic_read(&current->fs->pwdmnt->mnt_count));
236 EXPORT_SYMBOL(pop_ctxt);
238 /* utility to make a file */
/* Look up @name under @dir and create it as a regular file if it does not
 * exist yet.
 *
 * dir:  parent directory dentry.
 * name: NUL-terminated name of the entry (length taken with strlen()).
 * mode: requested mode; the file-type bits are replaced by S_IFREG on create.
 * fix:  when non-zero and the entry already exists as a regular file whose
 *       permission bits differ from @mode, rewrite the permission bits and
 *       mark the inode dirty.
 *
 * Returns the child dentry, or ERR_PTR(err) on failure; an existing entry
 * that is not a regular file yields -EEXIST.
 * NOTE(review): the error check after the lookup, the remaining arguments
 * of ll_vfs_create() and the out_up/out_err labels are on lines missing
 * from this listing. */
239 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
241 struct dentry *dchild;
245 ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
246 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
248 dchild = ll_lookup_one_len(name, dir, strlen(name));
250 GOTO(out_up, dchild);
/* Entry already exists: accept it only if it is a regular file. */
252 if (dchild->d_inode) {
253 int old_mode = dchild->d_inode->i_mode;
254 if (!S_ISREG(old_mode))
255 GOTO(out_err, err = -EEXIST);
257 /* Fixup file permissions if necessary */
258 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
259 CWARN("fixing permissions on %s from %o to %o\n",
260 name, old_mode, mode);
261 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
262 (old_mode & ~S_IALLUGO);
263 mark_inode_dirty(dchild->d_inode);
265 GOTO(out_up, dchild);
268 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
277 dchild = ERR_PTR(err);
281 EXPORT_SYMBOL(simple_mknod);
283 /* utility to make a directory */
/* Look up @name under @dir and create it as a directory if it does not
 * exist yet.
 *
 * dir:  parent directory dentry.
 * name: NUL-terminated name of the entry (length taken with strlen()).
 * mode: mode passed to vfs_mkdir() on create.
 * fix:  when non-zero and the entry already exists as a directory whose
 *       permission bits differ from @mode, rewrite the permission bits and
 *       mark the inode dirty.
 *
 * Returns the child dentry, or ERR_PTR(err) on failure; an existing entry
 * that is not a directory is logged and yields -ENOTDIR.
 * NOTE(review): the error check after the lookup and the out_up/out_err
 * labels are on lines missing from this listing. */
284 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
286 struct dentry *dchild;
290 ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
291 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
292 dchild = ll_lookup_one_len(name, dir, strlen(name));
294 GOTO(out_up, dchild);
/* Entry already exists: accept it only if it is a directory. */
296 if (dchild->d_inode) {
297 int old_mode = dchild->d_inode->i_mode;
298 if (!S_ISDIR(old_mode)) {
299 CERROR("found %s (%lu/%u) is mode %o\n", name,
300 dchild->d_inode->i_ino,
301 dchild->d_inode->i_generation, old_mode);
302 GOTO(out_err, err = -ENOTDIR);
305 /* Fixup directory permissions if necessary */
306 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
307 CWARN("fixing permissions on %s from %o to %o\n",
308 name, old_mode, mode);
309 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
310 (old_mode & ~S_IALLUGO);
311 mark_inode_dirty(dchild->d_inode);
313 GOTO(out_up, dchild);
316 err = vfs_mkdir(dir->d_inode, dchild, mode);
324 dchild = ERR_PTR(err);
328 EXPORT_SYMBOL(simple_mkdir);
331 * Read a file from within kernel context. Prior to calling this
332 * function we should already have done a push_ctxt().
/* Read up to @len bytes from @file at *@off into @buf by calling the file's
 * f_op->read method directly, and return that method's result.
 * The call is only made when @file, its f_op table, the read method and
 * @off are all non-NULL.
 * NOTE(review): the value returned when that check fails is on a line
 * missing from this listing. */
334 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
336 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
337 if (!file || !file->f_op || !file->f_op->read || !off)
340 return file->f_op->read(file, buf, len, off);
342 EXPORT_SYMBOL(lustre_fread);
345 * Write a file from within kernel context. Prior to calling this
346 * function we should already have done a push_ctxt().
/* Write @len bytes from @buf to @file at *@off by calling the file's
 * f_op->write method directly, and return that method's result.
 * NOTE(review): only the check for a missing write method is visible here;
 * any other argument checks and the error values returned are on lines
 * missing from this listing. */
348 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
351 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
359 if (!file->f_op->write)
362 RETURN(file->f_op->write(file, buf, len, off));
364 EXPORT_SYMBOL(lustre_fwrite);
367 * Sync a file from within kernel context. Prior to calling this
368 * function we should already have done a push_ctxt().
/* Sync @file by calling its f_op->fsync method with the file's dentry and a
 * final argument of 0, and return that method's result.
 * The call is only made when @file, its f_op table and the fsync method are
 * all non-NULL.
 * NOTE(review): the value returned when that check fails is on a line
 * missing from this listing. */
370 int lustre_fsync(struct file *file)
373 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
374 if (!file || !file->f_op || !file->f_op->fsync)
377 RETURN(file->f_op->fsync(file, file->f_dentry, 0));
379 EXPORT_SYMBOL(lustre_fsync);
/* Open dentry @de on the mount recorded in @ctxt->pwdmnt and return the
 * result of dentry_open().  A reference on the mount is taken with mntget()
 * before the open.
 * NOTE(review): the rest of the parameter list (including `flags`) is on a
 * line missing from this listing. */
381 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
384 mntget(ctxt->pwdmnt);
385 return dentry_open(de, ctxt->pwdmnt, flags);
387 EXPORT_SYMBOL(l_dentry_open);
/* Directory-entry callback handed to vfs_readdir() by l_readdir().
 *
 * __buf is the struct l_readdir_callback supplied by l_readdir().  For each
 * entry this records @offset in the previously collected dirent, allocates
 * a new struct l_linux_dirent, appends it to the caller's list, remembers
 * it as the most recent dirent, and stores the inode number and name.
 * The name must fit in lld_name with room for one more byte (asserted).
 * NOTE(review): the NULL checks around the previous dirent and the
 * allocation, the name termination and the return value are on lines
 * missing from this listing. */
389 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
390 ino_t ino, unsigned int d_type)
392 struct l_linux_dirent *dirent;
393 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* Start from the dirent collected by the previous callback invocation. */
395 dirent = buf->lrc_dirent;
397 dirent->lld_off = offset;
399 OBD_ALLOC(dirent, sizeof(*dirent));
403 list_add_tail(&dirent->lld_list, buf->lrc_list);
405 buf->lrc_dirent = dirent;
406 dirent->lld_ino = ino;
407 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
408 memcpy(dirent->lld_name, name, namlen);
/* Read the entries of directory @file into @dentry_list.
 * Runs vfs_readdir() with l_filldir() as the callback, which appends one
 * struct l_linux_dirent per entry to @dentry_list; afterwards the last
 * collected dirent's lld_off is set to the file's current position.
 * NOTE(review): the error handling after vfs_readdir(), the NULL check on
 * the last dirent and the return value are on lines missing from this
 * listing. */
413 long l_readdir(struct file *file, struct list_head *dentry_list)
415 struct l_linux_dirent *lastdirent;
416 struct l_readdir_callback buf;
419 buf.lrc_dirent = NULL;
420 buf.lrc_list = dentry_list;
422 error = vfs_readdir(file, l_filldir, &buf);
426 lastdirent = buf.lrc_dirent;
428 lastdirent->lld_off = file->f_pos;
432 EXPORT_SYMBOL(l_readdir);
/* Export the obd_memory and obd_memmax counters to other modules. */
433 EXPORT_SYMBOL(obd_memory);
434 EXPORT_SYMBOL(obd_memmax);
/* State for the memory-debugging hash table, compiled only when both
 * CONFIG_DEBUG_MEMORY and __KERNEL__ are defined: a spinlock, the bucket
 * array, and the number of buckets in it. */
436 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
437 static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED;
438 static struct hlist_head *obd_memtable;
439 static unsigned long obd_memtable_size;
/* Allocate and initialise the memory-debugging hash table.
 *
 * size: table size in bytes; the bucket count is size divided by the size
 *       of one struct hlist_head.
 *
 * Each bucket head is initialised with INIT_HLIST_HEAD().
 * NOTE(review): hashfn() masks with (obd_memtable_size - 1), so the bucket
 * count is presumably expected to be a power of two -- confirm.  The
 * allocation-failure check, the loop header and the return value are on
 * lines missing from this listing. */
441 static int lvfs_memdbg_init(int size)
443 struct hlist_head *head;
/* The table must hold more than one bucket head; the original compared
 * against sizeof(sizeof(...)), i.e. the size of a size_t. */
446 LASSERT(size > sizeof(struct hlist_head));
447 obd_memtable_size = size / sizeof(struct hlist_head);
449 CWARN("allocating %lu malloc entries\n",
450 (unsigned long)obd_memtable_size);
452 obd_memtable = kmalloc(size, GFP_KERNEL);
456 i = obd_memtable_size;
459 INIT_HLIST_HEAD(head);
/* Empty the memory-debugging hash table.
 * With obd_memlist_lock held, walks every bucket and unlinks each
 * struct mem_track from its chain; the deletion-safe iterator is used
 * because entries are removed during the walk.
 * NOTE(review): whether the entries and the table itself are freed, and
 * the return value, are on lines missing from this listing. */
467 static int lvfs_memdbg_cleanup(void)
469 struct hlist_node *node = NULL, *tmp = NULL;
470 struct hlist_head *head;
471 struct mem_track *mt;
474 spin_lock(&obd_memlist_lock);
475 for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) {
476 hlist_for_each_safe(node, tmp, head) {
477 mt = hlist_entry(node, struct mem_track, m_hash);
478 hlist_del_init(&mt->m_hash);
482 spin_unlock(&obd_memlist_lock);
/* Map @ptr to a bucket index by masking its address with
 * (obd_memtable_size - 1).
 * NOTE(review): the mask reaches every bucket only if obd_memtable_size is
 * a power of two -- confirm against the size passed to lvfs_memdbg_init(). */
487 static inline unsigned long const hashfn(void *ptr)
489 return (unsigned long)ptr &
490 (obd_memtable_size - 1);
/* Add @mt at the head of its hash bucket.  Takes no lock itself; the
 * callers visible in this file hold obd_memlist_lock around the call.
 * NOTE(review): the expression selecting the bucket is on a line missing
 * from this listing. */
493 static void __lvfs_memdbg_insert(struct mem_track *mt)
495 struct hlist_head *head = obd_memtable +
497 hlist_add_head(&mt->m_hash, head);
/* Insert the tracking record @mt into the memory-debugging table while
 * holding obd_memlist_lock.  No duplicate check is made here; see
 * lvfs_memdbg_check_insert() for the checked variant. */
500 void lvfs_memdbg_insert(struct mem_track *mt)
502 spin_lock(&obd_memlist_lock);
503 __lvfs_memdbg_insert(mt);
504 spin_unlock(&obd_memlist_lock);
506 EXPORT_SYMBOL(lvfs_memdbg_insert);
/* Unlink @mt from its hash chain with hlist_del_init().  Takes no lock
 * itself; the callers visible in this file hold obd_memlist_lock around
 * the call. */
508 static void __lvfs_memdbg_remove(struct mem_track *mt)
510 hlist_del_init(&mt->m_hash);
/* Remove the tracking record @mt from the memory-debugging table while
 * holding obd_memlist_lock. */
513 void lvfs_memdbg_remove(struct mem_track *mt)
515 spin_lock(&obd_memlist_lock);
516 __lvfs_memdbg_remove(mt);
517 spin_unlock(&obd_memlist_lock);
519 EXPORT_SYMBOL(lvfs_memdbg_remove);
/* Search the bucket selected by hashfn(@ptr) for a tracking record whose
 * m_ptr equals @ptr.  Takes no lock itself; the callers visible in this
 * file hold obd_memlist_lock around the call.
 * NOTE(review): the action taken on a match, the handling of the
 * not-found case and the return statement are on lines missing from this
 * listing -- confirm that NULL is returned when nothing matches. */
521 static struct mem_track *__lvfs_memdbg_find(void *ptr)
523 struct hlist_node *node = NULL;
524 struct mem_track *mt = NULL;
525 struct hlist_head *head;
527 head = obd_memtable + hashfn(ptr);
529 hlist_for_each(node, head) {
530 mt = hlist_entry(node, struct mem_track, m_hash);
531 if ((unsigned long)mt->m_ptr == (unsigned long)ptr)
/* Look up the tracking record for @ptr while holding obd_memlist_lock.
 * The lock is released before this function ends, so the record is not
 * protected by it afterwards.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably the record found is returned -- confirm. */
538 struct mem_track *lvfs_memdbg_find(void *ptr)
540 struct mem_track *mt;
542 spin_lock(&obd_memlist_lock);
543 mt = __lvfs_memdbg_find(ptr);
544 spin_unlock(&obd_memlist_lock);
548 EXPORT_SYMBOL(lvfs_memdbg_find);
/* Insert @mt only if no record with the same m_ptr is already tracked.
 * The lookup and the insert happen under a single hold of
 * obd_memlist_lock, and the lock is released on both paths.
 * NOTE(review): the values returned on the two paths are on lines missing
 * from this listing. */
550 int lvfs_memdbg_check_insert(struct mem_track *mt)
552 spin_lock(&obd_memlist_lock);
553 if (!__lvfs_memdbg_find(mt->m_ptr)) {
554 __lvfs_memdbg_insert(mt);
555 spin_unlock(&obd_memlist_lock);
558 spin_unlock(&obd_memlist_lock);
561 EXPORT_SYMBOL(lvfs_memdbg_check_insert);
/* Look up the tracking record for @ptr and unlink it from the table.
 * The lookup and the removal happen under a single hold of
 * obd_memlist_lock, and the lock is released on both visible paths.
 * NOTE(review): the return type, the found/not-found test and the values
 * returned are on lines missing from this listing. */
564 lvfs_memdbg_check_remove(void *ptr)
566 struct mem_track *mt;
568 spin_lock(&obd_memlist_lock);
569 mt = __lvfs_memdbg_find(ptr);
571 __lvfs_memdbg_remove(mt);
572 spin_unlock(&obd_memlist_lock);
575 spin_unlock(&obd_memlist_lock);
578 EXPORT_SYMBOL(lvfs_memdbg_check_remove);
/* Report memory that is still accounted as allocated.
 * Reads the obd_memory counter into `leaked` and emits a warning with the
 * maximum and leaked amounts.  When CONFIG_DEBUG_MEMORY and __KERNEL__ are
 * both defined it also walks every bucket of the tracking table under
 * obd_memlist_lock and prints one line per record (pointer, size, source
 * location, and whether the record is flagged MT_FLAGS_WRONG_SIZE), then
 * dumps the debug log.
 * NOTE(review): the conditions guarding the warning and the log dump are
 * on lines missing from this listing. */
581 void lvfs_memdbg_show(void)
583 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
584 struct hlist_node *node = NULL;
585 struct hlist_head *head;
586 struct mem_track *mt;
590 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
594 leaked = atomic_read(&obd_memory);
597 CWARN("memory leaks detected (max %d, leaked %d)\n",
600 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
601 spin_lock(&obd_memlist_lock);
602 for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) {
603 hlist_for_each(node, head) {
604 mt = hlist_entry(node, struct mem_track, m_hash);
605 CWARN(" [%s] ptr: 0x%p, size: %d, src at \"%s\"\n",
606 ((mt->m_flags & MT_FLAGS_WRONG_SIZE) ?
607 "wrong ck size" : "leaked memory"),
608 mt->m_ptr, mt->m_size, mt->m_loc);
611 spin_unlock(&obd_memlist_lock);
613 /* remove for production */
614 portals_debug_dumplog();
617 EXPORT_SYMBOL(lvfs_memdbg_show);
/* Module init: when CONFIG_DEBUG_MEMORY and __KERNEL__ are both defined,
 * set up the memory-debugging table with PAGE_SIZE bytes of buckets, then
 * initialise the lvfs mount list.
 * NOTE(review): the result of lvfs_memdbg_init() is not used on the visible
 * line, and this function's return statement is on a line missing from
 * this listing. */
619 static int __init lvfs_linux_init(void)
622 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
623 lvfs_memdbg_init(PAGE_SIZE);
625 lvfs_mount_list_init();
/* Module exit: tear down the lvfs mount list and, when CONFIG_DEBUG_MEMORY
 * and __KERNEL__ are both defined, empty the memory-debugging table.
 * NOTE(review): statements between these calls are on lines missing from
 * this listing. */
629 static void __exit lvfs_linux_exit(void)
633 lvfs_mount_list_cleanup();
636 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
637 lvfs_memdbg_cleanup();
/* Module metadata, and registration of lvfs_linux_init()/lvfs_linux_exit()
 * as the module's load and unload entry points. */
643 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
644 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
645 MODULE_LICENSE("GPL");
647 module_init(lvfs_linux_init);
648 module_exit(lvfs_linux_exit);