1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/lib/lvfs_linux.c
5 * Lustre filesystem abstraction routines
7 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 # define EXPORT_SYMTAB
30 #define DEBUG_SUBSYSTEM S_FILTER
32 #include <linux/version.h>
34 #include <asm/unistd.h>
35 #include <linux/slab.h>
36 #include <linux/pagemap.h>
37 #include <linux/quotaops.h>
38 #include <linux/version.h>
39 #include <libcfs/kp30.h>
40 #include <lustre_fsfilt.h>
42 #include <obd_class.h>
43 #include <linux/module.h>
44 #include <linux/init.h>
45 #include <linux/lustre_compat25.h>
47 #include "lvfs_internal.h"
50 #include <lustre_lib.h>
51 #include <lustre_quota.h>
/*
 * Run-context sanity assertions used by the push/pop and file helpers below.
 *
 * Two sets of definitions are given.  In the first, ASSERT_CTXT_MAGIC()
 * checks a context's magic against OBD_RUN_CTXT_MAGIC, and the
 * ASSERT_[NOT_]KERNEL_CTXT() macros assert on whether get_fs() compares
 * equal to get_ds().  In the second, all three expand to an empty statement.
 *
 * NOTE(review): the preprocessor conditional that selects between the two
 * sets, and the continuation line of ASSERT_NOT_KERNEL_CTXT (it ends in a
 * backslash), are not visible in this view -- confirm against the full file.
 */
56 /* Debugging check only needed during development */
58 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
59 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
61 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
63 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
64 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
65 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/*
 * push_group_info - install the supplementary group list from @uce on the
 * current task, remembering the previous list in @save.
 *
 * @save: receives the task's current group state (ngroups / group_info)
 * @uce:  upcall cache entry supplying ue_group_info; when NULL, ginfo is NULL
 *
 * Two variants are visible: for kernels >= 2.6.4 the task's group_info
 * pointer is swapped; the other code copies the gid arrays through
 * small_block after asserting the counts fit.
 *
 * NOTE(review): braces, the #else/#endif lines, any NULL-ginfo guard and
 * any locking calls are not visible in this view -- confirm against the
 * full file before relying on the exact control flow.
 */
68 static void push_group_info(struct lvfs_run_ctxt *save,
69 struct upcall_cache_entry *uce)
71 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
74 save->ngroups = current_ngroups;
77 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
/* Pointer swap: keep the old group_info so pop_group_info() can put it back. */
79 save->group_info = current->group_info;
80 current->group_info = ginfo;
/* Copying variant: bounds are asserted before the gid arrays are copied. */
83 LASSERT(ginfo->ngroups <= NGROUPS);
84 LASSERT(current->ngroups <= NGROUPS_SMALL);
86 save->group_info.ngroups = current->ngroups;
88 memcpy(save->group_info.small_block, current->groups,
89 current->ngroups * sizeof(gid_t));
91 current->ngroups = ginfo->ngroups;
93 memcpy(current->groups, ginfo->small_block,
94 current->ngroups * sizeof(gid_t));
/*
 * pop_group_info - restore the current task's supplementary group state
 * from @save (the counterpart of push_group_info()).
 *
 * @save: state previously recorded by push_group_info()
 * @uce:  upcall cache entry; ginfo is derived from it exactly as in
 *        push_group_info()
 *
 * For kernels >= 2.6.4 the saved group_info pointer is reinstated; the
 * other variant restores ngroups and, if non-zero, copies the saved gids
 * back from small_block.
 *
 * NOTE(review): braces, #else/#endif, the matching task_lock() and any use
 * of ginfo are not visible in this view -- confirm against the full file.
 */
99 static void pop_group_info(struct lvfs_run_ctxt *save,
100 struct upcall_cache_entry *uce)
102 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
105 current_ngroups = save->ngroups;
107 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
109 current->group_info = save->group_info;
110 task_unlock(current);
112 current->ngroups = save->group_info.ngroups;
/* Nothing to copy back when the saved list was empty. */
113 if (current->ngroups)
114 memcpy(current->groups, save->group_info.small_block,
115 current->ngroups * sizeof(gid_t));
/*
 * push_ctxt - switch the current task into the filesystem context @new_ctx.
 *
 * The task's working directory and mount (with references taken via
 * dget()/mntget()), umask, fsuid, fsgid and effective capabilities are
 * recorded in @save so that pop_ctxt() can restore them.  The credentials
 * from @uc are then installed (including its group list through
 * push_group_info()), the umask is cleared, and the task's pwd/pwdmnt are
 * pointed at @new_ctx via ll_set_fs_pwd().
 *
 * @save:    filled in with the state being replaced
 * @new_ctx: context to enter; its magic, pwd and pwdmnt are asserted
 * @uc:      credentials to adopt (luc_fsuid, luc_fsgid, luc_cap, luc_uce)
 *
 * NOTE(review): this view is missing lines (braces, the opener of the two
 * trace-message argument lists, and whatever surrounds the @uc accesses);
 * it is not visible whether @uc may be NULL -- confirm against the full file.
 */
120 /* push / pop to root of obd store */
121 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
122 struct lvfs_ucred *uc)
124 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
125 ASSERT_CTXT_MAGIC(new_ctx->magic);
126 OBD_SET_CTXT_MAGIC(save);
130 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
131 save, current, current->fs, current->fs->pwd,
132 atomic_read(&current->fs->pwd->d_count),
133 atomic_read(&current->fs->pwd->d_inode->i_count),
134 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
136 atomic_read(&current->fs->pwdmnt->mnt_count));
/* Both the outgoing and incoming directories must still be referenced. */
140 LASSERT(atomic_read(&current->fs->pwd->d_count));
141 LASSERT(atomic_read(&new_ctx->pwd->d_count));
/* Record the outgoing pwd/mount (taking references) and umask. */
142 save->pwd = dget(current->fs->pwd);
143 save->pwdmnt = mntget(current->fs->pwdmnt);
144 save->luc.luc_umask = current->fs->umask;
147 LASSERT(save->pwdmnt);
148 LASSERT(new_ctx->pwd);
149 LASSERT(new_ctx->pwdmnt);
/* Swap credentials: remember the task's own, then adopt those in @uc. */
152 save->luc.luc_fsuid = current->fsuid;
153 save->luc.luc_fsgid = current->fsgid;
154 save->luc.luc_cap = current->cap_effective;
156 current->fsuid = uc->luc_fsuid;
157 current->fsgid = uc->luc_fsgid;
158 current->cap_effective = uc->luc_cap;
159 push_group_info(save, uc->luc_uce);
161 current->fs->umask = 0; /* umask already applied on client */
163 ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
167 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
168 new_ctx, current, current->fs, current->fs->pwd,
169 atomic_read(&current->fs->pwd->d_count),
170 atomic_read(&current->fs->pwd->d_inode->i_count),
171 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
173 atomic_read(&current->fs->pwdmnt->mnt_count));
176 EXPORT_SYMBOL(push_ctxt);
/*
 * pop_ctxt - leave the context entered by push_ctxt() and restore the
 * task state recorded in @saved.
 *
 * The task's current pwd and pwdmnt are asserted to still be those of
 * @new_ctx.  They are then reset to the saved pair with ll_set_fs_pwd(),
 * the saved mount reference is released with mntput(), and the umask,
 * fsuid, fsgid, effective capabilities and group list (through
 * pop_group_info()) are restored.
 *
 * @saved:   state filled in by the matching push_ctxt()
 * @new_ctx: context being left; must be the one that was pushed
 * @uc:      credentials passed to the matching push_ctxt(); only luc_uce
 *           is read in the lines visible here
 *
 * NOTE(review): this view is missing lines (braces, the opener of the two
 * trace-message argument lists, and anything between the visible
 * statements); it is not visible whether @uc may be NULL -- confirm
 * against the full file.
 */
178 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
179 struct lvfs_ucred *uc)
182 ASSERT_CTXT_MAGIC(saved->magic);
184 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
188 " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
189 new_ctx, current, current->fs, current->fs->pwd,
190 atomic_read(&current->fs->pwd->d_count),
191 atomic_read(&current->fs->pwd->d_inode->i_count),
192 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
194 atomic_read(&current->fs->pwdmnt->mnt_count));
/* The task must still be sitting in the context that was pushed. */
197 LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
198 current->fs->pwd, new_ctx->pwd);
199 LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
200 current->fs->pwdmnt, new_ctx->pwdmnt);
203 ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
206 mntput(saved->pwdmnt);
207 current->fs->umask = saved->luc.luc_umask;
/* Put the task's own credentials back. */
209 current->fsuid = saved->luc.luc_fsuid;
210 current->fsgid = saved->luc.luc_fsgid;
211 current->cap_effective = saved->luc.luc_cap;
212 pop_group_info(saved, uc->luc_uce);
217 "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
218 saved, current, current->fs, current->fs->pwd,
219 atomic_read(&current->fs->pwd->d_count),
220 atomic_read(&current->fs->pwd->d_inode->i_count),
221 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
223 atomic_read(&current->fs->pwdmnt->mnt_count));
226 EXPORT_SYMBOL(pop_ctxt);
/*
 * simple_mknod - look up @name under @dir and create it as a regular file
 * if it does not exist yet.
 *
 * @dir:  parent directory dentry
 * @name: NUL-terminated name to look up (its length is taken with strlen())
 * @mode: requested mode; the file-type bits are replaced by S_IFREG on create
 * @fix:  when non-zero and an existing regular file's permission bits
 *        (S_IALLUGO) differ from @mode, they are overwritten with @mode's
 *        and the inode is marked dirty
 *
 * An existing entry that is not a regular file sets err to -EEXIST.
 * Failures are reported as ERR_PTR(err).
 *
 * NOTE(review): braces, labels, the lookup-error test and the tail of the
 * ll_vfs_create() call are not visible in this view -- confirm the exact
 * return/cleanup paths against the full file.
 */
228 /* utility to make a file */
229 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
231 struct dentry *dchild;
235 ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
236 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
238 dchild = ll_lookup_one_len(name, dir, strlen(name));
240 GOTO(out_up, dchild);
/* Name already has an inode: validate its type instead of creating. */
242 if (dchild->d_inode) {
243 int old_mode = dchild->d_inode->i_mode;
244 if (!S_ISREG(old_mode))
245 GOTO(out_err, err = -EEXIST);
247 /* Fixup file permissions if necessary */
248 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
249 CWARN("fixing permissions on %s from %o to %o\n",
250 name, old_mode, mode);
251 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
252 (old_mode & ~S_IALLUGO);
253 mark_inode_dirty(dchild->d_inode);
255 GOTO(out_up, dchild);
/* No inode yet: create it, forcing the type bits to "regular file". */
258 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
267 dchild = ERR_PTR(err);
271 EXPORT_SYMBOL(simple_mknod);
/*
 * simple_mkdir - look up @name under @dir and create it as a directory if
 * it does not exist yet.
 *
 * @dir:  parent directory dentry
 * @name: NUL-terminated name to look up (its length is taken with strlen())
 * @mode: mode passed to vfs_mkdir() on create
 * @fix:  when non-zero and an existing directory's permission bits
 *        (S_IALLUGO) differ from @mode, they are overwritten with @mode's
 *        and the inode is marked dirty
 *
 * An existing entry that is not a directory is logged with CERROR and sets
 * err to -ENOTDIR.  Failures are reported as ERR_PTR(err).
 *
 * NOTE(review): braces, labels, the lookup-error test and the opener of the
 * "fixing permissions" message are not visible in this view -- confirm the
 * exact return/cleanup paths against the full file.
 */
273 /* utility to make a directory */
274 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
276 struct dentry *dchild;
280 ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
281 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
282 dchild = ll_lookup_one_len(name, dir, strlen(name));
284 GOTO(out_up, dchild);
/* Name already has an inode: validate its type instead of creating. */
286 if (dchild->d_inode) {
287 int old_mode = dchild->d_inode->i_mode;
288 if (!S_ISDIR(old_mode)) {
289 CERROR("found %s (%lu/%u) is mode %o\n", name,
290 dchild->d_inode->i_ino,
291 dchild->d_inode->i_generation, old_mode);
292 GOTO(out_err, err = -ENOTDIR);
295 /* Fixup directory permissions if necessary */
296 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
298 "fixing permissions on %s from %o to %o\n",
299 name, old_mode, mode);
300 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
301 (old_mode & ~S_IALLUGO);
302 mark_inode_dirty(dchild->d_inode);
304 GOTO(out_up, dchild);
307 err = vfs_mkdir(dir->d_inode, dchild, mode);
315 dchild = ERR_PTR(err);
319 EXPORT_SYMBOL(simple_mkdir);
/*
 * lustre_fread - read from @file through its f_op->read method.
 *
 * @file: open file; it, its f_op and f_op->read are all checked for NULL
 * @buf:  destination buffer
 * @len:  number of bytes requested
 * @off:  in/out file offset; must not be NULL
 *
 * Returns whatever f_op->read() returns.
 *
 * NOTE(review): the statement executed when the NULL checks fail, and the
 * delimiters of the original comment below, are not visible in this view.
 */
322 * Read a file from within kernel context. Prior to calling this
323 * function we should already have done a push_ctxt().
325 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
327 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
328 if (!file || !file->f_op || !file->f_op->read || !off)
331 return file->f_op->read(file, buf, len, off);
333 EXPORT_SYMBOL(lustre_fread);
/*
 * lustre_fwrite - write to @file through its f_op->write method.
 *
 * @file: open file
 * @buf:  source buffer
 * @len:  number of bytes to write
 * @off:  in/out file offset
 *
 * Returns whatever f_op->write() returns.
 *
 * NOTE(review): only the f_op->write NULL check is visible; the lines
 * between the assertion and that check, the statements taken when a check
 * fails, and the delimiters of the original comment below are missing from
 * this view -- confirm against the full file.
 */
336 * Write a file from within kernel context. Prior to calling this
337 * function we should already have done a push_ctxt().
339 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
342 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
350 if (!file->f_op->write)
353 RETURN(file->f_op->write(file, buf, len, off));
355 EXPORT_SYMBOL(lustre_fwrite);
/*
 * lustre_fsync - sync @file through its f_op->fsync method.
 *
 * @file: open file; it, its f_op and f_op->fsync are all checked for NULL
 *
 * Calls f_op->fsync(file, file->f_dentry, 0) and returns its result.
 *
 * NOTE(review): the statement executed when the NULL checks fail, and the
 * delimiters of the original comment below, are not visible in this view.
 */
358 * Sync a file from within kernel context. Prior to calling this
359 * function we should already have done a push_ctxt().
361 int lustre_fsync(struct file *file)
364 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
365 if (!file || !file->f_op || !file->f_op->fsync)
368 RETURN(file->f_op->fsync(file, file->f_dentry, 0));
370 EXPORT_SYMBOL(lustre_fsync);
/*
 * l_dentry_open - open the dentry @de on the mount held by @ctxt.
 *
 * @ctxt: run context whose pwdmnt is used as the mount
 * @de:   dentry to open
 *
 * mntget() is called on ctxt->pwdmnt before it is handed to dentry_open();
 * the result of dentry_open() is returned unchanged.
 *
 * NOTE(review): the rest of the parameter list (the declaration of the
 * `flags' value passed to dentry_open()) is not visible in this view.
 */
372 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
375 mntget(ctxt->pwdmnt);
376 return dentry_open(de, ctxt->pwdmnt, flags);
378 EXPORT_SYMBOL(l_dentry_open);
/*
 * l_filldir - directory-entry callback handed to vfs_readdir() by
 * l_readdir().
 *
 * @__buf:  really a struct l_readdir_callback *
 * @name:   entry name (@namlen bytes are copied)
 * @namlen: length of @name
 * @offset: stored as lld_off of the previously recorded entry
 * @ino:    inode number; its type is u64 or ino_t depending on
 *          HAVE_VFS_READDIR_U64_INO
 * @d_type: not referenced in the lines visible here
 *
 * A new l_linux_dirent is allocated with OBD_ALLOC, appended to the
 * callback's list, remembered as the callback's current entry, and filled
 * with @ino and @name.  The name is asserted to fit in lld_name with one
 * byte to spare.
 *
 * NOTE(review): braces, #else/#endif, the guard around the lld_off update,
 * the allocation-failure handling and the return statement are not visible
 * in this view -- confirm against the full file.
 */
380 #ifdef HAVE_VFS_READDIR_U64_INO
381 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
382 u64 ino, unsigned int d_type)
384 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
385 ino_t ino, unsigned int d_type)
388 struct l_linux_dirent *dirent;
389 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* Entry recorded by the previous callback invocation. */
391 dirent = buf->lrc_dirent;
393 dirent->lld_off = offset;
395 OBD_ALLOC(dirent, sizeof(*dirent));
400 list_add_tail(&dirent->lld_list, buf->lrc_list);
402 buf->lrc_dirent = dirent;
403 dirent->lld_ino = ino;
404 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
405 memcpy(dirent->lld_name, name, namlen);
/*
 * l_readdir - read the entries of the directory @file into @dentry_list.
 *
 * @file:        open directory
 * @dentry_list: list head that l_filldir() appends l_linux_dirent items to
 *
 * A callback buffer is set up with no current entry and @dentry_list as its
 * list, vfs_readdir() is run with l_filldir() as the callback, and the last
 * entry recorded gets file->f_pos as its lld_off.
 *
 * NOTE(review): the declaration of `error', the checks on it and on
 * lastdirent, and the return statement are not visible in this view.
 */
410 long l_readdir(struct file *file, struct list_head *dentry_list)
412 struct l_linux_dirent *lastdirent;
413 struct l_readdir_callback buf;
416 buf.lrc_dirent = NULL;
417 buf.lrc_list = dentry_list;
419 error = vfs_readdir(file, l_filldir, &buf);
423 lastdirent = buf.lrc_dirent;
425 lastdirent->lld_off = file->f_pos;
429 EXPORT_SYMBOL(l_readdir);
/* Memory-accounting symbols; they are read by lvfs_linux_exit() in this file. */
430 EXPORT_SYMBOL(obd_memory);
431 EXPORT_SYMBOL(obd_memmax);
/*
 * Prototypes for the dev_*_rdonly() helpers called by the lvfs_*_rdonly()
 * wrappers below.  Two signature sets are declared: one under
 * HAVE_OLD_DEV_SET_RDONLY (dev_set_rdonly() takes an extra int) and one
 * when HAVE_CLEAR_RDONLY_ON_PUT is not defined.
 *
 * NOTE(review): the closing #endif lines are not visible in this view, and
 * the helpers themselves are defined elsewhere.
 */
433 #ifdef LUSTRE_KERNEL_VERSION
434 #ifdef HAVE_OLD_DEV_SET_RDONLY
435 void dev_set_rdonly(lvfs_sbdev_type dev, int no_write);
436 void dev_clear_rdonly(int no_write);
437 int dev_check_rdonly(lvfs_sbdev_type dev);
438 #elif !defined(HAVE_CLEAR_RDONLY_ON_PUT)
439 void dev_set_rdonly(lvfs_sbdev_type dev);
440 void dev_clear_rdonly(lvfs_sbdev_type dev);
441 int dev_check_rdonly(lvfs_sbdev_type dev);
/*
 * lvfs_set_rdonly - sync @dev and then mark it read-only.
 *
 * The device is first synced with lvfs_sbdev_sync().  Under
 * HAVE_OLD_DEV_SET_RDONLY the two-argument dev_set_rdonly(dev, 2) is used.
 *
 * NOTE(review): the #else branch and closing lines are not visible in this
 * view; the meaning of the literal 2 is not established here.
 */
444 void lvfs_set_rdonly(lvfs_sbdev_type dev)
446 CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
447 lvfs_sbdev_sync(dev);
448 #ifdef HAVE_OLD_DEV_SET_RDONLY
449 dev_set_rdonly(dev, 2);
/*
 * lvfs_check_rdonly - report the read-only state of @dev.
 *
 * Thin wrapper: returns the result of dev_check_rdonly(dev) unchanged.
 */
455 int lvfs_check_rdonly(lvfs_sbdev_type dev)
457 return dev_check_rdonly(dev);
/*
 * lvfs_clear_rdonly - counterpart of lvfs_set_rdonly() for @dev.
 *
 * The only statement visible here is a debug message saying the read-only
 * flag will be unset on put.
 *
 * NOTE(review): the remainder of the CDEBUG() call and any further body
 * lines are not visible in this view -- confirm whether anything else is
 * done here.
 */
460 void lvfs_clear_rdonly(lvfs_sbdev_type dev)
462 CDEBUG(D_IOCTL | D_HA, "(will unset dev %lx rdonly on put)\n",
466 EXPORT_SYMBOL(lvfs_set_rdonly);
467 EXPORT_SYMBOL(lvfs_check_rdonly);
468 EXPORT_SYMBOL(lvfs_clear_rdonly);
/*
 * lvfs_check_io_health - probe the backing store by writing one page.
 *
 * @obd:  device passed through to fsfilt_write_record()
 * @file: file the probe page is written to
 *
 * A CFS_PAGE_SIZE buffer is allocated with OBD_ALLOC, written with
 * fsfilt_write_record() (final argument 1; the debug message describes the
 * write as synchronous), then freed.  The write's return code is logged.
 *
 * NOTE(review): the declarations of `rc' and `offset', the
 * allocation-failure handling and the return statement are not visible in
 * this view.
 */
471 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
473 char *write_page = NULL;
478 OBD_ALLOC(write_page, CFS_PAGE_SIZE);
482 rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
484 OBD_FREE(write_page, CFS_PAGE_SIZE);
486 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
489 EXPORT_SYMBOL(lvfs_check_io_health);
/*
 * lvfs_linux_init - module entry point, registered with module_init().
 *
 * NOTE(review): the body is not visible in this view.
 */
491 static int __init lvfs_linux_init(void)
/*
 * lvfs_linux_exit - module exit hook, registered with module_exit().
 *
 * Reads the obd_memory counter and logs it together with obd_memmax.  The
 * message is emitted at D_ERROR when the counter is non-zero (reported as
 * "leaked"), otherwise at D_INFO.
 *
 * NOTE(review): the declaration of `leaked' and any other body lines are
 * not visible in this view.
 */
496 static void __exit lvfs_linux_exit(void)
501 leaked = atomic_read(&obd_memory);
502 CDEBUG(leaked ? D_ERROR : D_INFO,
503 "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
/* Module metadata, followed by registration of the init and exit hooks. */
509 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
510 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
511 MODULE_LICENSE("GPL");
513 module_init(lvfs_linux_init);
514 module_exit(lvfs_linux_exit);