1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/lib/lvfs_linux.c
5 * Lustre filesystem abstraction routines
7 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 # define EXPORT_SYMTAB
30 #define DEBUG_SUBSYSTEM S_FILTER
32 #include <linux/version.h>
34 #include <asm/unistd.h>
35 #include <linux/slab.h>
36 #include <linux/pagemap.h>
37 #include <linux/quotaops.h>
38 #include <linux/version.h>
39 #include <libcfs/kp30.h>
40 #include <lustre_fsfilt.h>
42 #include <linux/module.h>
43 #include <linux/init.h>
44 #include <linux/lustre_compat25.h>
46 #include "lvfs_internal.h"
49 #include <lustre_lib.h>
50 #include <lustre_quota.h>
/* Module-level unsigned tunables.  Each one is passed to EXPORT_SYMBOL near
 * the end of this file; nothing in this view reads them.
 * NOTE(review): the names suggest fault-injection controls -- confirm. */
54 unsigned int obd_fail_val;
55 unsigned int obd_fail_loc;
56 unsigned int obd_alloc_fail_rate = 0;
58 /* Debugging check only needed during development */
/* The same three macros are defined twice below: first as LASSERT/LASSERTF
 * checks (a comparison against OBD_RUN_CTXT_MAGIC and tests of
 * segment_eq(get_fs(), get_ds())), then as empty do {} while(0) statements.
 * The preprocessor conditional that selects one set is not visible in this
 * view.
 * NOTE(review): the first ASSERT_NOT_KERNEL_CTXT definition ends in a line
 * continuation, and the line that completes it is also missing here. */
60 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
61 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
63 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
65 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
66 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
67 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/*
 * push_group_info - record the current task's group state in @save and
 * install the groups carried by @uce.
 *
 * @save: run context that receives the previous group state
 * @uce:  upcall cache entry supplying ue_group_info; when @uce is NULL,
 *        ginfo is NULL
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, any branching on ginfo and the #else/#endif of the version check
 * are not visible in this view.
 */
70 static void push_group_info(struct lvfs_run_ctxt *save,
71 struct upcall_cache_entry *uce)
73 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
/* Remember the current group count; the condition guarding this is not shown. */
76 save->ngroups = current_ngroups;
/* Kernels >= 2.6.4: exchange the task's group_info pointer. */
79 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
81 save->group_info = current->group_info;
82 current->group_info = ginfo;
/* The remaining lines work on current->ngroups / current->groups instead:
 * bound checks, copy of the old gid array into save, then load of the gids
 * held in ginfo->small_block.
 * NOTE(review): presumably the pre-2.6.4 branch -- the #else is not shown. */
85 LASSERT(ginfo->ngroups <= NGROUPS);
86 LASSERT(current->ngroups <= NGROUPS_SMALL);
88 save->group_info.ngroups = current->ngroups;
90 memcpy(save->group_info.small_block, current->groups,
91 current->ngroups * sizeof(gid_t));
93 current->ngroups = ginfo->ngroups;
95 memcpy(current->groups, ginfo->small_block,
96 current->ngroups * sizeof(gid_t));
/*
 * pop_group_info - put back the group state that push_group_info() stored
 * in @save.
 *
 * @save: run context holding the saved group state
 * @uce:  upcall cache entry; only used to derive ginfo (NULL when @uce is
 *        NULL)
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, any branching on ginfo, the task_lock() that pairs with the
 * visible task_unlock() and the #else/#endif lines are not visible here.
 */
101 static void pop_group_info(struct lvfs_run_ctxt *save,
102 struct upcall_cache_entry *uce)
104 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
/* Restore the saved group count; the guarding condition is not shown. */
107 current_ngroups = save->ngroups;
/* Kernels >= 2.6.4: reinstate the saved group_info pointer. */
109 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
111 current->group_info = save->group_info;
112 task_unlock(current);
/* The remaining lines restore the count and, when it is non-zero, copy the
 * saved gid array back into current->groups. */
114 current->ngroups = save->group_info.ngroups;
115 if (current->ngroups)
116 memcpy(current->groups, save->group_info.small_block,
117 current->ngroups * sizeof(gid_t));
122 /* push / pop to root of obd store */
/*
 * push_ctxt - save the calling task's filesystem context into @save and
 * switch the task to @new_ctx.
 *
 * @save:    receives references on the current pwd dentry and mount (dget /
 *           mntget), the umask and, in save->luc, fsuid, fsgid and
 *           cap_effective
 * @new_ctx: supplies the pwd/pwdmnt installed with ll_set_fs_pwd(); its
 *           magic is checked by ASSERT_CTXT_MAGIC
 * @uc:      credentials copied onto current (fsuid, fsgid, cap_effective);
 *           its luc_uce is passed to push_group_info()
 *
 * The task umask is forced to 0 while the context is pushed.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the opener of each debug-print argument list and any NULL check
 * on @uc are not visible in this view.
 */
123 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
124 struct lvfs_ucred *uc)
126 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
127 ASSERT_CTXT_MAGIC(new_ctx->magic);
128 OBD_SET_CTXT_MAGIC(save);
/* Argument list of a debug print whose call opener is not visible. */
132 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
133 save, current, current->fs, current->fs->pwd,
134 atomic_read(&current->fs->pwd->d_count),
135 atomic_read(&current->fs->pwd->d_inode->i_count),
136 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
138 atomic_read(&current->fs->pwdmnt->mnt_count));
/* Both the pwd being left and the pwd being entered must have a non-zero
 * d_count. */
142 LASSERT(atomic_read(&current->fs->pwd->d_count));
143 LASSERT(atomic_read(&new_ctx->pwd->d_count));
/* Pin the dentry and mount being left; pop_ctxt() reinstalls them. */
144 save->pwd = dget(current->fs->pwd);
145 save->pwdmnt = mntget(current->fs->pwdmnt);
146 save->luc.luc_umask = current->fs->umask;
149 LASSERT(save->pwdmnt);
150 LASSERT(new_ctx->pwd);
151 LASSERT(new_ctx->pwdmnt);
/* Stash the task credentials, then adopt the ones supplied in uc. */
154 save->luc.luc_fsuid = current->fsuid;
155 save->luc.luc_fsgid = current->fsgid;
156 save->luc.luc_cap = current->cap_effective;
158 current->fsuid = uc->luc_fsuid;
159 current->fsgid = uc->luc_fsgid;
160 current->cap_effective = uc->luc_cap;
161 push_group_info(save, uc->luc_uce);
163 current->fs->umask = 0; /* umask already applied on client */
/* Make new_ctx the working directory of the task. */
165 ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
/* Second debug-print argument list; its call opener is not visible either. */
169 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
170 new_ctx, current, current->fs, current->fs->pwd,
171 atomic_read(&current->fs->pwd->d_count),
172 atomic_read(&current->fs->pwd->d_inode->i_count),
173 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
175 atomic_read(&current->fs->pwdmnt->mnt_count));
178 EXPORT_SYMBOL(push_ctxt);
/*
 * pop_ctxt - undo push_ctxt(): return the task to the context held in
 * @saved.
 *
 * @saved:   context filled by push_ctxt(); supplies pwd, pwdmnt, the umask
 *           and the fsuid/fsgid/cap_effective kept in saved->luc
 * @new_ctx: context that was pushed; the current pwd and pwdmnt are
 *           asserted to still be equal to it
 * @uc:      its luc_uce is handed to pop_group_info()
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the opener of each debug-print argument list, any NULL check on
 * @uc and any release of saved->pwd are not visible in this view; only
 * mntput(saved->pwdmnt) is shown.
 */
180 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
181 struct lvfs_ucred *uc)
184 ASSERT_CTXT_MAGIC(saved->magic);
186 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
/* Argument list of a debug print whose call opener is not visible. */
190 " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
191 new_ctx, current, current->fs, current->fs->pwd,
192 atomic_read(&current->fs->pwd->d_count),
193 atomic_read(&current->fs->pwd->d_inode->i_count),
194 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
196 atomic_read(&current->fs->pwdmnt->mnt_count));
/* The task must still be sitting in the context that was pushed. */
199 LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
200 current->fs->pwd, new_ctx->pwd);
201 LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
202 current->fs->pwdmnt, new_ctx->pwdmnt);
/* Reinstall the saved working directory and drop the mount reference. */
205 ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
208 mntput(saved->pwdmnt);
209 current->fs->umask = saved->luc.luc_umask;
/* Put back the credentials and groups stored by push_ctxt(). */
211 current->fsuid = saved->luc.luc_fsuid;
212 current->fsgid = saved->luc.luc_fsgid;
213 current->cap_effective = saved->luc.luc_cap;
214 pop_group_info(saved, uc->luc_uce);
/* Second debug-print argument list; its call opener is not visible either. */
219 "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
220 saved, current, current->fs, current->fs->pwd,
221 atomic_read(&current->fs->pwd->d_count),
222 atomic_read(&current->fs->pwd->d_inode->i_count),
223 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
225 atomic_read(&current->fs->pwdmnt->mnt_count));
228 EXPORT_SYMBOL(pop_ctxt);
230 /* utility to make a file */
/*
 * simple_mknod - look up @name under @dir and, when the entry has no inode
 * yet, create it as a regular file.
 *
 * @dir:  parent directory dentry
 * @name: NUL-terminated name; its length is taken with strlen()
 * @mode: requested mode; the file-type bits are replaced by S_IFREG for the
 *        create call
 * @fix:  when non-zero, the permission bits (S_IALLUGO) of an existing
 *        regular file are rewritten to match @mode
 *
 * An existing entry that is not a regular file yields -EEXIST.  The result
 * is carried in dchild, which is set to ERR_PTR(err) on the error path.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the declaration of err, the out_up/out_err labels, the tail of
 * the ll_vfs_create() call and the return statement are not visible here.
 */
231 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
233 struct dentry *dchild;
237 ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
238 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
240 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* Early exit with the lookup result; the condition guarding it is not shown. */
242 GOTO(out_up, dchild);
/* Entry already exists: accept it only if it is a regular file. */
244 if (dchild->d_inode) {
245 int old_mode = dchild->d_inode->i_mode;
246 if (!S_ISREG(old_mode))
247 GOTO(out_err, err = -EEXIST);
249 /* Fixup file permissions if necessary */
250 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
251 CWARN("fixing permissions on %s from %o to %o\n",
252 name, old_mode, mode);
253 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
254 (old_mode & ~S_IALLUGO);
255 mark_inode_dirty(dchild->d_inode);
257 GOTO(out_up, dchild);
/* No inode yet: create the file with the type forced to S_IFREG. */
260 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
269 dchild = ERR_PTR(err);
273 EXPORT_SYMBOL(simple_mknod);
275 /* utility to make a directory */
/*
 * simple_mkdir - look up @name under @dir and, when the entry has no inode
 * yet, create it as a directory with vfs_mkdir().
 *
 * @dir:  parent directory dentry
 * @name: NUL-terminated name; its length is taken with strlen()
 * @mode: mode passed to vfs_mkdir()
 * @fix:  when non-zero, the permission bits (S_IALLUGO) of an existing
 *        directory are rewritten to match @mode
 *
 * An existing entry that is not a directory is logged with CERROR and
 * yields -ENOTDIR.  The result is carried in dchild, which is set to
 * ERR_PTR(err) on the error path.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the declaration of err, the out_up/out_err labels, the logging
 * call that owns the second "fixing permissions" message and the return
 * statement are not visible here.
 */
276 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
278 struct dentry *dchild;
282 ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
283 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
284 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* Early exit with the lookup result; the condition guarding it is not shown. */
286 GOTO(out_up, dchild);
/* Entry already exists: accept it only if it is a directory. */
288 if (dchild->d_inode) {
289 int old_mode = dchild->d_inode->i_mode;
290 if (!S_ISDIR(old_mode)) {
291 CERROR("found %s (%lu/%u) is mode %o\n", name,
292 dchild->d_inode->i_ino,
293 dchild->d_inode->i_generation, old_mode);
294 GOTO(out_err, err = -ENOTDIR);
297 /* Fixup directory permissions if necessary */
298 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
300 "fixing permissions on %s from %o to %o\n",
301 name, old_mode, mode);
302 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
303 (old_mode & ~S_IALLUGO);
304 mark_inode_dirty(dchild->d_inode);
306 GOTO(out_up, dchild);
/* No inode yet: create the directory. */
309 err = vfs_mkdir(dir->d_inode, dchild, mode);
317 dchild = ERR_PTR(err);
321 EXPORT_SYMBOL(simple_mkdir);
323 /* utility to rename a file */
/*
 * lustre_rename - rename @oldname to @newname, both looked up inside @dir.
 *
 * @dir:     directory dentry that holds both names
 * @oldname: NUL-terminated existing name
 * @newname: NUL-terminated target name
 *
 * Returns PTR_ERR() of the first lookup if it fails.  A missing source
 * inode sets err to -ENOENT and a failed second lookup sets err to its
 * PTR_ERR(); otherwise err holds the vfs_rename() result.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the declaration of err, the put_old label with its cleanup and
 * the final return are not visible in this view.
 */
324 int lustre_rename(struct dentry *dir, char *oldname, char *newname)
326 struct dentry *dchild_old, *dchild_new;
330 ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
331 CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
332 (int)strlen(oldname), oldname, (int)strlen(newname), newname);
334 dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
335 if (IS_ERR(dchild_old))
336 RETURN(PTR_ERR(dchild_old));
/* A negative dentry means there is nothing to rename. */
338 if (!dchild_old->d_inode)
339 GOTO(put_old, err = -ENOENT);
341 dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
342 if (IS_ERR(dchild_new))
343 GOTO(put_old, err = PTR_ERR(dchild_new));
/* Source and target share the same parent inode. */
345 err = vfs_rename(dir->d_inode, dchild_old, dir->d_inode, dchild_new);
352 EXPORT_SYMBOL(lustre_rename);
/*
 * lustre_fread - call the read method of @file directly.
 *
 * @file: open file; @file, its f_op and f_op->read are checked for NULL
 * @buf:  destination buffer handed to f_op->read
 * @len:  byte count handed to f_op->read
 * @off:  file offset pointer, also checked for NULL
 *
 * Returns whatever f_op->read returns.
 *
 * NOTE(review): the comment delimiters around the two description lines
 * below, the braces and the statement executed when the NULL check fails
 * are not visible in this view.
 */
355 * Read a file from within kernel context. Prior to calling this
356 * function we should already have done a push_ctxt().
358 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
360 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
361 if (!file || !file->f_op || !file->f_op->read || !off)
364 return file->f_op->read(file, buf, len, off);
366 EXPORT_SYMBOL(lustre_fread);
/*
 * lustre_fwrite - call the write method of @file directly.
 *
 * @file: open file whose f_op->write is checked for NULL
 * @buf:  source buffer handed to f_op->write
 * @len:  byte count handed to f_op->write
 * @off:  file offset pointer handed to f_op->write
 *
 * Returns, through RETURN(), whatever f_op->write returns.
 *
 * NOTE(review): the listing numbers jump from 375 to 383, so any earlier
 * argument checks are not visible; neither are the comment delimiters
 * around the two description lines below, the braces, nor the statement
 * executed when f_op->write is NULL.
 */
369 * Write a file from within kernel context. Prior to calling this
370 * function we should already have done a push_ctxt().
372 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
375 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
383 if (!file->f_op->write)
386 RETURN(file->f_op->write(file, buf, len, off));
388 EXPORT_SYMBOL(lustre_fwrite);
/*
 * lustre_fsync - call the fsync method of @file directly.
 *
 * @file: open file; @file, its f_op and f_op->fsync are checked for NULL
 *
 * Returns, through RETURN(), the result of
 * f_op->fsync(file, file->f_dentry, 0).
 *
 * NOTE(review): the comment delimiters around the two description lines
 * below, the braces and the statement executed when the NULL check fails
 * are not visible in this view.
 */
391 * Sync a file from within kernel context. Prior to calling this
392 * function we should already have done a push_ctxt().
394 int lustre_fsync(struct file *file)
397 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
398 if (!file || !file->f_op || !file->f_op->fsync)
401 RETURN(file->f_op->fsync(file, file->f_dentry, 0));
403 EXPORT_SYMBOL(lustre_fsync);
/*
 * l_dentry_open - open dentry @de on the mount recorded in @ctxt.
 *
 * @ctxt: run context whose pwdmnt is used as the mount
 * @de:   dentry to open
 *
 * Returns the result of dentry_open(de, ctxt->pwdmnt, flags).
 *
 * NOTE(review): the rest of the parameter list (the declaration of flags)
 * and the braces are not visible in this view.  The mntget() before the
 * open presumably supplies the mount reference that dentry_open() takes
 * over -- confirm against the kernel version in use.
 */
405 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
408 mntget(ctxt->pwdmnt);
409 return dentry_open(de, ctxt->pwdmnt, flags);
411 EXPORT_SYMBOL(l_dentry_open);
/*
 * l_filldir - directory-entry callback handed to vfs_readdir() by
 * l_readdir().
 *
 * Two signatures are given; they differ only in the type of ino (u64 or
 * ino_t).  The first sits under HAVE_VFS_READDIR_U64_INO; the #else/#endif
 * lines are not visible in this view.
 *
 * @__buf:  really a struct l_readdir_callback
 * @name:   entry name, namlen bytes long
 * @namlen: length of @name
 * @offset: stored as lld_off of the entry recorded on the previous call
 * @ino:    inode number stored in the new entry
 * @d_type: not used by any visible line
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the check around the lld_off store, the allocation-failure
 * handling, any NUL termination of lld_name and the return statement are
 * not visible here.
 */
413 #ifdef HAVE_VFS_READDIR_U64_INO
414 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
415 u64 ino, unsigned int d_type)
417 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
418 ino_t ino, unsigned int d_type)
421 struct l_linux_dirent *dirent;
422 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* Entry recorded by the previous invocation; its lld_off is set to offset. */
424 dirent = buf->lrc_dirent;
426 dirent->lld_off = offset;
/* Allocate a fresh entry and append it to the caller's list. */
428 OBD_ALLOC(dirent, sizeof(*dirent));
433 list_add_tail(&dirent->lld_list, buf->lrc_list);
435 buf->lrc_dirent = dirent;
436 dirent->lld_ino = ino;
/* The name buffer must hold namlen bytes plus one more. */
437 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
438 memcpy(dirent->lld_name, name, namlen);
/*
 * l_readdir - read the entries of directory @file into @dentry_list.
 *
 * @file:        open directory passed to vfs_readdir()
 * @dentry_list: list head that l_filldir() appends entries to
 *
 * After vfs_readdir() the lld_off of the last recorded entry is set to
 * file->f_pos.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the declaration of error, the checks that follow vfs_readdir()
 * and the return statement are not visible in this view.
 */
443 long l_readdir(struct file *file, struct list_head *dentry_list)
445 struct l_linux_dirent *lastdirent;
446 struct l_readdir_callback buf;
/* Start with no previous entry and point the callback at the caller's list. */
449 buf.lrc_dirent = NULL;
450 buf.lrc_list = dentry_list;
452 error = vfs_readdir(file, l_filldir, &buf);
456 lastdirent = buf.lrc_dirent;
458 lastdirent->lld_off = file->f_pos;
462 EXPORT_SYMBOL(l_readdir);
/* obd_memory and obd_memmax are exported here; their definitions are not
 * visible in this view. */
463 EXPORT_SYMBOL(obd_memory);
464 EXPORT_SYMBOL(obd_memmax);
/*
 * Read-only device helpers, compiled only when LUSTRE_KERNEL_VERSION is
 * defined.  The build is stopped with #error when HAVE_CLEAR_RDONLY_ON_PUT
 * is not defined.
 *
 * __lvfs_set_rdonly(dev, jdev): syncs dev with lvfs_sbdev_sync(), then, if
 * a journal device jdev is given and differs from dev, logs it and calls
 * dev_set_rdonly() on it; a log message for dev itself follows.
 *
 * lvfs_check_rdonly(dev): returns dev_check_rdonly(dev).
 *
 * dev_set_rdonly() and dev_check_rdonly() are only declared here.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the #endif that closes the #ifndef, the last argument of the
 * first CDEBUG and any dev_set_rdonly(dev) call after the second CDEBUG are
 * not visible in this view.
 */
466 #ifdef LUSTRE_KERNEL_VERSION
467 #ifndef HAVE_CLEAR_RDONLY_ON_PUT
468 #error rdonly patchset must be updated [cfs bz11248]
471 void dev_set_rdonly(lvfs_sbdev_type dev);
472 int dev_check_rdonly(lvfs_sbdev_type dev);
474 void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
476 lvfs_sbdev_sync(dev);
477 if (jdev && (jdev != dev)) {
478 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
480 dev_set_rdonly(jdev);
482 CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
486 int lvfs_check_rdonly(lvfs_sbdev_type dev)
488 return dev_check_rdonly(dev);
491 EXPORT_SYMBOL(__lvfs_set_rdonly);
492 EXPORT_SYMBOL(lvfs_check_rdonly);
493 #endif /* LUSTRE_KERNEL_VERSION */
/*
 * lvfs_check_io_health - probe the backing store by writing one page-sized
 * record to @file.
 *
 * @obd:  device passed to fsfilt_write_record()
 * @file: file the record is written to
 *
 * A CFS_PAGE_SIZE buffer is allocated, written with
 * fsfilt_write_record(), freed, and the result rc is logged.
 *
 * NOTE(review): the listing numbers embedded in these lines skip values, so
 * braces, the declarations of rc and offset, the allocation-failure
 * handling and the return statement are not visible in this view.  The
 * final argument 1 presumably requests a synchronous write, as the log
 * text says -- confirm against fsfilt_write_record().
 */
495 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
497 char *write_page = NULL;
502 OBD_ALLOC(write_page, CFS_PAGE_SIZE);
506 rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
508 OBD_FREE(write_page, CFS_PAGE_SIZE);
510 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
513 EXPORT_SYMBOL(lvfs_check_io_health);
/*
 * Module entry points, registered with module_init()/module_exit() at the
 * end of the file.
 *
 * lvfs_linux_init: only the signature is visible in this view.
 *
 * lvfs_linux_exit: reads the obd_memory counter and logs it together with
 * obd_memmax, at D_ERROR when the counter is non-zero and at D_INFO
 * otherwise.
 *
 * NOTE(review): both bodies are missing their braces, the declaration of
 * leaked and every other statement.
 */
515 static int __init lvfs_linux_init(void)
520 static void __exit lvfs_linux_exit(void)
525 leaked = atomic_read(&obd_memory);
526 CDEBUG(leaked ? D_ERROR : D_INFO,
527 "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
/* Export the three obd_fail_* / obd_alloc_fail_rate globals defined near
 * the top of this file. */
533 EXPORT_SYMBOL(obd_fail_loc);
534 EXPORT_SYMBOL(obd_alloc_fail_rate);
535 EXPORT_SYMBOL(obd_fail_val);
/* Module metadata and registration of the init/exit entry points. */
537 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
538 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
539 MODULE_LICENSE("GPL");
541 module_init(lvfs_linux_init);
542 module_exit(lvfs_linux_exit);