1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/lvfs/lvfs_linux.c
38 * Author: Andreas Dilger <adilger@clusterfs.com>
42 # define EXPORT_SYMTAB
45 #define DEBUG_SUBSYSTEM S_FILTER
47 #include <linux/version.h>
49 #include <asm/unistd.h>
50 #include <linux/slab.h>
51 #include <linux/pagemap.h>
52 #include <linux/quotaops.h>
53 #include <linux/version.h>
54 #include <libcfs/kp30.h>
55 #include <lustre_fsfilt.h>
57 #include <linux/module.h>
58 #include <linux/init.h>
59 #include <linux/lustre_compat25.h>
61 #include "lvfs_internal.h"
64 #include <lustre_lib.h>
65 #include <lustre_quota.h>
67 /* Debugging check only needed during development */
/*
 * Run-context sanity assertions.  Two alternative sets of definitions
 * follow.  NOTE(review): the preprocessor conditional that selects between
 * them is not visible in this listing.
 *
 * Checking variants: ASSERT_CTXT_MAGIC requires the given magic to equal
 * OBD_RUN_CTXT_MAGIC; ASSERT_KERNEL_CTXT requires
 * segment_eq(get_fs(), get_ds()) to hold and ASSERT_NOT_KERNEL_CTXT
 * requires it not to hold.
 */
69 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
70 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
72 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
/* No-op variants: every macro expands to an empty do/while statement. */
74 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
75 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
76 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/*
 * Save the current task's supplementary-group state in @save and install
 * the group info taken from @uce (NULL when @uce is NULL).
 *
 * NOTE(review): the embedded line numbers skip, so braces, guards and the
 * #else/#endif for the #if below are not visible here; which statements
 * belong to which branch should be confirmed against the full file.
 */
79 static void push_group_info(struct lvfs_run_ctxt *save,
80 struct upcall_cache_entry *uce)
82 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
/* Remember the group count so pop_group_info() can restore it. */
85 save->ngroups = current_ngroups;
88 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
/* Pointer swap: stash the task's group_info and point the task at ginfo. */
90 save->group_info = current->group_info;
91 current->group_info = ginfo;
/*
 * Array-copy variant (presumably the older-kernel branch): after bounding
 * both counts, copy the task's groups into @save, then load ginfo's groups
 * into the task.
 */
94 LASSERT(ginfo->ngroups <= NGROUPS);
95 LASSERT(current->ngroups <= NGROUPS_SMALL);
97 save->group_info.ngroups = current->ngroups;
99 memcpy(save->group_info.small_block, current->groups,
100 current->ngroups * sizeof(gid_t));
102 current->ngroups = ginfo->ngroups;
104 memcpy(current->groups, ginfo->small_block,
105 current->ngroups * sizeof(gid_t));
/*
 * Restore the current task's supplementary-group state from @save, undoing
 * what push_group_info() installed.
 *
 * NOTE(review): ginfo is derived from @uce, but no use of it appears in the
 * visible lines; the embedded line numbers skip, so guards, braces and the
 * #else/#endif for the #if below cannot be confirmed from this listing.
 */
110 static void pop_group_info(struct lvfs_run_ctxt *save,
111 struct upcall_cache_entry *uce)
113 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
/* Put back the group count recorded at push time. */
116 current_ngroups = save->ngroups;
118 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
/* Pointer variant: hand the saved group_info back to the task. */
120 current->group_info = save->group_info;
/* NOTE(review): the matching task_lock() is not visible in this listing. */
121 task_unlock(current);
/*
 * Array-copy variant (presumably the older-kernel branch): restore the
 * count and, when it is non-zero, copy the saved groups back.
 */
123 current->ngroups = save->group_info.ngroups;
124 if (current->ngroups)
125 memcpy(current->groups, save->group_info.small_block,
126 current->ngroups * sizeof(gid_t));
131 /* push / pop to root of obd store */
/*
 * push_ctxt - switch the current task into the run context @new_ctx.
 *
 * @save:    receives the state being replaced: pwd dentry and vfsmount
 *           (each with an extra reference from dget()/mntget()), umask,
 *           fsuid, fsgid, effective capabilities and group info.
 * @new_ctx: context to install; its magic is asserted and its pwd/pwdmnt
 *           must be non-NULL.
 * @uc:      credentials to install (fsuid, fsgid, capabilities and the
 *           upcall cache entry handed to push_group_info()).
 *
 * NOTE(review): the embedded line numbers skip, so braces and guards (for
 * example any NULL check on @uc before it is dereferenced) are not visible
 * in this listing; confirm against the full file.
 */
132 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
133 struct lvfs_ucred *uc)
135 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
136 ASSERT_CTXT_MAGIC(new_ctx->magic);
137 OBD_SET_CTXT_MAGIC(save);
/*
 * NOTE(review): format string and arguments of a debug trace describing the
 * state before the switch; the call line itself is not visible here.
 */
141 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
142 save, current, current->fs, current->fs->pwd,
143 atomic_read(&current->fs->pwd->d_count),
144 atomic_read(&current->fs->pwd->d_inode->i_count),
145 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
147 atomic_read(&current->fs->pwdmnt->mnt_count));
/* Both the current pwd and the new pwd must hold a reference. */
151 LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
152 LASSERT(atomic_read(&new_ctx->pwd->d_count));
/* Pin the current pwd/mount and record the umask for pop_ctxt(). */
153 save->pwd = dget(cfs_fs_pwd(current->fs));
154 save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
155 save->luc.luc_umask = current->fs->umask;
158 LASSERT(save->pwdmnt);
159 LASSERT(new_ctx->pwd);
160 LASSERT(new_ctx->pwdmnt);
/* Save the task's filesystem credentials, then install those from @uc. */
163 save->luc.luc_fsuid = current->fsuid;
164 save->luc.luc_fsgid = current->fsgid;
165 save->luc.luc_cap = current->cap_effective;
167 current->fsuid = uc->luc_fsuid;
168 current->fsgid = uc->luc_fsgid;
169 current->cap_effective = uc->luc_cap;
170 push_group_info(save, uc->luc_uce);
172 current->fs->umask = 0; /* umask already applied on client */
/* Make the new context's mount/dentry the task's working directory. */
174 ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
/* NOTE(review): arguments of a second debug trace, taken after the switch. */
178 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
179 new_ctx, current, current->fs, current->fs->pwd,
180 atomic_read(&current->fs->pwd->d_count),
181 atomic_read(&current->fs->pwd->d_inode->i_count),
182 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
184 atomic_read(&current->fs->pwdmnt->mnt_count));
187 EXPORT_SYMBOL(push_ctxt);
/*
 * pop_ctxt - leave the run context @new_ctx and restore the state in @saved.
 *
 * @saved:   state recorded by push_ctxt(); its magic is asserted.
 * @new_ctx: context being left; the task's current pwd and mount are
 *           asserted to be exactly this context's pwd and pwdmnt.
 * @uc:      credentials used at push time; its upcall cache entry is passed
 *           to pop_group_info().
 *
 * NOTE(review): the embedded line numbers skip, so braces and guards (for
 * example any NULL check on @uc) are not visible in this listing.  In
 * particular push_ctxt() takes a dentry reference with dget(), but no
 * matching dput(saved->pwd) appears in the visible lines (embedded line 216
 * is absent) -- confirm against the full file.
 */
189 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
190 struct lvfs_ucred *uc)
193 ASSERT_CTXT_MAGIC(saved->magic);
195 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
/*
 * NOTE(review): format string and arguments of a debug trace describing the
 * state before the restore; the call line itself is not visible here.
 */
199 " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
200 new_ctx, current, current->fs, current->fs->pwd,
201 atomic_read(&current->fs->pwd->d_count),
202 atomic_read(&current->fs->pwd->d_inode->i_count),
203 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
205 atomic_read(&current->fs->pwdmnt->mnt_count));
/* The task must still be sitting in the context that is being popped. */
208 LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
209 cfs_fs_pwd(current->fs), new_ctx->pwd);
210 LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
211 cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
/* Reinstate the saved working directory, then drop the mount reference. */
214 ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
217 mntput(saved->pwdmnt);
218 current->fs->umask = saved->luc.luc_umask;
/* Restore filesystem credentials and group info saved by push_ctxt(). */
220 current->fsuid = saved->luc.luc_fsuid;
221 current->fsgid = saved->luc.luc_fsgid;
222 current->cap_effective = saved->luc.luc_cap;
223 pop_group_info(saved, uc->luc_uce);
/* NOTE(review): arguments of a second debug trace, taken after the restore. */
228 "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
229 saved, current, current->fs, current->fs->pwd,
230 atomic_read(&current->fs->pwd->d_count),
231 atomic_read(&current->fs->pwd->d_inode->i_count),
232 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
234 atomic_read(&current->fs->pwdmnt->mnt_count));
237 EXPORT_SYMBOL(pop_ctxt);
239 /* utility to make a file */
/*
 * simple_mknod - look up @name under @dir and create it as a regular file
 * if it does not exist.
 *
 * @dir:  parent directory dentry.
 * @name: NUL-terminated name of the entry (its strlen() is used).
 * @mode: requested mode; the file-type bits are replaced by S_IFREG on
 *        creation.
 * @fix:  when non-zero and the entry already exists with different
 *        permission bits, rewrite them to match @mode.
 *
 * Returns the child dentry, or ERR_PTR(err) on the error path.
 *
 * NOTE(review): the embedded line numbers skip, so the error checks, labels
 * (out_up / out_err) and cleanup are not visible in this listing.
 */
240 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
242 struct dentry *dchild;
246 ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
247 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
249 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* NOTE(review): presumably taken only when the lookup failed. */
251 GOTO(out_up, dchild);
/* The entry already exists: it must be a regular file. */
253 if (dchild->d_inode) {
254 int old_mode = dchild->d_inode->i_mode;
255 if (!S_ISREG(old_mode))
256 GOTO(out_err, err = -EEXIST);
258 /* Fixup file permissions if necessary */
259 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
260 CWARN("fixing permissions on %s from %o to %o\n",
261 name, old_mode, mode);
/* Replace only the S_IALLUGO bits; keep every other bit of the old mode. */
262 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
263 (old_mode & ~S_IALLUGO);
264 mark_inode_dirty(dchild->d_inode);
266 GOTO(out_up, dchild);
/* No inode yet: create the file, forcing the type bits to S_IFREG. */
269 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
/* Error path: hand the error code back encoded as a pointer. */
278 dchild = ERR_PTR(err);
282 EXPORT_SYMBOL(simple_mknod);
284 /* utility to make a directory */
/*
 * simple_mkdir - look up @name under @dir and create it as a directory if
 * it does not exist.
 *
 * @dir:  parent directory dentry.
 * @mnt:  vfsmount passed through to ll_vfs_mkdir().
 * @name: NUL-terminated name of the entry (its strlen() is used).
 * @mode: requested mode for the directory.
 * @fix:  when non-zero and the directory already exists with different
 *        permission bits, rewrite them to match @mode.
 *
 * Returns the child dentry, or ERR_PTR(err) on the error path.
 *
 * NOTE(review): the embedded line numbers skip, so the error checks, labels
 * (out_up / out_err) and cleanup are not visible in this listing.
 */
285 struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt,
286 char *name, int mode, int fix)
288 struct dentry *dchild;
292 ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
293 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
294 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* NOTE(review): presumably taken only when the lookup failed. */
296 GOTO(out_up, dchild);
/* The entry already exists: it must be a directory. */
298 if (dchild->d_inode) {
299 int old_mode = dchild->d_inode->i_mode;
300 if (!S_ISDIR(old_mode)) {
301 CERROR("found %s (%lu/%u) is mode %o\n", name,
302 dchild->d_inode->i_ino,
303 dchild->d_inode->i_generation, old_mode);
304 GOTO(out_err, err = -ENOTDIR);
307 /* Fixup directory permissions if necessary */
308 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
310 "fixing permissions on %s from %o to %o\n",
311 name, old_mode, mode);
/* Replace only the S_IALLUGO bits; keep every other bit of the old mode. */
312 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
313 (old_mode & ~S_IALLUGO);
314 mark_inode_dirty(dchild->d_inode);
316 GOTO(out_up, dchild);
/* No inode yet: create the directory. */
319 err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
/* Error path: hand the error code back encoded as a pointer. */
327 dchild = ERR_PTR(err);
331 EXPORT_SYMBOL(simple_mkdir);
333 /* utility to rename a file */
/*
 * lustre_rename - rename @oldname to @newname, both inside directory @dir.
 *
 * @dir:     directory dentry that holds both names.
 * @mnt:     vfsmount passed to ll_vfs_rename() for both source and target.
 * @oldname: NUL-terminated existing name.
 * @newname: NUL-terminated destination name.
 *
 * Returns PTR_ERR() of a failed lookup of @oldname, -ENOENT when @oldname
 * has no inode, PTR_ERR() of a failed lookup of @newname, or otherwise the
 * ll_vfs_rename() result stored in err.
 *
 * NOTE(review): the embedded line numbers skip, so the put_old label, the
 * dentry releases and the final return are not visible in this listing.
 */
334 int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
335 char *oldname, char *newname)
337 struct dentry *dchild_old, *dchild_new;
341 ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
342 CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
343 (int)strlen(oldname), oldname, (int)strlen(newname), newname);
345 dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
346 if (IS_ERR(dchild_old))
347 RETURN(PTR_ERR(dchild_old));
/* A dentry without an inode means the source does not exist. */
349 if (!dchild_old->d_inode)
350 GOTO(put_old, err = -ENOENT);
352 dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
353 if (IS_ERR(dchild_new))
354 GOTO(put_old, err = PTR_ERR(dchild_new));
/* Source and target share the same parent inode and mount. */
356 err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
357 dir->d_inode, dchild_new, mnt);
364 EXPORT_SYMBOL(lustre_rename);
367 * Read a file from within kernel context. Prior to calling this
368 * function we should already have done a push_ctxt().
/*
 * lustre_fread - read up to @len bytes from @file into @buf through the
 * file's own f_op->read method, at the position pointed to by @off.
 *
 * Returns whatever f_op->read returns when @file, its f_op, its read
 * method and @off are all non-NULL.
 *
 * NOTE(review): the statement executed when that check fails is not
 * visible in this listing (the embedded line numbers skip).
 */
370 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
372 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
373 if (!file || !file->f_op || !file->f_op->read || !off)
376 return file->f_op->read(file, buf, len, off);
378 EXPORT_SYMBOL(lustre_fread);
381 * Write a file from within kernel context. Prior to calling this
382 * function we should already have done a push_ctxt().
/*
 * lustre_fwrite - write @len bytes from @buf to @file through the file's
 * own f_op->write method, at the position pointed to by @off.
 *
 * Returns whatever f_op->write returns.
 *
 * NOTE(review): the embedded line numbers skip from 387 to 395, so any
 * earlier argument checks, and the statement executed when the write method
 * is missing, are not visible in this listing.
 */
384 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
387 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
395 if (!file->f_op->write)
398 RETURN(file->f_op->write(file, buf, len, off));
400 EXPORT_SYMBOL(lustre_fwrite);
403 * Sync a file from within kernel context. Prior to calling this
404 * function we should already have done a push_ctxt().
/*
 * lustre_fsync - sync @file through its own f_op->fsync method, passing the
 * file's dentry and 0 as the final argument.
 *
 * Returns whatever f_op->fsync returns when @file, its f_op and its fsync
 * method are all non-NULL.
 *
 * NOTE(review): the statement executed when that check fails is not
 * visible in this listing (the embedded line numbers skip).
 */
406 int lustre_fsync(struct file *file)
409 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
410 if (!file || !file->f_op || !file->f_op->fsync)
413 RETURN(file->f_op->fsync(file, file->f_dentry, 0));
415 EXPORT_SYMBOL(lustre_fsync);
/*
 * l_dentry_open - open dentry @de on the mount recorded in @ctxt.
 *
 * Takes an extra reference on ctxt->pwdmnt with mntget() and then returns
 * the result of dentry_open(de, ctxt->pwdmnt, flags).
 *
 * NOTE(review): the rest of the parameter list (including `flags`) is not
 * visible in this listing; the embedded line numbers skip from 417 to 420.
 */
417 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
420 mntget(ctxt->pwdmnt);
421 return dentry_open(de, ctxt->pwdmnt, flags);
423 EXPORT_SYMBOL(l_dentry_open);
/*
 * l_filldir - per-entry callback handed to vfs_readdir() by l_readdir().
 *
 * @__buf is a struct l_readdir_callback.  For each directory entry the
 * callback records @offset in the previously stored dirent, allocates a new
 * l_linux_dirent, appends it to the caller's list, remembers it as the most
 * recent dirent and stores the inode number and the name bytes in it.
 *
 * Two signatures are provided; HAVE_VFS_READDIR_U64_INO selects the one
 * taking a u64 inode number, the other takes ino_t.
 *
 * NOTE(review): the embedded line numbers skip, so the #else/#endif, the
 * guards (e.g. around the first-entry case and allocation failure), the
 * name terminator and the return statement are not visible in this listing.
 */
425 #ifdef HAVE_VFS_READDIR_U64_INO
426 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
427 u64 ino, unsigned int d_type)
429 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
430 ino_t ino, unsigned int d_type)
433 struct l_linux_dirent *dirent;
434 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* Entry stored by the previous invocation, if any. */
436 dirent = buf->lrc_dirent;
438 dirent->lld_off = offset;
/* Allocate the record for this entry and queue it on the caller's list. */
440 OBD_ALLOC(dirent, sizeof(*dirent));
445 list_add_tail(&dirent->lld_list, buf->lrc_list);
447 buf->lrc_dirent = dirent;
448 dirent->lld_ino = ino;
/* The name plus one extra byte must fit in the fixed-size name field. */
449 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
450 memcpy(dirent->lld_name, name, namlen);
/*
 * l_readdir - read the entries of directory @file into @dentry_list.
 *
 * Runs vfs_readdir() with l_filldir() as the callback, which appends one
 * l_linux_dirent per entry to @dentry_list, then stores the file's final
 * f_pos as the offset of the last dirent produced.
 *
 * NOTE(review): the embedded line numbers skip, so the error handling after
 * vfs_readdir(), the guard around the last-dirent update and the return
 * value are not visible in this listing.
 */
455 long l_readdir(struct file *file, struct list_head *dentry_list)
457 struct l_linux_dirent *lastdirent;
458 struct l_readdir_callback buf;
/* Start with no previous dirent and the caller's list as the target. */
461 buf.lrc_dirent = NULL;
462 buf.lrc_list = dentry_list;
464 error = vfs_readdir(file, l_filldir, &buf);
468 lastdirent = buf.lrc_dirent;
470 lastdirent->lld_off = file->f_pos;
474 EXPORT_SYMBOL(l_readdir);
/*
 * l_notify_change - apply @newattrs to @dchild via notify_change() while
 * holding the inode mutex of dchild->d_inode.
 *
 * @mnt is forwarded to notify_change() only in the HAVE_SECURITY_PLUG
 * variant; the other variant calls notify_change(dchild, newattrs).
 *
 * NOTE(review): the #else/#endif and the return statement are not visible
 * in this listing (the embedded line numbers skip); the result is stored
 * in rc.
 */
476 int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
477 struct iattr *newattrs)
481 LOCK_INODE_MUTEX(dchild->d_inode);
482 #ifdef HAVE_SECURITY_PLUG
483 rc = notify_change(dchild, mnt, newattrs);
485 rc = notify_change(dchild, newattrs);
487 UNLOCK_INODE_MUTEX(dchild->d_inode);
490 EXPORT_SYMBOL(l_notify_change);
492 /* utility to truncate a file */
/*
 * simple_truncate - set the size of the entry @name under @dir to @length.
 *
 * @dir:    parent directory dentry.
 * @mnt:    vfsmount forwarded to l_notify_change().
 * @name:   NUL-terminated name of the entry (its strlen() is used).
 * @length: new file size.
 *
 * A failed lookup yields PTR_ERR() of the lookup result and a directory
 * yields -EISDIR (after logging it); otherwise err holds the result of
 * l_notify_change() with an ATTR_SIZE-only attribute set.
 *
 * NOTE(review): the embedded line numbers skip, so the lookup error check,
 * the out / out_dput labels, the handling of a missing inode and the return
 * are not visible in this listing.
 */
493 int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
494 char *name, loff_t length)
496 struct dentry *dchild;
497 struct iattr newattrs;
501 CDEBUG(D_INODE, "truncating file %.*s to %lld\n", (int)strlen(name),
502 name, (long long)length);
503 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* NOTE(review): presumably taken only when the lookup failed. */
505 GOTO(out, err = PTR_ERR(dchild));
507 if (dchild->d_inode) {
508 int old_mode = dchild->d_inode->i_mode;
/* Directories are rejected rather than truncated. */
509 if (S_ISDIR(old_mode)) {
510 CERROR("found %s (%lu/%u) is mode %o\n", name,
511 dchild->d_inode->i_ino,
512 dchild->d_inode->i_generation, old_mode);
513 GOTO(out_dput, err = -EISDIR);
/* Only the size attribute is marked valid in the change request. */
516 newattrs.ia_size = length;
517 newattrs.ia_valid = ATTR_SIZE;
518 err = l_notify_change(mnt, dchild, &newattrs);
526 EXPORT_SYMBOL(simple_truncate);
/*
 * Read-only device helpers, compiled only when LUSTRE_KERNEL_VERSION is
 * defined.  The build is stopped with #error when HAVE_CLEAR_RDONLY_ON_PUT
 * is not defined.
 */
528 #ifdef LUSTRE_KERNEL_VERSION
529 #ifndef HAVE_CLEAR_RDONLY_ON_PUT
530 #error rdonly patchset must be updated [cfs bz11248]
/* Declared here and defined elsewhere (not in the visible part of this file). */
533 void dev_set_rdonly(lvfs_sbdev_type dev);
534 int dev_check_rdonly(lvfs_sbdev_type dev);
/*
 * __lvfs_set_rdonly - sync @dev, then mark the journal device @jdev
 * read-only when it is set and differs from @dev.
 *
 * NOTE(review): the embedded line numbers skip, so the statements following
 * the "set dev ... rdonly" debug message (presumably the call that marks
 * @dev itself read-only) are not visible in this listing.
 */
536 void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
538 lvfs_sbdev_sync(dev);
539 if (jdev && (jdev != dev)) {
540 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
542 dev_set_rdonly(jdev);
544 CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
/*
 * lvfs_check_rdonly - report the read-only state of @dev.
 * Returns the value of dev_check_rdonly(dev) unchanged.
 */
548 int lvfs_check_rdonly(lvfs_sbdev_type dev)
550 return dev_check_rdonly(dev);
/* Both read-only helpers are exported to other modules. */
553 EXPORT_SYMBOL(__lvfs_set_rdonly);
554 EXPORT_SYMBOL(lvfs_check_rdonly);
555 #endif /* LUSTRE_KERNEL_VERSION */
/*
 * lvfs_check_io_health - probe the backing store by writing one page.
 *
 * Allocates a CFS_PAGE_SIZE buffer, writes it to @file with
 * fsfilt_write_record() on behalf of @obd, frees the buffer and logs the
 * result code rc.
 *
 * NOTE(review): the trailing argument 1 presumably requests a synchronous
 * write, going by the debug message.  The declarations of rc and offset,
 * the allocation-failure handling and the return statement are not visible
 * in this listing (the embedded line numbers skip).
 */
557 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
559 char *write_page = NULL;
564 OBD_ALLOC(write_page, CFS_PAGE_SIZE);
568 rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
570 OBD_FREE(write_page, CFS_PAGE_SIZE);
572 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
575 EXPORT_SYMBOL(lvfs_check_io_health);
/* Kernel module metadata: author, description and licence. */
577 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
578 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
579 MODULE_LICENSE("GPL");