1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/lvfs/lvfs_linux.c
38 * Author: Andreas Dilger <adilger@clusterfs.com>
42 # define EXPORT_SYMTAB
45 #define DEBUG_SUBSYSTEM S_FILTER
47 #include <linux/version.h>
49 #include <asm/unistd.h>
50 #include <linux/slab.h>
51 #include <linux/pagemap.h>
52 #include <linux/quotaops.h>
53 #include <linux/version.h>
54 #include <libcfs/kp30.h>
55 #include <lustre_fsfilt.h>
57 #include <linux/module.h>
58 #include <linux/init.h>
59 #include <linux/lustre_compat25.h>
61 #include "lvfs_internal.h"
64 #include <lustre_lib.h>
65 #include <lustre_quota.h>
67 /* Debugging check only needed during development */
/*
 * Two alternative definitions of the same three macros follow: one set that
 * expands to LASSERT()/LASSERTF() checks (context magic, and whether
 * get_fs() equals get_ds()), and one set that expands to an empty
 * do {} while(0) statement.
 * NOTE(review): the preprocessor conditional that selects between the two
 * sets is not visible in this listing, and the continuation line of
 * ASSERT_NOT_KERNEL_CTXT is missing as well.
 */
69 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
70 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
72 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
74 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
75 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
76 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/*
 * Switch the current task's supplementary groups to those of the upcall
 * cache entry, recording the previous state in @save.
 *
 * @save: run context that receives the previous ngroups / group_info
 * @uce:  upcall cache entry; when NULL, ginfo is NULL as well
 *
 * NOTE(review): the embedded line numbers skip inside this function
 * (80->82, 82->85, 93->98, ...), so braces, the declaration of cred, and
 * the conditionals/#else that guard the statements below are not visible
 * in this listing. The comments describe only the lines that are shown.
 */
79 static void push_group_info(struct lvfs_run_ctxt *save,
80 struct upcall_cache_entry *uce)
82 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
/* record the current group count in the save area */
85 save->ngroups = current_ngroups;
/* kernels >= 2.6.4: group list is a pointer held by the credentials */
88 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
/* keep the old group_info pointer, then point a prepared cred at ginfo */
91 save->group_info = current_cred()->group_info;
92 if ((cred = prepare_creds())) {
93 cred->group_info = ginfo;
/*
 * presumably the older-kernel branch (its #else is not visible): groups
 * live in fixed-size arrays, so sizes are asserted and contents copied
 */
98 LASSERT(ginfo->ngroups <= NGROUPS);
99 LASSERT(current->ngroups <= NGROUPS_SMALL);
/* copy the task's current groups into the save area */
101 save->group_info.ngroups = current->ngroups;
102 if (current->ngroups)
103 memcpy(save->group_info.small_block, current->groups,
104 current->ngroups * sizeof(gid_t));
/* install the groups from ginfo into the task */
106 current->ngroups = ginfo->ngroups;
108 memcpy(current->groups, ginfo->small_block,
109 current->ngroups * sizeof(gid_t));
/*
 * Restore the supplementary group state that was recorded in @save.
 *
 * @save: run context holding the previously recorded ngroups / group_info
 * @uce:  upcall cache entry; when NULL, ginfo is NULL as well
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * braces, the declaration of cred, the matching task_lock() (if any) and
 * the conditionals/#else around these statements are not visible here.
 */
114 static void pop_group_info(struct lvfs_run_ctxt *save,
115 struct upcall_cache_entry *uce)
117 struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
/* put back the recorded group count */
120 current_ngroups = save->ngroups;
/* kernels >= 2.6.4: restore the group_info pointer through a prepared cred */
122 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
125 if ((cred = prepare_creds())) {
126 cred->group_info = save->group_info;
129 task_unlock(current);
/* presumably the older-kernel branch: copy the saved group array back */
131 current->ngroups = save->group_info.ngroups;
132 if (current->ngroups)
133 memcpy(current->groups, save->group_info.small_block,
134 current->ngroups * sizeof(gid_t));
139 /* push / pop to root of obd store */
/*
 * Enter the run context @new_ctx, recording the current task state in @save.
 *
 * @save:    receives the context magic, a referenced pwd dentry (dget) and
 *           mount (mntget), the umask, and fsuid/fsgid/capabilities
 * @new_ctx: context to enter; its magic is asserted, and its pwd and
 *           pwdmnt are asserted non-NULL and installed as the task's pwd
 * @uc:      credentials to assume (luc_fsuid, luc_fsgid, luc_cap, luc_uce)
 *
 * The task umask is set to 0 while the context is pushed.
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * braces, the declaration of cred, the debug-print call that owns the
 * format strings below, and any guard around the uses of @uc are not
 * visible in this listing.
 */
140 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
141 struct lvfs_ucred *uc)
143 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
144 ASSERT_CTXT_MAGIC(new_ctx->magic);
145 OBD_SET_CTXT_MAGIC(save);
/* arguments of a debug print describing the state before the switch */
149 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
150 save, current, current->fs, current->fs->pwd,
151 atomic_read(&current->fs->pwd->d_count),
152 atomic_read(&current->fs->pwd->d_inode->i_count),
153 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
155 atomic_read(&current->fs->pwdmnt->mnt_count));
/* both the current and the new pwd dentries must be referenced */
159 LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
160 LASSERT(atomic_read(&new_ctx->pwd->d_count));
/* take references on the current pwd and mount, and record the umask */
161 save->pwd = dget(cfs_fs_pwd(current->fs));
162 save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
163 save->luc.luc_umask = current->fs->umask;
166 LASSERT(save->pwdmnt);
167 LASSERT(new_ctx->pwd);
168 LASSERT(new_ctx->pwdmnt);
/* record the current filesystem ids and capabilities */
172 save->luc.luc_fsuid = current_fsuid();
173 save->luc.luc_fsgid = current_fsgid();
174 save->luc.luc_cap = current_cap();
/* fill a prepared cred with the ids and capabilities from uc */
176 if ((cred = prepare_creds())) {
177 cred->fsuid = uc->luc_fsuid;
178 cred->fsgid = uc->luc_fsgid;
179 cred->cap_effective = uc->luc_cap;
183 push_group_info(save, uc->luc_uce);
185 current->fs->umask = 0; /* umask already applied on client */
/* make the new context's mount and dentry the task's pwd */
187 ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
/* arguments of a debug print describing the state after the switch */
191 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
192 new_ctx, current, current->fs, current->fs->pwd,
193 atomic_read(&current->fs->pwd->d_count),
194 atomic_read(&current->fs->pwd->d_inode->i_count),
195 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
197 atomic_read(&current->fs->pwdmnt->mnt_count));
200 EXPORT_SYMBOL(push_ctxt);
/*
 * Leave the run context @new_ctx and restore the task state recorded in
 * @saved.
 *
 * @saved:   state to restore: pwd/pwdmnt, umask, fsuid/fsgid/capabilities
 *           and group information; its magic is asserted
 * @new_ctx: the context being left; the task's current pwd dentry and
 *           mount are asserted to be new_ctx->pwd and new_ctx->pwdmnt
 * @uc:      credentials whose luc_uce is handed to pop_group_info()
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * braces, the declaration of cred, the debug-print call that owns the
 * format strings below, a dput() matching the visible mntput() (if any),
 * and any guard around the use of @uc are not visible in this listing.
 */
202 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
203 struct lvfs_ucred *uc)
206 ASSERT_CTXT_MAGIC(saved->magic);
208 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
/* arguments of a debug print describing the state before the restore */
212 " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
213 new_ctx, current, current->fs, current->fs->pwd,
214 atomic_read(&current->fs->pwd->d_count),
215 atomic_read(&current->fs->pwd->d_inode->i_count),
216 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
218 atomic_read(&current->fs->pwdmnt->mnt_count));
/* the task must still be in the context that is being left */
221 LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
222 cfs_fs_pwd(current->fs), new_ctx->pwd);
223 LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
224 cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
/* reinstall the recorded pwd, then drop the recorded mount reference */
227 ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
230 mntput(saved->pwdmnt);
231 current->fs->umask = saved->luc.luc_umask;
/* fill a prepared cred with the recorded ids and capabilities */
234 if ((cred = prepare_creds())) {
235 cred->fsuid = saved->luc.luc_fsuid;
236 cred->fsgid = saved->luc.luc_fsgid;
237 cred->cap_effective = saved->luc.luc_cap;
241 pop_group_info(saved, uc->luc_uce);
/* arguments of a debug print describing the state after the restore */
246 "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
247 saved, current, current->fs, current->fs->pwd,
248 atomic_read(&current->fs->pwd->d_count),
249 atomic_read(&current->fs->pwd->d_inode->i_count),
250 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
252 atomic_read(&current->fs->pwdmnt->mnt_count));
255 EXPORT_SYMBOL(pop_ctxt);
257 /* utility to make a file */
/*
 * Look up @name under @dir and create it as a regular file if it does not
 * exist yet.
 *
 * @dir:  parent directory dentry
 * @name: NUL-terminated name (its length is taken with strlen())
 * @mode: requested mode; the file-type bits are replaced by S_IFREG when
 *        creating
 * @fix:  when non-zero and the entry already exists as a regular file with
 *        different permission bits, overwrite those bits from @mode
 *
 * An existing entry that is not a regular file yields err = -EEXIST. On
 * the error path dchild is replaced by ERR_PTR(err).
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * braces, the declaration of err, the test that guards the first
 * GOTO(out_up, ...), the tail of the ll_vfs_create() call, the labels and
 * the return statement are not visible in this listing.
 */
258 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
260 struct dentry *dchild;
264 ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
265 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
267 dchild = ll_lookup_one_len(name, dir, strlen(name));
269 GOTO(out_up, dchild);
/* the name already exists: validate its type, optionally fix its mode */
271 if (dchild->d_inode) {
272 int old_mode = dchild->d_inode->i_mode;
273 if (!S_ISREG(old_mode))
274 GOTO(out_err, err = -EEXIST);
276 /* Fixup file permissions if necessary */
277 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
278 CWARN("fixing permissions on %s from %o to %o\n",
279 name, old_mode, mode);
/* keep the non-permission bits of the old mode, take S_IALLUGO from mode */
280 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
281 (old_mode & ~S_IALLUGO);
282 mark_inode_dirty(dchild->d_inode);
284 GOTO(out_up, dchild);
/* the name does not exist: create it, forcing the type to regular file */
287 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
296 dchild = ERR_PTR(err);
300 EXPORT_SYMBOL(simple_mknod);
302 /* utility to make a directory */
/*
 * Look up @name under @dir and create it as a directory if it does not
 * exist yet.
 *
 * @dir:  parent directory dentry
 * @mnt:  mount passed through to ll_vfs_mkdir()
 * @name: NUL-terminated name (its length is taken with strlen())
 * @mode: mode passed to ll_vfs_mkdir()
 * @fix:  when non-zero and the entry already exists as a directory with
 *        different permission bits, overwrite those bits from @mode
 *
 * An existing entry that is not a directory is reported with CERROR and
 * yields err = -ENOTDIR. On the error path dchild is replaced by
 * ERR_PTR(err).
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * braces, the declaration of err, the test that guards the first
 * GOTO(out_up, ...), the call that owns the "fixing permissions" format
 * string, the labels and the return statement are not visible here.
 */
303 struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt,
304 char *name, int mode, int fix)
306 struct dentry *dchild;
310 ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
311 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
312 dchild = ll_lookup_one_len(name, dir, strlen(name));
314 GOTO(out_up, dchild);
/* the name already exists: validate its type, optionally fix its mode */
316 if (dchild->d_inode) {
317 int old_mode = dchild->d_inode->i_mode;
318 if (!S_ISDIR(old_mode)) {
319 CERROR("found %s (%lu/%u) is mode %o\n", name,
320 dchild->d_inode->i_ino,
321 dchild->d_inode->i_generation, old_mode);
322 GOTO(out_err, err = -ENOTDIR);
325 /* Fixup directory permissions if necessary */
326 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
328 "fixing permissions on %s from %o to %o\n",
329 name, old_mode, mode);
/* keep the non-permission bits of the old mode, take S_IALLUGO from mode */
330 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
331 (old_mode & ~S_IALLUGO);
332 mark_inode_dirty(dchild->d_inode);
334 GOTO(out_up, dchild);
/* the name does not exist: create the directory */
337 err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
345 dchild = ERR_PTR(err);
349 EXPORT_SYMBOL(simple_mkdir);
351 /* utility to rename a file */
/*
 * Rename @oldname to @newname, both looked up in the same directory @dir.
 *
 * @dir:     directory dentry that is both source and target parent
 * @mnt:     mount passed for both sides of ll_vfs_rename()
 * @oldname: NUL-terminated existing name
 * @newname: NUL-terminated target name
 *
 * A failed lookup of @oldname returns PTR_ERR() of the lookup result; an
 * @oldname without an inode yields err = -ENOENT; a failed lookup of
 * @newname yields PTR_ERR() of that result.
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * braces, the declaration of err, the put_old label, the dentry releases
 * and the final return are not visible in this listing.
 */
352 int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
353 char *oldname, char *newname)
355 struct dentry *dchild_old, *dchild_new;
359 ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
360 CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
361 (int)strlen(oldname), oldname, (int)strlen(newname), newname);
363 dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
364 if (IS_ERR(dchild_old))
365 RETURN(PTR_ERR(dchild_old));
/* the source name must refer to an existing inode */
367 if (!dchild_old->d_inode)
368 GOTO(put_old, err = -ENOENT);
370 dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
371 if (IS_ERR(dchild_new))
372 GOTO(put_old, err = PTR_ERR(dchild_new));
/* source and target share the same parent inode and mount */
374 err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
375 dir->d_inode, dchild_new, mnt);
382 EXPORT_SYMBOL(lustre_rename);
385 * Read a file from within kernel context. Prior to calling this
386 * function we should already have done a push_ctxt().
/*
 * Forward a read to the file's own f_op->read method.
 *
 * @file: file to read from
 * @buf:  destination buffer
 * @len:  number of bytes requested
 * @off:  file offset, passed by pointer to f_op->read
 *
 * Returns whatever file->f_op->read() returns.
 * NOTE(review): the statement executed when @file, its f_op, its read
 * method or @off is NULL is not visible in this listing (line numbers
 * skip from 391 to 394), and neither are the braces.
 */
388 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
390 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
391 if (!file || !file->f_op || !file->f_op->read || !off)
394 return file->f_op->read(file, buf, len, off);
396 EXPORT_SYMBOL(lustre_fread);
399 * Write a file from within kernel context. Prior to calling this
400 * function we should already have done a push_ctxt().
/*
 * Forward a write to the file's own f_op->write method.
 *
 * @file: file to write to
 * @buf:  source buffer
 * @len:  number of bytes to write
 * @off:  file offset, passed by pointer to f_op->write
 *
 * Returns (via RETURN) whatever file->f_op->write() returns.
 * NOTE(review): line numbers skip from 405 to 413 and from 413 to 416, so
 * any earlier argument checks and the statement executed when the write
 * method is NULL are not visible in this listing.
 */
402 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
405 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
413 if (!file->f_op->write)
416 RETURN(file->f_op->write(file, buf, len, off));
418 EXPORT_SYMBOL(lustre_fwrite);
421 * Sync a file from within kernel context. Prior to calling this
422 * function we should already have done a push_ctxt().
/*
 * Forward a sync request to the file's own f_op->fsync method, passing the
 * file, its dentry and 0 as the third argument.
 *
 * @file: file to sync
 *
 * Returns (via RETURN) whatever file->f_op->fsync() returns.
 * NOTE(review): the statement executed when @file, its f_op or its fsync
 * method is NULL is not visible in this listing (line numbers skip from
 * 428 to 431), and neither are the braces.
 */
424 int lustre_fsync(struct file *file)
427 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
428 if (!file || !file->f_op || !file->f_op->fsync)
431 RETURN(file->f_op->fsync(file, file->f_dentry, 0));
433 EXPORT_SYMBOL(lustre_fsync);
435 /* Note: dput(dchild) will be called if there is an error */
/*
 * Open dentry @de on the mount of run context @ctxt with the current
 * task's credentials.
 *
 * @ctxt: run context whose pwdmnt is used as the mount
 * @de:   dentry to open
 *
 * An extra reference is taken on ctxt->pwdmnt before it is handed to
 * ll_dentry_open(); the result of ll_dentry_open() is returned as is.
 * NOTE(review): the rest of the parameter list (the line that declares
 * flags) and the braces are not visible in this listing.
 */
436 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
439 mntget(ctxt->pwdmnt);
440 return ll_dentry_open(de, ctxt->pwdmnt, flags, current_cred());
442 EXPORT_SYMBOL(l_dentry_open);
/*
 * Directory-entry callback (it is the callback handed to vfs_readdir() by
 * l_readdir()). For each entry it allocates a struct l_linux_dirent,
 * appends it to the caller's list and fills in the inode number and name.
 *
 * @__buf:  really a struct l_readdir_callback *
 * @name:   entry name, namlen bytes long
 * @offset: stored into lld_off of the previously recorded dirent
 * @ino:    inode number; u64 or ino_t depending on
 *          HAVE_VFS_READDIR_U64_INO
 * @d_type: entry type (not used by the visible lines)
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * the #else/#endif between the two signatures, the braces, the guard
 * around the lld_off store, the allocation-failure handling, any name
 * termination and the return statement are not visible in this listing.
 */
444 #ifdef HAVE_VFS_READDIR_U64_INO
445 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
446 u64 ino, unsigned int d_type)
448 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
449 ino_t ino, unsigned int d_type)
452 struct l_linux_dirent *dirent;
453 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* the dirent recorded by the previous call gets this entry's offset */
455 dirent = buf->lrc_dirent;
457 dirent->lld_off = offset;
/* allocate a dirent for this entry and queue it at the list tail */
459 OBD_ALLOC(dirent, sizeof(*dirent));
464 list_add_tail(&dirent->lld_list, buf->lrc_list);
466 buf->lrc_dirent = dirent;
467 dirent->lld_ino = ino;
/* the name buffer must hold namlen bytes plus one more */
468 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
469 memcpy(dirent->lld_name, name, namlen);
/*
 * Read the entries of directory @file into @dentry_list by running
 * vfs_readdir() with l_filldir() as the callback.
 *
 * @file:        open directory
 * @dentry_list: list head that l_filldir() appends entries to
 *
 * After the walk, the last recorded dirent has its lld_off set to the
 * file's current position (file->f_pos).
 * NOTE(review): the embedded line numbers skip inside this function, so
 * the declaration of error, the handling of a vfs_readdir() failure, the
 * guard around the lastdirent store, the braces and the return statement
 * are not visible in this listing.
 */
474 long l_readdir(struct file *file, struct list_head *dentry_list)
476 struct l_linux_dirent *lastdirent;
477 struct l_readdir_callback buf;
/* start with no previous dirent and the caller's list as the target */
480 buf.lrc_dirent = NULL;
481 buf.lrc_list = dentry_list;
483 error = vfs_readdir(file, l_filldir, &buf);
487 lastdirent = buf.lrc_dirent;
489 lastdirent->lld_off = file->f_pos;
493 EXPORT_SYMBOL(l_readdir);
/*
 * Call notify_change() on @dchild while holding the inode mutex of
 * dchild->d_inode.
 *
 * @mnt:      mount; only passed on when HAVE_SECURITY_PLUG is defined
 * @dchild:   dentry whose attributes change
 * @newattrs: attributes to apply
 *
 * NOTE(review): the declaration of rc, the #else/#endif lines, the braces
 * and the return statement are not visible in this listing.
 */
495 int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
496 struct iattr *newattrs)
500 LOCK_INODE_MUTEX(dchild->d_inode);
/* the notify_change() signature takes a mount only with HAVE_SECURITY_PLUG */
501 #ifdef HAVE_SECURITY_PLUG
502 rc = notify_change(dchild, mnt, newattrs);
504 rc = notify_change(dchild, newattrs);
506 UNLOCK_INODE_MUTEX(dchild->d_inode);
509 EXPORT_SYMBOL(l_notify_change);
511 /* utility to truncate a file */
/*
 * Look up @name under @dir and, if it exists, set its size to @length via
 * l_notify_change() with ATTR_SIZE.
 *
 * @dir:    parent directory dentry
 * @mnt:    mount passed through to l_notify_change()
 * @name:   NUL-terminated name (its length is taken with strlen())
 * @length: new file size
 *
 * A failed lookup yields err = PTR_ERR(dchild); an existing entry that is
 * a directory is reported with CERROR and yields err = -EISDIR.
 *
 * NOTE(review): the embedded line numbers skip inside this function, so
 * the declaration of err, the test that guards GOTO(out, ...), the braces,
 * the out/out_dput labels and the return statement are not visible here.
 */
512 int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
513 char *name, loff_t length)
515 struct dentry *dchild;
516 struct iattr newattrs;
520 CDEBUG(D_INODE, "truncating file %.*s to %lld\n", (int)strlen(name),
521 name, (long long)length);
522 dchild = ll_lookup_one_len(name, dir, strlen(name));
524 GOTO(out, err = PTR_ERR(dchild));
526 if (dchild->d_inode) {
527 int old_mode = dchild->d_inode->i_mode;
/* directories are rejected */
528 if (S_ISDIR(old_mode)) {
529 CERROR("found %s (%lu/%u) is mode %o\n", name,
530 dchild->d_inode->i_ino,
531 dchild->d_inode->i_generation, old_mode);
532 GOTO(out_dput, err = -EISDIR);
/* only the size attribute is marked valid */
535 newattrs.ia_size = length;
536 newattrs.ia_valid = ATTR_SIZE;
537 err = l_notify_change(mnt, dchild, &newattrs);
545 EXPORT_SYMBOL(simple_truncate);
/*
 * Read-only device helpers, compiled only when LUSTRE_KERNEL_VERSION is
 * defined. The build is stopped with #error when HAVE_CLEAR_RDONLY_ON_PUT
 * is not defined.
 * NOTE(review): the #endif of the inner #ifndef, the braces, and the lines
 * numbered 560 and 564-565 are not visible in this listing.
 */
547 #ifdef LUSTRE_KERNEL_VERSION
548 #ifndef HAVE_CLEAR_RDONLY_ON_PUT
549 #error rdonly patchset must be updated [cfs bz11248]
/* declared here; the definitions are not in the visible part of this file */
552 void dev_set_rdonly(lvfs_sbdev_type dev);
553 int dev_check_rdonly(lvfs_sbdev_type dev);
/*
 * Sync @dev, then mark the journal device @jdev read-only when it is set
 * and differs from @dev. A debug message about setting @dev read-only
 * follows; presumably the matching dev_set_rdonly(dev) call is on a line
 * that is not shown - TODO confirm.
 */
555 void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
557 lvfs_sbdev_sync(dev);
558 if (jdev && (jdev != dev)) {
559 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
561 dev_set_rdonly(jdev);
563 CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
/* Thin wrapper: returns the result of dev_check_rdonly(dev). */
567 int lvfs_check_rdonly(lvfs_sbdev_type dev)
569 return dev_check_rdonly(dev);
572 EXPORT_SYMBOL(__lvfs_set_rdonly);
573 EXPORT_SYMBOL(lvfs_check_rdonly);
574 #endif /* LUSTRE_KERNEL_VERSION */
/*
 * Probe the I/O path by writing one page-sized buffer to @file through
 * fsfilt_write_record().
 *
 * @obd:  obd device passed to fsfilt_write_record()
 * @file: file that receives the test write
 *
 * A CFS_PAGE_SIZE buffer is allocated with OBD_ALLOC, written at &offset
 * with a final argument of 1 (the debug message below calls the write
 * synchronous), and freed again; rc holds the write result.
 * NOTE(review): the declarations of rc and offset, the allocation-failure
 * handling, the braces and the return statement are not visible in this
 * listing.
 */
576 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
578 char *write_page = NULL;
583 OBD_ALLOC(write_page, CFS_PAGE_SIZE);
587 rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
589 OBD_FREE(write_page, CFS_PAGE_SIZE);
591 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
594 EXPORT_SYMBOL(lvfs_check_io_health);
/* Module metadata: author, one-line description and licence string. */
596 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
597 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
598 MODULE_LICENSE("GPL");