4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/lvfs/lvfs_linux.c
38 * Author: Andreas Dilger <adilger@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_FILTER
43 #include <linux/version.h>
45 #include <asm/unistd.h>
46 #include <linux/slab.h>
47 #include <linux/pagemap.h>
48 #include <linux/quotaops.h>
49 #include <linux/version.h>
50 #include <libcfs/libcfs.h>
51 #include <lustre_fsfilt.h>
53 #include <linux/module.h>
54 #include <linux/init.h>
55 #include <linux/lustre_compat25.h>
57 #include "lvfs_internal.h"
60 #include <lustre_lib.h>
61 #include <lustre_quota.h>
/* High-water marks; raised by obd_update_maxusage() while it holds
 * obd_updatemax_lock. */
63 __u64 obd_max_pages = 0;
64 __u64 obd_max_alloc = 0;
/* Exported statistics handle; starts out NULL. */
65 struct lprocfs_stats *obd_memory = NULL;
66 EXPORT_SYMBOL(obd_memory);
/* Spinlock taken by obd_update_maxusage(), obd_memory_max() and
 * obd_pages_max(). */
67 DEFINE_SPINLOCK(obd_updatemax_lock);
68 /* refine later and change to seqlock or similar from libcfs */
70 /* Debugging check only needed during development */
/*
 * Context assertion helpers.  Two alternative sets of definitions are listed:
 * one built on LASSERT()/LASSERTF() and one that expands to an empty
 * statement.
 * NOTE(review): the preprocessor conditional that selects between the two
 * sets is not visible in this listing -- confirm which build option enables
 * the checking variant.
 */
72 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
73 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
75 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
77 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
78 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
79 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/*
 * Install @ginfo as the group_info of the current task, stashing the
 * previous group state in @save so that pop_group_info() can put it back.
 *
 * NOTE(review): only part of the body is visible in this listing; the
 * branching between the two save paths below must be confirmed against the
 * complete file.
 */
82 static void push_group_info(struct lvfs_run_ctxt *save,
83 struct group_info *ginfo)
/* Remember the current supplementary-group count. */
86 save->ngroups = current_ngroups;
/* Remember the current group_info pointer, then set @ginfo on a cred
 * obtained from prepare_creds(). */
91 save->group_info = current_cred()->group_info;
92 if ((cred = prepare_creds())) {
93 cred->group_info = ginfo;
/*
 * Counterpart of push_group_info(): restore the group state stashed in
 * @save.  @ginfo is not referenced by any line visible in this listing
 * (NOTE(review): it may select the restore path in lines not shown).
 */
100 static void pop_group_info(struct lvfs_run_ctxt *save,
101 struct group_info *ginfo)
/* Put back the saved supplementary-group count. */
104 current_ngroups = save->ngroups;
/* Put back the saved group_info pointer on a cred from prepare_creds(). */
108 if ((cred = prepare_creds())) {
109 cred->group_info = save->group_info;
112 task_unlock(current);
116 /* push / pop to root of obd store */
/*
 * Enter the filesystem context described by @new_ctx, recording what gets
 * replaced in @save for a later pop_ctxt().
 *
 * Steps visible here: take references on the task's current pwd dentry and
 * vfsmount, record the umask and group count, record and override
 * uid/gid/fsuid/fsgid/effective capabilities from @uc, clear the umask, and
 * point the task's pwd at new_ctx->pwdmnt / new_ctx->pwd.
 *
 * @save:    receives the state being replaced
 * @new_ctx: context to enter
 * @uc:      credentials to assume -- NOTE(review): presumably optional; the
 *           guard around the credential section is not visible in this
 *           listing, confirm before passing NULL
 */
117 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
118 struct lvfs_ucred *uc)
120 /* if there is an underlying dt_device then push_ctxt is not needed */
121 if (new_ctx->dt != NULL)
124 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
125 ASSERT_CTXT_MAGIC(new_ctx->magic);
126 OBD_SET_CTXT_MAGIC(save);
/* Both the current pwd and the target pwd must have a non-zero refcount. */
129 LASSERT(d_refcount(cfs_fs_pwd(current->fs)));
130 LASSERT(d_refcount(new_ctx->pwd));
/* Take references on the current pwd/mount and keep them in @save. */
131 save->pwd = dget(cfs_fs_pwd(current->fs));
132 save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
133 save->luc.luc_umask = cfs_curproc_umask();
134 save->ngroups = current_cred()->group_info->ngroups;
137 LASSERT(save->pwdmnt);
138 LASSERT(new_ctx->pwd);
139 LASSERT(new_ctx->pwdmnt);
/* Snapshot the caller's identity and capabilities ... */
143 save->luc.luc_uid = current_uid();
144 save->luc.luc_gid = current_gid();
145 save->luc.luc_fsuid = current_fsuid();
146 save->luc.luc_fsgid = current_fsgid();
147 save->luc.luc_cap = current_cap();
/* ... then apply the values from @uc on a cred from prepare_creds(). */
149 if ((cred = prepare_creds())) {
150 cred->uid = uc->luc_uid;
151 cred->gid = uc->luc_gid;
152 cred->fsuid = uc->luc_fsuid;
153 cred->fsgid = uc->luc_fsgid;
154 cred->cap_effective = uc->luc_cap;
/* When an identity entry is attached to @uc its mi_ginfo is among the
 * candidates for the group list (the other operands are not visible). */
158 push_group_info(save,
160 uc->luc_identity ? uc->luc_identity->mi_ginfo :
163 current->fs->umask = 0; /* umask already applied on client */
/* Finally switch the task's working directory to the new context. */
165 ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
167 EXPORT_SYMBOL(push_ctxt);
/*
 * Leave the context entered by push_ctxt() and restore what was recorded in
 * @saved.
 *
 * Steps visible here: assert that the task's pwd and mount still equal those
 * of @new_ctx, restore the saved pwd/mount, drop the saved mount reference,
 * restore the umask, restore uid/gid/fsuid/fsgid/effective capabilities, and
 * call pop_group_info().
 *
 * @saved:   state recorded by the matching push_ctxt()
 * @new_ctx: the context being left
 * @uc:      the credentials given to push_ctxt() -- NOTE(review): presumably
 *           optional; the guard is not visible in this listing
 */
169 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
170 struct lvfs_ucred *uc)
172 /* if there is an underlying dt_device then pop_ctxt is not needed */
173 if (new_ctx->dt != NULL)
176 ASSERT_CTXT_MAGIC(saved->magic);
177 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
/* The task must still be sitting in the directory push_ctxt() installed. */
179 LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
180 cfs_fs_pwd(current->fs), new_ctx->pwd);
181 LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
182 cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
/* Return to the saved working directory. */
185 ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
/* Release the mount reference held in @saved. */
188 mntput(saved->pwdmnt);
189 current->fs->umask = saved->luc.luc_umask;
/* Put the saved identity and capabilities back on a cred from
 * prepare_creds(). */
192 if ((cred = prepare_creds())) {
193 cred->uid = saved->luc.luc_uid;
194 cred->gid = saved->luc.luc_gid;
195 cred->fsuid = saved->luc.luc_fsuid;
196 cred->fsgid = saved->luc.luc_fsgid;
197 cred->cap_effective = saved->luc.luc_cap;
/* Same group-list selection expression as in push_ctxt(). */
201 pop_group_info(saved,
203 uc->luc_identity ? uc->luc_identity->mi_ginfo :
207 EXPORT_SYMBOL(pop_ctxt);
209 /* utility to make a file */
/*
 * Look up @name under @dir and create it as a regular file when it does not
 * exist yet.
 *
 * When the name already exists: anything that is not a regular file gives
 * -EEXIST; otherwise, if @fix is non-zero and the permission bits differ
 * from those in @mode, the inode's permission bits are replaced (file-type
 * bits kept) and the inode is marked dirty.
 *
 * @dir:  parent directory dentry
 * @name: NUL-terminated name (its length is taken with strlen())
 * @mode: requested mode; the type bits are forced to S_IFREG on creation
 * @fix:  non-zero to repair permissions of an existing file
 *
 * Result is a dentry pointer; on error it is set to ERR_PTR(err).
 */
210 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
212 struct dentry *dchild;
216 // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
217 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
219 dchild = ll_lookup_one_len(name, dir, strlen(name));
221 GOTO(out_up, dchild);
/* A positive dentry means the name already exists. */
223 if (dchild->d_inode) {
224 int old_mode = dchild->d_inode->i_mode;
225 if (!S_ISREG(old_mode))
226 GOTO(out_err, err = -EEXIST);
228 /* Fixup file permissions if necessary */
229 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
230 CWARN("fixing permissions on %s from %o to %o\n",
231 name, old_mode, mode);
232 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
233 (old_mode & ~S_IALLUGO);
234 mark_inode_dirty(dchild->d_inode);
236 GOTO(out_up, dchild);
/* Name is free: create it, forcing the file type to "regular". */
239 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
248 dchild = ERR_PTR(err);
252 EXPORT_SYMBOL(simple_mknod);
254 /* utility to make a directory */
/*
 * Look up @name under @dir and create it as a directory when it does not
 * exist yet.
 *
 * When the name already exists: anything that is not a directory is logged
 * and gives -ENOTDIR; otherwise, if @fix is non-zero and the permission bits
 * differ from those in @mode, the inode's permission bits are replaced
 * (file-type bits kept) and the inode is marked dirty.
 *
 * @dir:  parent directory dentry
 * @mnt:  mount handed through to ll_vfs_mkdir()
 * @name: NUL-terminated name (its length is taken with strlen())
 * @mode: mode handed to ll_vfs_mkdir()
 * @fix:  non-zero to repair permissions of an existing directory
 *
 * Result is a dentry pointer; on error it is set to ERR_PTR(err).
 */
255 struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt,
256 const char *name, int mode, int fix)
258 struct dentry *dchild;
262 // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
263 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
264 dchild = ll_lookup_one_len(name, dir, strlen(name));
266 GOTO(out_up, dchild);
/* A positive dentry means the name already exists. */
268 if (dchild->d_inode) {
269 int old_mode = dchild->d_inode->i_mode;
270 if (!S_ISDIR(old_mode)) {
271 CERROR("found %s (%lu/%u) is mode %o\n", name,
272 dchild->d_inode->i_ino,
273 dchild->d_inode->i_generation, old_mode);
274 GOTO(out_err, err = -ENOTDIR);
277 /* Fixup directory permissions if necessary */
278 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
280 "fixing permissions on %s from %o to %o\n",
281 name, old_mode, mode);
282 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
283 (old_mode & ~S_IALLUGO);
284 mark_inode_dirty(dchild->d_inode);
286 GOTO(out_up, dchild);
/* Name is free: create the directory. */
289 err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
297 dchild = ERR_PTR(err);
301 EXPORT_SYMBOL(simple_mkdir);
303 /* utility to rename a file */
/*
 * Rename @oldname to @newname, both looked up inside the same directory
 * @dir.
 *
 * @dir:     directory holding both names
 * @mnt:     mount passed to ll_vfs_rename() for source and target
 * @oldname: existing name; -ENOENT when it has no inode
 * @newname: target name
 *
 * A failed lookup of either name yields that lookup's PTR_ERR() value.
 * NOTE(review): the cleanup labels and the final return are not visible in
 * this listing.
 */
304 int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
305 char *oldname, char *newname)
307 struct dentry *dchild_old, *dchild_new;
311 ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
312 CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
313 (int)strlen(oldname), oldname, (int)strlen(newname), newname);
315 dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
316 if (IS_ERR(dchild_old))
317 RETURN(PTR_ERR(dchild_old));
/* The source must actually exist. */
319 if (!dchild_old->d_inode)
320 GOTO(put_old, err = -ENOENT);
322 dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
323 if (IS_ERR(dchild_new))
324 GOTO(put_old, err = PTR_ERR(dchild_new));
/* Source and target share the same parent inode and mount. */
326 err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
327 dir->d_inode, dchild_new, mnt);
334 EXPORT_SYMBOL(lustre_rename);
337 * Read a file from within kernel context. Prior to calling this
338 * function we should already have done a push_ctxt().
/*
 * Read up to @len bytes from @file at *@off into @buf by calling the file's
 * own f_op->read method and returning its result.
 *
 * The call is refused when @file, its f_op table, the read method or @off is
 * missing.  NOTE(review): the value returned in that case is not visible in
 * this listing.
 */
340 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
342 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
343 if (!file || !file->f_op || !file->f_op->read || !off)
346 return file->f_op->read(file, buf, len, off);
348 EXPORT_SYMBOL(lustre_fread);
351 * Write a file from within kernel context. Prior to calling this
352 * function we should already have done a push_ctxt().
/*
 * Write @len bytes from @buf to @file at *@off by calling the file's own
 * f_op->write method and returning its result.
 *
 * NOTE(review): the argument checks preceding the write-method test, and the
 * error codes they return, are not visible in this listing.
 */
354 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
357 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
/* A file without a write method cannot be written. */
365 if (!file->f_op->write)
368 RETURN(file->f_op->write(file, buf, len, off));
370 EXPORT_SYMBOL(lustre_fwrite);
373 * Sync a file from within kernel context. Prior to calling this
374 * function we should already have done a push_ctxt().
/*
 * Sync @file through cfs_do_fsync(file, 0) and return its result.
 *
 * The call is refused when @file, its f_op table or the fsync method is
 * missing.  NOTE(review): the value returned in that case is not visible in
 * this listing.
 */
376 int lustre_fsync(struct file *file)
379 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
380 if (!file || !file->f_op || !file->f_op->fsync)
383 RETURN(cfs_do_fsync(file, 0));
385 EXPORT_SYMBOL(lustre_fsync);
387 /* Note: dput(dchild) will be called if there is an error */
/*
 * Open dentry @de on the mount of @ctxt with the current task's credentials.
 *
 * NOTE(review): the remainder of the parameter list (the declaration of
 * "flags") is not visible in this listing.
 */
388 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
/* Take a mount reference before handing the mount to ll_dentry_open(). */
391 mntget(ctxt->pwdmnt);
392 return ll_dentry_open(de, ctxt->pwdmnt, flags, current_cred());
394 EXPORT_SYMBOL(l_dentry_open);
/*
 * Directory-entry callback handed to vfs_readdir() by l_readdir().
 *
 * For each entry it stores @offset on the previously recorded dirent,
 * allocates a new l_linux_dirent, appends it to the caller's list, makes it
 * the "latest" entry and fills in the inode number and name.
 *
 * @__buf:  the struct l_readdir_callback set up by l_readdir()
 * @name:   entry name, @namlen bytes
 * @offset: recorded as lld_off of the previous entry
 * @ino:    inode number of the entry
 * @d_type: not used by any line visible here
 */
396 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
397 u64 ino, unsigned int d_type)
399 struct l_linux_dirent *dirent;
400 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* Previous entry, if any -- NOTE(review): the NULL guard around the next
 * store is presumably on a line not visible here. */
402 dirent = buf->lrc_dirent;
404 dirent->lld_off = offset;
406 OBD_ALLOC(dirent, sizeof(*dirent));
411 cfs_list_add_tail(&dirent->lld_list, buf->lrc_list);
413 buf->lrc_dirent = dirent;
414 dirent->lld_ino = ino;
/* The name buffer must have room for the name plus one extra byte. */
415 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
/* NOTE(review): memcpy() copies exactly namlen bytes; a store of the
 * terminating NUL is not visible in this listing -- confirm it exists. */
416 memcpy(dirent->lld_name, name, namlen);
/*
 * Read the directory open on @file, collecting one l_linux_dirent per entry
 * on @dentry_list via the l_filldir() callback.
 *
 * After vfs_readdir() returns, the offset of the last collected entry is set
 * to the file's current position.
 */
421 long l_readdir(struct file *file, cfs_list_t *dentry_list)
423 struct l_linux_dirent *lastdirent;
424 struct l_readdir_callback buf;
/* No entry recorded yet; results are appended to the caller's list. */
427 buf.lrc_dirent = NULL;
428 buf.lrc_list = dentry_list;
430 error = vfs_readdir(file, l_filldir, &buf);
434 lastdirent = buf.lrc_dirent;
436 lastdirent->lld_off = file->f_pos;
440 EXPORT_SYMBOL(l_readdir);
/*
 * Apply @newattrs to the inode behind @dchild via notify_change(), holding
 * the inode's i_mutex around the call.
 *
 * @mnt is only passed on when HAVE_SECURITY_PLUG is defined; the other
 * listed call uses the two-argument form of notify_change().
 */
442 int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
443 struct iattr *newattrs)
447 mutex_lock(&dchild->d_inode->i_mutex);
448 #ifdef HAVE_SECURITY_PLUG
449 rc = notify_change(dchild, mnt, newattrs);
451 rc = notify_change(dchild, newattrs);
453 mutex_unlock(&dchild->d_inode->i_mutex);
456 EXPORT_SYMBOL(l_notify_change);
458 /* utility to truncate a file */
/*
 * Set the size of the file @name inside @dir to @length.
 *
 * The name is looked up under @dir; a directory is rejected with -EISDIR
 * (and logged), otherwise an ATTR_SIZE change is submitted through
 * l_notify_change().
 *
 * @dir:    parent directory dentry
 * @mnt:    mount passed to l_notify_change()
 * @name:   NUL-terminated name (its length is taken with strlen())
 * @length: new file size
 *
 * A failed lookup yields its PTR_ERR() value.
 */
459 int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
460 char *name, loff_t length)
462 struct dentry *dchild;
463 struct iattr newattrs;
467 CDEBUG(D_INODE, "truncating file %.*s to %lld\n", (int)strlen(name),
468 name, (long long)length);
469 dchild = ll_lookup_one_len(name, dir, strlen(name));
471 GOTO(out, err = PTR_ERR(dchild));
/* Only act on a name that exists. */
473 if (dchild->d_inode) {
474 int old_mode = dchild->d_inode->i_mode;
475 if (S_ISDIR(old_mode)) {
476 CERROR("found %s (%lu/%u) is mode %o\n", name,
477 dchild->d_inode->i_ino,
478 dchild->d_inode->i_generation, old_mode);
479 GOTO(out_dput, err = -EISDIR);
/* Request a size-only attribute change. */
482 newattrs.ia_size = length;
483 newattrs.ia_valid = ATTR_SIZE;
484 err = l_notify_change(mnt, dchild, &newattrs);
492 EXPORT_SYMBOL(simple_truncate);
/*
 * Mark block device @dev read-only, and the journal device @jdev as well
 * when one is given and it differs from @dev.
 *
 * The work is only compiled in when HAVE_DEV_SET_RDONLY is defined; the
 * listing also contains an error message for a device that cannot be set
 * read-only.  NOTE(review): the return values are not visible here.
 */
494 int __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
496 #ifdef HAVE_DEV_SET_RDONLY
/* A separate journal device is switched first. */
497 if (jdev && (jdev != dev)) {
498 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
500 dev_set_rdonly(jdev);
502 CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
507 CERROR("DEV %lx CANNOT BE SET READONLY\n", (long)dev);
512 EXPORT_SYMBOL(__lvfs_set_rdonly);
/*
 * Report the result of dev_check_rdonly() for @dev when HAVE_DEV_SET_RDONLY
 * is defined.  NOTE(review): the result for builds without that option is
 * not visible in this listing.
 */
514 int lvfs_check_rdonly(lvfs_sbdev_type dev)
516 #ifdef HAVE_DEV_SET_RDONLY
517 return dev_check_rdonly(dev);
522 EXPORT_SYMBOL(lvfs_check_rdonly);
/*
 * I/O health probe: allocate a CFS_PAGE_SIZE buffer, write it to @file with
 * fsfilt_write_record(), free the buffer and log the resulting rc.
 *
 * @obd:  device handed to fsfilt_write_record()
 * @file: file the probe page is written to
 *
 * The trailing 1 passed to fsfilt_write_record() presumably requests a
 * synchronous write, as the log message says -- confirm against that
 * function's definition.
 */
524 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
526 char *write_page = NULL;
531 OBD_ALLOC(write_page, CFS_PAGE_SIZE);
535 rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
537 OBD_FREE(write_page, CFS_PAGE_SIZE);
539 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
542 EXPORT_SYMBOL(lvfs_check_io_health);
544 void obd_update_maxusage()
548 max1 = obd_pages_sum();
549 max2 = obd_memory_sum();
551 cfs_spin_lock(&obd_updatemax_lock);
552 if (max1 > obd_max_pages)
553 obd_max_pages = max1;
554 if (max2 > obd_max_alloc)
555 obd_max_alloc = max2;
556 cfs_spin_unlock(&obd_updatemax_lock);
559 EXPORT_SYMBOL(obd_update_maxusage);
/*
 * Return a __u64 value obtained while holding obd_updatemax_lock.
 * NOTE(review): the statement that reads the value is not visible in this
 * listing; by name it is presumably obd_max_alloc -- confirm.
 */
561 __u64 obd_memory_max(void)
565 cfs_spin_lock(&obd_updatemax_lock);
567 cfs_spin_unlock(&obd_updatemax_lock);
571 EXPORT_SYMBOL(obd_memory_max);
/*
 * Return a __u64 value obtained while holding obd_updatemax_lock.
 * NOTE(review): the statement that reads the value is not visible in this
 * listing; by name it is presumably obd_max_pages -- confirm.
 */
573 __u64 obd_pages_max(void)
577 cfs_spin_lock(&obd_updatemax_lock);
579 cfs_spin_unlock(&obd_updatemax_lock);
583 EXPORT_SYMBOL(obd_pages_max);
/*
 * Read one field of counter @lc, selected by @field.
 *
 * The la_entry counter is sampled before the field is read, and the read is
 * repeated while the condition at the bottom of the loop holds.
 *
 * @lc:    counter to read
 * @field: which value to produce (config, sum, min, max, avg, sum of
 *         squares, count)
 *
 * NOTE(review): the statements for the CONFIG, MIN, MAX and COUNT cases are
 * not visible in this listing.
 */
586 __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
587 enum lprocfs_fields_flags field)
/* Snapshot of the entry counter taken before reading the field. */
595 centry = cfs_atomic_read(&lc->lc_cntl.la_entry);
598 case LPROCFS_FIELDS_FLAGS_CONFIG:
601 case LPROCFS_FIELDS_FLAGS_SUM:
/* The sum is kept in two parts that are added together here. */
602 ret = lc->lc_sum + lc->lc_sum_irq;
604 case LPROCFS_FIELDS_FLAGS_MIN:
607 case LPROCFS_FIELDS_FLAGS_MAX:
610 case LPROCFS_FIELDS_FLAGS_AVG:
/* NOTE(review): this is half of the min..max spread, not sum/count and not
 * the midpoint (max + min) / 2 -- confirm this is the intended "average". */
611 ret = (lc->lc_max - lc->lc_min)/2;
613 case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
614 ret = lc->lc_sumsquare;
616 case LPROCFS_FIELDS_FLAGS_COUNT:
/* Retry only while la_entry has moved since the snapshot AND the snapshot
 * also differs from la_exit.
 * NOTE(review): with "&&" a changed la_entry alone does not force a retry;
 * confirm that "||" was not intended. */
622 } while (centry != cfs_atomic_read(&lc->lc_cntl.la_entry) &&
623 centry != cfs_atomic_read(&lc->lc_cntl.la_exit));
627 EXPORT_SYMBOL(lprocfs_read_helper);
/* Kernel module metadata: author, description and license strings. */
630 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
631 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
632 MODULE_LICENSE("GPL");