1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see [sun.com URL with a
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/lvfs/lvfs_linux.c
38 * Author: Andreas Dilger <adilger@clusterfs.com>
/* Build-time switches defined ahead of the includes below.
 * NOTE(review): any preprocessor guard around EXPORT_SYMTAB is not visible
 * in this listing. */
42 # define EXPORT_SYMTAB
/* Tags this file with the S_FILTER subsystem; presumably consumed by the
 * CDEBUG/CERROR/CWARN logging macros used below -- confirm in libcfs. */
45 #define DEBUG_SUBSYSTEM S_FILTER
47 #include <linux/version.h>
49 #include <asm/unistd.h>
50 #include <linux/slab.h>
51 #include <linux/pagemap.h>
52 #include <linux/quotaops.h>
53 #include <linux/version.h>
54 #include <libcfs/libcfs.h>
55 #include <lustre_fsfilt.h>
57 #include <linux/module.h>
58 #include <linux/init.h>
59 #include <linux/lustre_compat25.h>
61 #include "lvfs_internal.h"
64 #include <lustre_lib.h>
65 #include <lustre_quota.h>
/* High-water marks: obd_update_maxusage() raises them to the latest
 * obd_pages_sum() / obd_memory_sum() sample whenever that sample is larger. */
67 __u64 obd_max_pages = 0;
68 __u64 obd_max_alloc = 0;
/* Statistics handle, initially NULL; exported further down in this file. */
69 struct lprocfs_stats *obd_memory = NULL;
/* Taken by obd_update_maxusage(), obd_memory_max() and obd_pages_max(). */
70 spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED;
71 /* refine later and change to seqlock or similar from libcfs */
73 /* Debugging check only needed during development */
/* Two alternative definitions of the same three macros follow. The
 * preprocessor conditional that selects between them is not visible in this
 * listing, nor is the continuation line of ASSERT_NOT_KERNEL_CTXT.
 *
 * Checking variants: ASSERT_CTXT_MAGIC compares a context's magic against
 * OBD_RUN_CTXT_MAGIC; the *_KERNEL_CTXT pair test whether get_fs() equals
 * get_ds(). */
75 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
76 # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
78 # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
/* No-op variants: each expands to an empty do/while(0) statement. */
80 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
81 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
82 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
/*
 * push_group_info() - stash the current task's supplementary-group state in
 * save and install ginfo in its place.
 *
 * Two kinds of state are touched on the visible lines: the group count
 * (current_ngroups -> save->ngroups) and the group_info pointer
 * (current->group_info -> save->group_info, then replaced by ginfo).
 *
 * NOTE(review): lines are missing from this listing between the statements
 * below, so the condition choosing between the two paths and any locking
 * around the pointer swap are not visible here.
 */
85 static void push_group_info(struct lvfs_run_ctxt *save,
86 struct group_info *ginfo)
89 save->ngroups = current_ngroups;
93 save->group_info = current->group_info;
94 current->group_info = ginfo;
/*
 * pop_group_info() - counterpart of push_group_info(): put back the group
 * count and the group_info pointer previously recorded in save.
 *
 * ginfo is not referenced on any visible line.
 * NOTE(review): the listing omits the lines between these statements; the
 * task_unlock() below implies a matching lock acquisition that is not shown.
 */
99 static void pop_group_info(struct lvfs_run_ctxt *save,
100 struct group_info *ginfo)
103 current_ngroups = save->ngroups;
106 current->group_info = save->group_info;
107 task_unlock(current);
111 /* push / pop to root of obd store */
/*
 * push_ctxt() - enter the run context described by new_ctx.
 *
 * save:    receives the state being replaced: the working directory and its
 *          mount (extra references taken with dget()/mntget()), the umask,
 *          the group count, and uid/gid/fsuid/fsgid/capabilities in
 *          save->luc.
 * new_ctx: supplies the pwd/pwdmnt pair installed through ll_set_fs_pwd();
 *          both are asserted to be set and its magic is checked.
 * uc:      credentials copied onto the current task; when uc->luc_identity
 *          is set, its mi_ginfo is what gets passed to push_group_info().
 *
 * The "= push ..." format strings belong to debug prints whose call lines
 * are not part of this listing.
 *
 * NOTE(review): this listing omits some original lines (braces, any guard
 * around the uc dereferences, the remaining push_group_info() arguments), so
 * behaviour that depends on them is not described here.
 */
112 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
113 struct lvfs_ucred *uc)
115 //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
116 ASSERT_CTXT_MAGIC(new_ctx->magic);
117 OBD_SET_CTXT_MAGIC(save);
121 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
122 save, current, current->fs, current->fs->pwd,
123 atomic_read(&current->fs->pwd->d_count),
124 atomic_read(&current->fs->pwd->d_inode->i_count),
125 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
127 atomic_read(&current->fs->pwdmnt->mnt_count));
/* Both the outgoing and the incoming cwd must still be referenced. */
131 LASSERT(atomic_read(&current->fs->pwd->d_count));
132 LASSERT(atomic_read(&new_ctx->pwd->d_count));
/* Pin the outgoing cwd and mount; pop_ctxt() reinstalls them from save. */
133 save->pwd = dget(current->fs->pwd);
134 save->pwdmnt = mntget(current->fs->pwdmnt);
135 save->luc.luc_umask = current->fs->umask;
136 save->ngroups = current->group_info->ngroups;
139 LASSERT(save->pwdmnt);
140 LASSERT(new_ctx->pwd);
141 LASSERT(new_ctx->pwdmnt);
/* Record the task's identity, then overwrite it with the caller's. */
144 save->luc.luc_uid = current->uid;
145 save->luc.luc_gid = current->gid;
146 save->luc.luc_fsuid = current->fsuid;
147 save->luc.luc_fsgid = current->fsgid;
148 save->luc.luc_cap = current->cap_effective;
150 current->uid = uc->luc_uid;
151 current->gid = uc->luc_gid;
152 current->fsuid = uc->luc_fsuid;
153 current->fsgid = uc->luc_fsgid;
154 current->cap_effective = uc->luc_cap;
156 push_group_info(save,
158 uc->luc_identity ? uc->luc_identity->mi_ginfo :
161 current->fs->umask = 0; /* umask already applied on client */
163 ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
167 "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
168 new_ctx, current, current->fs, current->fs->pwd,
169 atomic_read(&current->fs->pwd->d_count),
170 atomic_read(&current->fs->pwd->d_inode->i_count),
171 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
173 atomic_read(&current->fs->pwdmnt->mnt_count));
176 EXPORT_SYMBOL(push_ctxt);
/*
 * pop_ctxt() - leave the context entered by push_ctxt() and restore what
 * was recorded in saved.
 *
 * saved:   state captured by push_ctxt(); its magic is checked, its
 *          pwd/pwdmnt are reinstalled with ll_set_fs_pwd(), the mount
 *          reference is dropped with mntput(), and umask plus
 *          uid/gid/fsuid/fsgid/capabilities are copied back from it.
 * new_ctx: the context being left; the task's current pwd and pwdmnt are
 *          asserted to still equal new_ctx->pwd / new_ctx->pwdmnt.
 * uc:      same credentials that were pushed; when uc->luc_identity is set,
 *          its mi_ginfo is what gets passed to pop_group_info().
 *
 * The "= pop ..." format strings belong to debug prints whose call lines
 * are not part of this listing.
 *
 * NOTE(review): this listing omits some original lines (braces, any guard
 * around the uc dereferences, the release of saved->pwd, the remaining
 * pop_group_info() arguments), so behaviour that depends on them is not
 * described here.
 */
178 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
179 struct lvfs_ucred *uc)
182 ASSERT_CTXT_MAGIC(saved->magic);
184 ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
188 " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
189 new_ctx, current, current->fs, current->fs->pwd,
190 atomic_read(&current->fs->pwd->d_count),
191 atomic_read(&current->fs->pwd->d_inode->i_count),
192 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
194 atomic_read(&current->fs->pwdmnt->mnt_count));
/* The task must still be sitting in the context that was pushed. */
197 LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
198 current->fs->pwd, new_ctx->pwd);
199 LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
200 current->fs->pwdmnt, new_ctx->pwdmnt);
203 ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
/* Drop the mount reference that push_ctxt() took with mntget(). */
206 mntput(saved->pwdmnt);
207 current->fs->umask = saved->luc.luc_umask;
209 current->uid = saved->luc.luc_uid;
210 current->gid = saved->luc.luc_gid;
211 current->fsuid = saved->luc.luc_fsuid;
212 current->fsgid = saved->luc.luc_fsgid;
213 current->cap_effective = saved->luc.luc_cap;
214 pop_group_info(saved,
216 uc->luc_identity ? uc->luc_identity->mi_ginfo :
222 "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
223 saved, current, current->fs, current->fs->pwd,
224 atomic_read(&current->fs->pwd->d_count),
225 atomic_read(&current->fs->pwd->d_inode->i_count),
226 current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
228 atomic_read(&current->fs->pwdmnt->mnt_count));
231 EXPORT_SYMBOL(pop_ctxt);
233 /* utility to make a file */
/*
 * simple_mknod() - look up name under dir and make sure a regular file
 * exists there.
 *
 * dir:  parent directory dentry.
 * name: NUL-terminated entry name (its length is taken with strlen()).
 * mode: requested mode; only the non-type bits are used for creation, the
 *       type is forced to S_IFREG.
 * fix:  when non-zero, an existing file whose permission bits (S_IALLUGO)
 *       differ from mode has them rewritten.
 *
 * Yields the child dentry, or ERR_PTR(err) on the error path.
 * NOTE(review): several original lines (declarations, the check guarding
 * the first GOTO, the remaining ll_vfs_create() arguments, the exit labels)
 * are missing from this listing.
 */
234 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
236 struct dentry *dchild;
240 // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
241 CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
243 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* Hands the lookup result back unchanged; guarding test not shown. */
245 GOTO(out_up, dchild);
/* Entry already exists: it must be a regular file. */
247 if (dchild->d_inode) {
248 int old_mode = dchild->d_inode->i_mode;
249 if (!S_ISREG(old_mode))
250 GOTO(out_err, err = -EEXIST);
252 /* Fixup file permissions if necessary */
253 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
254 CWARN("fixing permissions on %s from %o to %o\n",
255 name, old_mode, mode);
/* Replace only the permission bits; keep the file-type bits. */
256 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
257 (old_mode & ~S_IALLUGO);
258 mark_inode_dirty(dchild->d_inode);
260 GOTO(out_up, dchild);
/* No inode yet: create one, forcing the type to regular file. */
263 err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
272 dchild = ERR_PTR(err);
276 EXPORT_SYMBOL(simple_mknod);
278 /* utility to make a directory */
/*
 * simple_mkdir() - look up name under dir and make sure a directory exists
 * there.
 *
 * dir:  parent directory dentry.
 * mnt:  mount passed through to ll_vfs_mkdir().
 * name: NUL-terminated entry name (its length is taken with strlen()).
 * mode: mode handed to ll_vfs_mkdir() when the entry has to be created.
 * fix:  when non-zero, an existing directory whose permission bits
 *       (S_IALLUGO) differ from mode has them rewritten.
 *
 * Yields the child dentry, or ERR_PTR(err) on the error path.
 * NOTE(review): several original lines (declarations, the check guarding
 * the first GOTO, the logging call head for the fix-up message, the exit
 * labels) are missing from this listing.
 */
279 struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt,
280 char *name, int mode, int fix)
282 struct dentry *dchild;
286 // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
287 CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
288 dchild = ll_lookup_one_len(name, dir, strlen(name));
/* Hands the lookup result back unchanged; guarding test not shown. */
290 GOTO(out_up, dchild);
/* Entry already exists: anything but a directory is -ENOTDIR. */
292 if (dchild->d_inode) {
293 int old_mode = dchild->d_inode->i_mode;
294 if (!S_ISDIR(old_mode)) {
295 CERROR("found %s (%lu/%u) is mode %o\n", name,
296 dchild->d_inode->i_ino,
297 dchild->d_inode->i_generation, old_mode);
298 GOTO(out_err, err = -ENOTDIR);
301 /* Fixup directory permissions if necessary */
302 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
304 "fixing permissions on %s from %o to %o\n",
305 name, old_mode, mode);
/* Replace only the permission bits; keep the file-type bits. */
306 dchild->d_inode->i_mode = (mode & S_IALLUGO) |
307 (old_mode & ~S_IALLUGO);
308 mark_inode_dirty(dchild->d_inode);
310 GOTO(out_up, dchild);
313 err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
321 dchild = ERR_PTR(err);
325 EXPORT_SYMBOL(simple_mkdir);
327 /* utility to rename a file */
/*
 * lustre_rename() - rename oldname to newname inside the single directory
 * dir.
 *
 * dir:     directory holding both names; used as source and target parent.
 * mnt:     mount passed to ll_vfs_rename() for both sides.
 * oldname: existing entry; -ENOENT if the lookup finds no inode behind it.
 * newname: destination entry name.
 *
 * A failed lookup of either name is reported as the PTR_ERR() of the lookup
 * result; otherwise err carries the ll_vfs_rename() result.
 * NOTE(review): the declaration of err, the exit labels and the statements
 * under them are missing from this listing.
 */
328 int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
329 char *oldname, char *newname)
331 struct dentry *dchild_old, *dchild_new;
335 ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
336 CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
337 (int)strlen(oldname), oldname, (int)strlen(newname), newname);
339 dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
340 if (IS_ERR(dchild_old))
341 RETURN(PTR_ERR(dchild_old));
/* A dentry without an inode means the source does not exist. */
343 if (!dchild_old->d_inode)
344 GOTO(put_old, err = -ENOENT);
346 dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
347 if (IS_ERR(dchild_new))
348 GOTO(put_old, err = PTR_ERR(dchild_new));
350 err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
351 dir->d_inode, dchild_new, mnt);
358 EXPORT_SYMBOL(lustre_rename);
361 * Read a file from within kernel context. Prior to calling this
362 * function we should already have done a push_ctxt().
/*
 * lustre_fread() - call the file's own f_op->read method.
 *
 * file: open file; it, its f_op table and the read method must be non-NULL.
 * buf:  destination buffer.
 * len:  number of bytes requested.
 * off:  in/out file position, must be non-NULL.
 *
 * Returns whatever f_op->read returns.
 * NOTE(review): the value returned when the argument check fails is on a
 * line missing from this listing.
 */
364 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
366 ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
367 if (!file || !file->f_op || !file->f_op->read || !off)
370 return file->f_op->read(file, buf, len, off);
372 EXPORT_SYMBOL(lustre_fread);
375 * Write a file from within kernel context. Prior to calling this
376 * function we should already have done a push_ctxt().
/*
 * lustre_fwrite() - call the file's own f_op->write method.
 *
 * file: open file whose f_op->write is checked before use.
 * buf:  source buffer.
 * len:  number of bytes to write.
 * off:  in/out file position.
 *
 * Returns whatever f_op->write returns.
 * NOTE(review): the earlier argument checks and every failure return value
 * are on lines missing from this listing.
 */
378 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
381 ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
389 if (!file->f_op->write)
392 RETURN(file->f_op->write(file, buf, len, off));
394 EXPORT_SYMBOL(lustre_fwrite);
397 * Sync a file from within kernel context. Prior to calling this
398 * function we should already have done a push_ctxt().
/*
 * lustre_fsync() - call the file's own f_op->fsync method on its dentry,
 * passing 0 as the last argument.
 *
 * file: open file; it, its f_op table and the fsync method must be non-NULL.
 *
 * Returns whatever f_op->fsync returns.
 * NOTE(review): the value returned when the argument check fails is on a
 * line missing from this listing.
 */
400 int lustre_fsync(struct file *file)
403 ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
404 if (!file || !file->f_op || !file->f_op->fsync)
407 RETURN(file->f_op->fsync(file, file->f_dentry, 0));
409 EXPORT_SYMBOL(lustre_fsync);
/*
 * l_dentry_open() - open dentry de on the mount recorded in ctxt.
 *
 * ctxt: run context supplying pwdmnt.
 * de:   dentry to open.
 *
 * Returns the dentry_open() result.
 * NOTE(review): the rest of the parameter list (including the declaration of
 * flags) is on a line missing from this listing.
 */
411 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
/* Extra mount reference taken before the open; presumably dentry_open()
 * consumes one -- confirm against the kernel version in use. */
414 mntget(ctxt->pwdmnt);
415 return dentry_open(de, ctxt->pwdmnt, flags);
417 EXPORT_SYMBOL(l_dentry_open);
/*
 * l_filldir() - per-entry callback handed to vfs_readdir() by l_readdir().
 *
 * __buf:  the struct l_readdir_callback set up by l_readdir().
 * name:   entry name, namlen bytes long.
 * offset: stored into lld_off of the entry remembered in lrc_dirent.
 * ino:    inode number stored in the new entry.
 * d_type: not referenced on any visible line.
 *
 * Each call allocates one l_linux_dirent, appends it to the caller's list
 * and remembers it in lrc_dirent.
 *
 * The signature is given twice; the first form is selected by
 * HAVE_VFS_READDIR_U64_INO and differs only in the type of ino.
 * NOTE(review): the #else/#endif lines, the test guarding the lld_off store,
 * the allocation-failure handling and the return statement are missing from
 * this listing.
 */
419 #ifdef HAVE_VFS_READDIR_U64_INO
420 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
421 u64 ino, unsigned int d_type)
423 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
424 ino_t ino, unsigned int d_type)
427 struct l_linux_dirent *dirent;
428 struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
/* Entry recorded by the previous call, if any. */
430 dirent = buf->lrc_dirent;
432 dirent->lld_off = offset;
434 OBD_ALLOC(dirent, sizeof(*dirent));
439 list_add_tail(&dirent->lld_list, buf->lrc_list);
441 buf->lrc_dirent = dirent;
442 dirent->lld_ino = ino;
/* The name plus one extra byte must fit in lld_name. */
443 LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
444 memcpy(dirent->lld_name, name, namlen);
/*
 * l_readdir() - walk the directory open as file, collecting entries onto
 * dentry_list through the l_filldir() callback.
 *
 * file:        open directory.
 * dentry_list: list head that l_filldir() appends l_linux_dirent items to.
 *
 * After the walk, the last collected entry gets lld_off set to the file's
 * final position.
 * NOTE(review): the declaration of error, the checks around the last-entry
 * fix-up and the return statements are missing from this listing.
 */
449 long l_readdir(struct file *file, struct list_head *dentry_list)
451 struct l_linux_dirent *lastdirent;
452 struct l_readdir_callback buf;
455 buf.lrc_dirent = NULL;
456 buf.lrc_list = dentry_list;
458 error = vfs_readdir(file, l_filldir, &buf);
462 lastdirent = buf.lrc_dirent;
464 lastdirent->lld_off = file->f_pos;
468 EXPORT_SYMBOL(l_readdir);
/* Section built only when LUSTRE_KERNEL_VERSION is defined; the build is
 * stopped with #error if HAVE_CLEAR_RDONLY_ON_PUT is not also defined.
 * NOTE(review): the #endif closing the inner #ifndef is missing from this
 * listing. */
470 #ifdef LUSTRE_KERNEL_VERSION
471 #ifndef HAVE_CLEAR_RDONLY_ON_PUT
472 #error rdonly patchset must be updated [cfs bz11248]
/* Declared here, defined elsewhere (not in this file). */
474 void dev_set_rdonly(lvfs_sbdev_type dev);
475 int dev_check_rdonly(lvfs_sbdev_type dev);
/*
 * __lvfs_set_rdonly() - sync dev, then mark the journal device read-only
 * when one is given and it is a different device from dev.
 *
 * dev:  main device; synced first with lvfs_sbdev_sync().
 * jdev: optional journal device.
 *
 * NOTE(review): the debug message for dev says it is set read-only, but the
 * call that does so is on a line missing from this listing.
 */
477 void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
479 lvfs_sbdev_sync(dev);
480 if (jdev && (jdev != dev)) {
481 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
483 dev_set_rdonly(jdev);
485 CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
/*
 * lvfs_check_rdonly() - thin wrapper returning dev_check_rdonly(dev)
 * unchanged.
 */
489 int lvfs_check_rdonly(lvfs_sbdev_type dev)
491 return dev_check_rdonly(dev);
494 EXPORT_SYMBOL(__lvfs_set_rdonly);
495 EXPORT_SYMBOL(lvfs_check_rdonly);
/*
 * lvfs_check_io_health() - probe the backing store by writing one
 * CFS_PAGE_SIZE buffer to file through fsfilt_write_record().
 *
 * obd:  device passed through to fsfilt_write_record().
 * file: file that receives the test write.
 *
 * rc holds the fsfilt_write_record() result and is what gets logged.
 * NOTE(review): the declarations of rc and offset, the allocation-failure
 * check and the return statement are missing from this listing.
 */
497 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
499 char *write_page = NULL;
504 OBD_ALLOC(write_page, CFS_PAGE_SIZE);
/* Final argument 1: presumably forces a synchronous write, as the debug
 * message below says -- confirm against fsfilt_write_record(). */
508 rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
510 OBD_FREE(write_page, CFS_PAGE_SIZE);
512 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
515 EXPORT_SYMBOL(lvfs_check_io_health);
516 #endif /* LUSTRE_KERNEL_VERSION */
/*
 * obd_update_maxusage() - raise the recorded high-water marks to the
 * current usage figures.
 *
 * obd_pages_sum() and obd_memory_sum() are sampled before the lock is
 * taken; only the compare-and-raise of obd_max_pages / obd_max_alloc runs
 * under obd_updatemax_lock. A mark is never lowered.
 *
 * Takes no arguments; declared with (void) like obd_memory_max() and
 * obd_pages_max() so the definition is a real prototype.
 * NOTE(review): the declarations of max1 and max2 are on lines missing from
 * this listing.
 */
518 void obd_update_maxusage(void)
522 max1 = obd_pages_sum();
523 max2 = obd_memory_sum();
525 spin_lock(&obd_updatemax_lock);
526 if (max1 > obd_max_pages)
527 obd_max_pages = max1;
528 if (max2 > obd_max_alloc)
529 obd_max_alloc = max2;
530 spin_unlock(&obd_updatemax_lock);
/*
 * obd_memory_max() - returns a __u64 obtained while holding
 * obd_updatemax_lock.
 * NOTE(review): the statement between lock and unlock and the return are
 * missing from this listing; presumably it reads obd_max_alloc -- confirm.
 */
534 __u64 obd_memory_max(void)
538 spin_lock(&obd_updatemax_lock);
540 spin_unlock(&obd_updatemax_lock);
/*
 * obd_pages_max() - returns a __u64 obtained while holding
 * obd_updatemax_lock.
 * NOTE(review): the statement between lock and unlock and the return are
 * missing from this listing; presumably it reads obd_max_pages -- confirm.
 */
545 __u64 obd_pages_max(void)
549 spin_lock(&obd_updatemax_lock);
551 spin_unlock(&obd_updatemax_lock);
/* Symbols made available to other modules. */
556 EXPORT_SYMBOL(obd_update_maxusage);
557 EXPORT_SYMBOL(obd_pages_max);
558 EXPORT_SYMBOL(obd_memory_max);
559 EXPORT_SYMBOL(obd_memory);
/*
 * lprocfs_read_helper() - extract one field of a statistics counter.
 *
 * lc:    counter to read.
 * field: which value to produce (one of the LPROCFS_FIELDS_FLAGS_* cases).
 *
 * The read is wrapped in a retry loop keyed on the counter's la_entry and
 * la_exit values: la_entry is sampled first and compared again afterwards.
 *
 * NOTE(review): most case bodies, the loop head, the declarations and the
 * return statement are missing from this listing; only the AVG and
 * SUMSQUARE assignments are visible.
 */
562 __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
563 enum lprocfs_fields_flags field)
571 centry = atomic_read(&lc->lc_cntl.la_entry);
574 case LPROCFS_FIELDS_FLAGS_CONFIG:
577 case LPROCFS_FIELDS_FLAGS_SUM:
580 case LPROCFS_FIELDS_FLAGS_MIN:
583 case LPROCFS_FIELDS_FLAGS_MAX:
586 case LPROCFS_FIELDS_FLAGS_AVG:
/* NOTE(review): this is half of the max-min spread, not a mean of the
 * samples and not the midpoint (max+min)/2 -- confirm this is intended. */
587 ret = (lc->lc_max - lc->lc_min)/2;
589 case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
590 ret = lc->lc_sumsquare;
592 case LPROCFS_FIELDS_FLAGS_COUNT:
/* Retries only while the sampled value differs from BOTH the current
 * la_entry and the current la_exit.
 * NOTE(review): confirm that && (rather than ||) is the intended test. */
598 } while (centry != atomic_read(&lc->lc_cntl.la_entry) &&
599 centry != atomic_read(&lc->lc_cntl.la_exit));
603 EXPORT_SYMBOL(lprocfs_read_helper);
/* Module metadata: author, description and licence strings. */
606 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
607 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
608 MODULE_LICENSE("GPL");