Whamcloud - gitweb
LU-1347 lvfs: makes EXPORT_SYMBOL follows function body
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Whamcloud, Inc.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lvfs_linux.c
37  *
38  * Author: Andreas Dilger <adilger@clusterfs.com>
39  */
40
41 #ifndef EXPORT_SYMTAB
42 # define EXPORT_SYMTAB
43 #endif
44
45 #define DEBUG_SUBSYSTEM S_FILTER
46
47 #include <linux/version.h>
48 #include <linux/fs.h>
49 #include <asm/unistd.h>
50 #include <linux/slab.h>
51 #include <linux/pagemap.h>
52 #include <linux/quotaops.h>
53 #include <linux/version.h>
54 #include <libcfs/libcfs.h>
55 #include <lustre_fsfilt.h>
56 #include <obd.h>
57 #include <linux/module.h>
58 #include <linux/init.h>
59 #include <linux/lustre_compat25.h>
60 #include <lvfs.h>
61 #include "lvfs_internal.h"
62
63 #include <obd.h>
64 #include <lustre_lib.h>
65 #include <lustre_quota.h>
66
67 __u64 obd_max_pages = 0;
68 __u64 obd_max_alloc = 0;
69 struct lprocfs_stats *obd_memory = NULL;
70 EXPORT_SYMBOL(obd_memory);
71 cfs_spinlock_t obd_updatemax_lock = CFS_SPIN_LOCK_UNLOCKED;
72 /* refine later and change to seqlock or simlar from libcfs */
73
/*
 * Run-context sanity checks.  They are only active when OBD_CTXT_DEBUG is
 * defined (intended for development); otherwise every macro expands to an
 * empty statement.
 */
#ifndef OBD_CTXT_DEBUG
# define ASSERT_CTXT_MAGIC(magic) do {} while (0)
# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while (0)
# define ASSERT_KERNEL_CTXT(msg) do {} while (0)
#else
# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
# define ASSERT_NOT_KERNEL_CTXT(msg) \
        LASSERTF(!segment_eq(get_fs(), get_ds()), msg)
# define ASSERT_KERNEL_CTXT(msg) \
        LASSERTF(segment_eq(get_fs(), get_ds()), msg)
#endif
85
86 static void push_group_info(struct lvfs_run_ctxt *save,
87                             struct group_info *ginfo)
88 {
89         if (!ginfo) {
90                 save->ngroups = current_ngroups;
91                 current_ngroups = 0;
92         } else {
93                 struct cred *cred;
94                 task_lock(current);
95                 save->group_info = current_cred()->group_info;
96                 if ((cred = prepare_creds())) {
97                         cred->group_info = ginfo;
98                         commit_creds(cred);
99                 }
100                 task_unlock(current);
101         }
102 }
103
104 static void pop_group_info(struct lvfs_run_ctxt *save,
105                            struct group_info *ginfo)
106 {
107         if (!ginfo) {
108                 current_ngroups = save->ngroups;
109         } else {
110                 struct cred *cred;
111                 task_lock(current);
112                 if ((cred = prepare_creds())) {
113                         cred->group_info = save->group_info;
114                         commit_creds(cred);
115                 }
116                 task_unlock(current);
117         }
118 }
119
120 /* push / pop to root of obd store */
121 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
122                struct lvfs_ucred *uc)
123 {
124         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
125         ASSERT_CTXT_MAGIC(new_ctx->magic);
126         OBD_SET_CTXT_MAGIC(save);
127
128         save->fs = get_fs();
129         LASSERT(cfs_atomic_read(&cfs_fs_pwd(current->fs)->d_count));
130         LASSERT(cfs_atomic_read(&new_ctx->pwd->d_count));
131         save->pwd = dget(cfs_fs_pwd(current->fs));
132         save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
133         save->luc.luc_umask = cfs_curproc_umask();
134         save->ngroups = current_cred()->group_info->ngroups;
135
136         LASSERT(save->pwd);
137         LASSERT(save->pwdmnt);
138         LASSERT(new_ctx->pwd);
139         LASSERT(new_ctx->pwdmnt);
140
141         if (uc) {
142                 struct cred *cred;
143                 save->luc.luc_uid = current_uid();
144                 save->luc.luc_gid = current_gid();
145                 save->luc.luc_fsuid = current_fsuid();
146                 save->luc.luc_fsgid = current_fsgid();
147                 save->luc.luc_cap = current_cap();
148
149                 if ((cred = prepare_creds())) {
150                         cred->uid = uc->luc_uid;
151                         cred->gid = uc->luc_gid;
152                         cred->fsuid = uc->luc_fsuid;
153                         cred->fsgid = uc->luc_fsgid;
154                         cred->cap_effective = uc->luc_cap;
155                         commit_creds(cred);
156                 }
157
158                 push_group_info(save,
159                                 uc->luc_ginfo ?:
160                                 uc->luc_identity ? uc->luc_identity->mi_ginfo :
161                                                    NULL);
162         }
163         current->fs->umask = 0; /* umask already applied on client */
164         set_fs(new_ctx->fs);
165         ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
166 }
167 EXPORT_SYMBOL(push_ctxt);
168
169 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
170               struct lvfs_ucred *uc)
171 {
172         ASSERT_CTXT_MAGIC(saved->magic);
173         ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
174
175         LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
176                  cfs_fs_pwd(current->fs), new_ctx->pwd);
177         LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
178                  cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
179
180         set_fs(saved->fs);
181         ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
182
183         dput(saved->pwd);
184         mntput(saved->pwdmnt);
185         current->fs->umask = saved->luc.luc_umask;
186         if (uc) {
187                 struct cred *cred;
188                 if ((cred = prepare_creds())) {
189                         cred->uid = saved->luc.luc_uid;
190                         cred->gid = saved->luc.luc_gid;
191                         cred->fsuid = saved->luc.luc_fsuid;
192                         cred->fsgid = saved->luc.luc_fsgid;
193                         cred->cap_effective = saved->luc.luc_cap;
194                         commit_creds(cred);
195                 }
196
197                 pop_group_info(saved,
198                                uc->luc_ginfo ?:
199                                uc->luc_identity ? uc->luc_identity->mi_ginfo :
200                                                   NULL);
201         }
202 }
203 EXPORT_SYMBOL(pop_ctxt);
204
205 /* utility to make a file */
206 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
207 {
208         struct dentry *dchild;
209         int err = 0;
210         ENTRY;
211
212         // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
213         CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
214
215         dchild = ll_lookup_one_len(name, dir, strlen(name));
216         if (IS_ERR(dchild))
217                 GOTO(out_up, dchild);
218
219         if (dchild->d_inode) {
220                 int old_mode = dchild->d_inode->i_mode;
221                 if (!S_ISREG(old_mode))
222                         GOTO(out_err, err = -EEXIST);
223
224                 /* Fixup file permissions if necessary */
225                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
226                         CWARN("fixing permissions on %s from %o to %o\n",
227                               name, old_mode, mode);
228                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
229                                                   (old_mode & ~S_IALLUGO);
230                         mark_inode_dirty(dchild->d_inode);
231                 }
232                 GOTO(out_up, dchild);
233         }
234
235         err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
236                             NULL);
237         if (err)
238                 GOTO(out_err, err);
239
240         RETURN(dchild);
241
242 out_err:
243         dput(dchild);
244         dchild = ERR_PTR(err);
245 out_up:
246         return dchild;
247 }
248 EXPORT_SYMBOL(simple_mknod);
249
250 /* utility to make a directory */
251 struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt, 
252                             const char *name, int mode, int fix)
253 {
254         struct dentry *dchild;
255         int err = 0;
256         ENTRY;
257
258         // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
259         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
260         dchild = ll_lookup_one_len(name, dir, strlen(name));
261         if (IS_ERR(dchild))
262                 GOTO(out_up, dchild);
263
264         if (dchild->d_inode) {
265                 int old_mode = dchild->d_inode->i_mode;
266                 if (!S_ISDIR(old_mode)) {
267                         CERROR("found %s (%lu/%u) is mode %o\n", name,
268                                dchild->d_inode->i_ino,
269                                dchild->d_inode->i_generation, old_mode);
270                         GOTO(out_err, err = -ENOTDIR);
271                 }
272
273                 /* Fixup directory permissions if necessary */
274                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
275                         CDEBUG(D_CONFIG,
276                                "fixing permissions on %s from %o to %o\n",
277                                name, old_mode, mode);
278                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
279                                                   (old_mode & ~S_IALLUGO);
280                         mark_inode_dirty(dchild->d_inode);
281                 }
282                 GOTO(out_up, dchild);
283         }
284
285         err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
286         if (err)
287                 GOTO(out_err, err);
288
289         RETURN(dchild);
290
291 out_err:
292         dput(dchild);
293         dchild = ERR_PTR(err);
294 out_up:
295         return dchild;
296 }
297 EXPORT_SYMBOL(simple_mkdir);
298
299 /* utility to rename a file */
300 int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
301                   char *oldname, char *newname)
302 {
303         struct dentry *dchild_old, *dchild_new;
304         int err = 0;
305         ENTRY;
306
307         ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
308         CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
309                (int)strlen(oldname), oldname, (int)strlen(newname), newname);
310
311         dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
312         if (IS_ERR(dchild_old))
313                 RETURN(PTR_ERR(dchild_old));
314
315         if (!dchild_old->d_inode)
316                 GOTO(put_old, err = -ENOENT);
317
318         dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
319         if (IS_ERR(dchild_new))
320                 GOTO(put_old, err = PTR_ERR(dchild_new));
321
322         err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
323                             dir->d_inode, dchild_new, mnt);
324
325         dput(dchild_new);
326 put_old:
327         dput(dchild_old);
328         RETURN(err);
329 }
330 EXPORT_SYMBOL(lustre_rename);
331
332 /*
333  * Read a file from within kernel context.  Prior to calling this
334  * function we should already have done a push_ctxt().
335  */
336 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
337 {
338         ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
339         if (!file || !file->f_op || !file->f_op->read || !off)
340                 RETURN(-ENOSYS);
341
342         return file->f_op->read(file, buf, len, off);
343 }
344 EXPORT_SYMBOL(lustre_fread);
345
346 /*
347  * Write a file from within kernel context.  Prior to calling this
348  * function we should already have done a push_ctxt().
349  */
350 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
351 {
352         ENTRY;
353         ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
354         if (!file)
355                 RETURN(-ENOENT);
356         if (!file->f_op)
357                 RETURN(-ENOSYS);
358         if (!off)
359                 RETURN(-EINVAL);
360
361         if (!file->f_op->write)
362                 RETURN(-EROFS);
363
364         RETURN(file->f_op->write(file, buf, len, off));
365 }
366 EXPORT_SYMBOL(lustre_fwrite);
367
368 /*
369  * Sync a file from within kernel context.  Prior to calling this
370  * function we should already have done a push_ctxt().
371  */
372 int lustre_fsync(struct file *file)
373 {
374         ENTRY;
375         ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
376         if (!file || !file->f_op || !file->f_op->fsync)
377                 RETURN(-ENOSYS);
378
379         RETURN(cfs_do_fsync(file, 0));
380 }
381 EXPORT_SYMBOL(lustre_fsync);
382
383 /* Note: dput(dchild) will be called if there is an error */
384 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
385                              int flags)
386 {
387         mntget(ctxt->pwdmnt);
388         return ll_dentry_open(de, ctxt->pwdmnt, flags, current_cred());
389 }
390 EXPORT_SYMBOL(l_dentry_open);
391
392 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
393                      u64 ino, unsigned int d_type)
394 {
395         struct l_linux_dirent *dirent;
396         struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
397
398         dirent = buf->lrc_dirent;
399         if (dirent)
400                dirent->lld_off = offset;
401
402         OBD_ALLOC(dirent, sizeof(*dirent));
403
404         if (!dirent)
405                 return -ENOMEM;
406
407         cfs_list_add_tail(&dirent->lld_list, buf->lrc_list);
408
409         buf->lrc_dirent = dirent;
410         dirent->lld_ino = ino;
411         LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
412         memcpy(dirent->lld_name, name, namlen);
413
414         return 0;
415 }
416
417 long l_readdir(struct file *file, cfs_list_t *dentry_list)
418 {
419         struct l_linux_dirent *lastdirent;
420         struct l_readdir_callback buf;
421         int error;
422
423         buf.lrc_dirent = NULL;
424         buf.lrc_list = dentry_list;
425
426         error = vfs_readdir(file, l_filldir, &buf);
427         if (error < 0)
428                 return error;
429
430         lastdirent = buf.lrc_dirent;
431         if (lastdirent)
432                 lastdirent->lld_off = file->f_pos;
433
434         return 0;
435 }
436 EXPORT_SYMBOL(l_readdir);
437
438 int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
439                  struct iattr *newattrs)
440 {
441         int rc;
442
443         LOCK_INODE_MUTEX(dchild->d_inode);
444 #ifdef HAVE_SECURITY_PLUG
445         rc = notify_change(dchild, mnt, newattrs);
446 #else
447         rc = notify_change(dchild, newattrs);
448 #endif
449         UNLOCK_INODE_MUTEX(dchild->d_inode);
450         return rc;
451 }
452 EXPORT_SYMBOL(l_notify_change);
453
454 /* utility to truncate a file */
455 int simple_truncate(struct dentry *dir, struct vfsmount *mnt, 
456                  char *name, loff_t length)
457 {
458         struct dentry *dchild;
459         struct iattr newattrs;
460         int err = 0;
461         ENTRY;
462
463         CDEBUG(D_INODE, "truncating file %.*s to %lld\n", (int)strlen(name),
464                name, (long long)length);
465         dchild = ll_lookup_one_len(name, dir, strlen(name));
466         if (IS_ERR(dchild))
467                 GOTO(out, err = PTR_ERR(dchild));
468
469         if (dchild->d_inode) {
470                 int old_mode = dchild->d_inode->i_mode;
471                 if (S_ISDIR(old_mode)) {
472                         CERROR("found %s (%lu/%u) is mode %o\n", name,
473                                dchild->d_inode->i_ino,
474                                dchild->d_inode->i_generation, old_mode);
475                         GOTO(out_dput, err = -EISDIR);
476                 }
477
478                 newattrs.ia_size = length;
479                 newattrs.ia_valid = ATTR_SIZE;
480                 err = l_notify_change(mnt, dchild, &newattrs);
481         }
482         EXIT;
483 out_dput:
484         dput(dchild);
485 out:
486         return err;
487 }
488 EXPORT_SYMBOL(simple_truncate);
489
490 int __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
491 {
492 #ifdef HAVE_DEV_SET_RDONLY
493         if (jdev && (jdev != dev)) {
494                 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
495                        (long)jdev);
496                 dev_set_rdonly(jdev);
497         }
498         CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
499         dev_set_rdonly(dev);
500
501         return 0;
502 #else
503         CERROR("DEV %lx CANNOT BE SET READONLY\n", (long)dev);
504
505         return -EOPNOTSUPP;
506 #endif
507 }
508 EXPORT_SYMBOL(__lvfs_set_rdonly);
509
510 int lvfs_check_rdonly(lvfs_sbdev_type dev)
511 {
512 #ifdef HAVE_DEV_SET_RDONLY
513         return dev_check_rdonly(dev);
514 #else
515         return 0;
516 #endif
517 }
518 EXPORT_SYMBOL(lvfs_check_rdonly);
519
520 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
521 {
522         char *write_page = NULL;
523         loff_t offset = 0;
524         int rc = 0;
525         ENTRY;
526
527         OBD_ALLOC(write_page, CFS_PAGE_SIZE);
528         if (!write_page)
529                 RETURN(-ENOMEM);
530
531         rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
532
533         OBD_FREE(write_page, CFS_PAGE_SIZE);
534
535         CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
536         RETURN(rc);
537 }
538 EXPORT_SYMBOL(lvfs_check_io_health);
539
540 void obd_update_maxusage()
541 {
542         __u64 max1, max2;
543
544         max1 = obd_pages_sum();
545         max2 = obd_memory_sum();
546
547         cfs_spin_lock(&obd_updatemax_lock);
548         if (max1 > obd_max_pages)
549                 obd_max_pages = max1;
550         if (max2 > obd_max_alloc)
551                 obd_max_alloc = max2;
552         cfs_spin_unlock(&obd_updatemax_lock);
553
554 }
555 EXPORT_SYMBOL(obd_update_maxusage);
556
557 __u64 obd_memory_max(void)
558 {
559         __u64 ret;
560
561         cfs_spin_lock(&obd_updatemax_lock);
562         ret = obd_max_alloc;
563         cfs_spin_unlock(&obd_updatemax_lock);
564
565         return ret;
566 }
567 EXPORT_SYMBOL(obd_memory_max);
568
569 __u64 obd_pages_max(void)
570 {
571         __u64 ret;
572
573         cfs_spin_lock(&obd_updatemax_lock);
574         ret = obd_max_pages;
575         cfs_spin_unlock(&obd_updatemax_lock);
576
577         return ret;
578 }
579 EXPORT_SYMBOL(obd_pages_max);
580
#ifdef LPROCFS
/*
 * Read one field of a statistics counter.
 *
 * \param lc     counter to read; NULL yields 0
 * \param field  which value to extract
 *
 * The field is read between two samples of lc_cntl.la_entry; the read is
 * repeated only while both la_entry and la_exit differ from the first
 * sample.
 * NOTE(review): this is deliberately left as '&&'.  It looks weaker than a
 * full sequence check, but tightening it could spin forever if a reader
 * ever interrupts a writer on the same CPU -- confirm against the writer
 * side before changing.
 *
 * NOTE(review): LPROCFS_FIELDS_FLAGS_AVG returns (max - min) / 2, i.e. half
 * the range rather than a mean.  Kept as-is because callers may rely on it.
 *
 * \retval the requested value, or 0 for a NULL counter / unknown field
 */
__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
                          enum lprocfs_fields_flags field)
{
        __s64 ret = 0;
        int centry;
        ENTRY;

        if (!lc)
                RETURN(0);

        do {
                centry = cfs_atomic_read(&lc->lc_cntl.la_entry);

                switch (field) {
                case LPROCFS_FIELDS_FLAGS_CONFIG:
                        ret = lc->lc_config;
                        break;
                case LPROCFS_FIELDS_FLAGS_SUM:
                        ret = lc->lc_sum + lc->lc_sum_irq;
                        break;
                case LPROCFS_FIELDS_FLAGS_MIN:
                        ret = lc->lc_min;
                        break;
                case LPROCFS_FIELDS_FLAGS_MAX:
                        ret = lc->lc_max;
                        break;
                case LPROCFS_FIELDS_FLAGS_AVG:
                        ret = (lc->lc_max - lc->lc_min)/2;
                        break;
                case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
                        ret = lc->lc_sumsquare;
                        break;
                case LPROCFS_FIELDS_FLAGS_COUNT:
                        ret = lc->lc_count;
                        break;
                default:
                        break;
                }
        } while (centry != cfs_atomic_read(&lc->lc_cntl.la_entry) &&
                 centry != cfs_atomic_read(&lc->lc_cntl.la_exit));

        RETURN(ret);
}
EXPORT_SYMBOL(lprocfs_read_helper);
#endif /* LPROCFS */
625
626 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
627 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
628 MODULE_LICENSE("GPL");