Whamcloud - gitweb
LU-2158 lvfs: Remove cruft from lvfs directory
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lvfs_linux.c
37  *
38  * Author: Andreas Dilger <adilger@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_FILTER
42
43 #include <linux/version.h>
44 #include <linux/fs.h>
45 #include <asm/unistd.h>
46 #include <linux/slab.h>
47 #include <linux/pagemap.h>
48 #include <linux/quotaops.h>
49 #include <linux/version.h>
50 #include <libcfs/libcfs.h>
51 #include <lustre_fsfilt.h>
52 #include <obd.h>
53 #include <linux/module.h>
54 #include <linux/init.h>
55 #include <linux/lustre_compat25.h>
56 #include <lvfs.h>
57
58 #include <obd.h>
59 #include <lustre_lib.h>
60
61 __u64 obd_max_pages = 0;
62 __u64 obd_max_alloc = 0;
63 struct lprocfs_stats *obd_memory = NULL;
64 EXPORT_SYMBOL(obd_memory);
65 DEFINE_SPINLOCK(obd_updatemax_lock);
66 /* refine later and change to seqlock or simlar from libcfs */
67
/*
 * Context sanity checks, only needed during development.  They expand to
 * real assertions when OBD_CTXT_DEBUG is defined and to empty statements
 * otherwise.
 */
#ifndef OBD_CTXT_DEBUG
# define ASSERT_CTXT_MAGIC(magic) do {} while (0)
# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while (0)
# define ASSERT_KERNEL_CTXT(msg) do {} while (0)
#else
# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
# define ASSERT_NOT_KERNEL_CTXT(msg) \
        LASSERTF(!segment_eq(get_fs(), get_ds()), msg)
# define ASSERT_KERNEL_CTXT(msg) \
        LASSERTF(segment_eq(get_fs(), get_ds()), msg)
#endif
79
80 static void push_group_info(struct lvfs_run_ctxt *save,
81                             struct group_info *ginfo)
82 {
83         if (!ginfo) {
84                 save->ngroups = current_ngroups;
85                 current_ngroups = 0;
86         } else {
87                 struct cred *cred;
88                 task_lock(current);
89                 save->group_info = current_cred()->group_info;
90                 if ((cred = prepare_creds())) {
91                         cred->group_info = ginfo;
92                         commit_creds(cred);
93                 }
94                 task_unlock(current);
95         }
96 }
97
98 static void pop_group_info(struct lvfs_run_ctxt *save,
99                            struct group_info *ginfo)
100 {
101         if (!ginfo) {
102                 current_ngroups = save->ngroups;
103         } else {
104                 struct cred *cred;
105                 task_lock(current);
106                 if ((cred = prepare_creds())) {
107                         cred->group_info = save->group_info;
108                         commit_creds(cred);
109                 }
110                 task_unlock(current);
111         }
112 }
113
114 /* push / pop to root of obd store */
115 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
116                struct lvfs_ucred *uc)
117 {
118         /* if there is underlaying dt_device then push_ctxt is not needed */
119         if (new_ctx->dt != NULL)
120                 return;
121
122         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
123         ASSERT_CTXT_MAGIC(new_ctx->magic);
124         OBD_SET_CTXT_MAGIC(save);
125
126         save->fs = get_fs();
127         LASSERT(d_refcount(cfs_fs_pwd(current->fs)));
128         LASSERT(d_refcount(new_ctx->pwd));
129         save->pwd = dget(cfs_fs_pwd(current->fs));
130         save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
131         save->luc.luc_umask = cfs_curproc_umask();
132         save->ngroups = current_cred()->group_info->ngroups;
133
134         LASSERT(save->pwd);
135         LASSERT(save->pwdmnt);
136         LASSERT(new_ctx->pwd);
137         LASSERT(new_ctx->pwdmnt);
138
139         if (uc) {
140                 struct cred *cred;
141                 save->luc.luc_uid = current_uid();
142                 save->luc.luc_gid = current_gid();
143                 save->luc.luc_fsuid = current_fsuid();
144                 save->luc.luc_fsgid = current_fsgid();
145                 save->luc.luc_cap = current_cap();
146
147                 if ((cred = prepare_creds())) {
148                         cred->uid = uc->luc_uid;
149                         cred->gid = uc->luc_gid;
150                         cred->fsuid = uc->luc_fsuid;
151                         cred->fsgid = uc->luc_fsgid;
152                         cred->cap_effective = uc->luc_cap;
153                         commit_creds(cred);
154                 }
155
156                 push_group_info(save,
157                                 uc->luc_ginfo ?:
158                                 uc->luc_identity ? uc->luc_identity->mi_ginfo :
159                                                    NULL);
160         }
161         current->fs->umask = 0; /* umask already applied on client */
162         set_fs(new_ctx->fs);
163         ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
164 }
165 EXPORT_SYMBOL(push_ctxt);
166
167 void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
168               struct lvfs_ucred *uc)
169 {
170         /* if there is underlaying dt_device then pop_ctxt is not needed */
171         if (new_ctx->dt != NULL)
172                 return;
173
174         ASSERT_CTXT_MAGIC(saved->magic);
175         ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
176
177         LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
178                  cfs_fs_pwd(current->fs), new_ctx->pwd);
179         LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
180                  cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
181
182         set_fs(saved->fs);
183         ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
184
185         dput(saved->pwd);
186         mntput(saved->pwdmnt);
187         current->fs->umask = saved->luc.luc_umask;
188         if (uc) {
189                 struct cred *cred;
190                 if ((cred = prepare_creds())) {
191                         cred->uid = saved->luc.luc_uid;
192                         cred->gid = saved->luc.luc_gid;
193                         cred->fsuid = saved->luc.luc_fsuid;
194                         cred->fsgid = saved->luc.luc_fsgid;
195                         cred->cap_effective = saved->luc.luc_cap;
196                         commit_creds(cred);
197                 }
198
199                 pop_group_info(saved,
200                                uc->luc_ginfo ?:
201                                uc->luc_identity ? uc->luc_identity->mi_ginfo :
202                                                   NULL);
203         }
204 }
205 EXPORT_SYMBOL(pop_ctxt);
206
207 /* utility to make a file */
208 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
209 {
210         struct dentry *dchild;
211         int err = 0;
212         ENTRY;
213
214         // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
215         CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
216
217         dchild = ll_lookup_one_len(name, dir, strlen(name));
218         if (IS_ERR(dchild))
219                 GOTO(out_up, dchild);
220
221         if (dchild->d_inode) {
222                 int old_mode = dchild->d_inode->i_mode;
223                 if (!S_ISREG(old_mode))
224                         GOTO(out_err, err = -EEXIST);
225
226                 /* Fixup file permissions if necessary */
227                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
228                         CWARN("fixing permissions on %s from %o to %o\n",
229                               name, old_mode, mode);
230                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
231                                                   (old_mode & ~S_IALLUGO);
232                         mark_inode_dirty(dchild->d_inode);
233                 }
234                 GOTO(out_up, dchild);
235         }
236
237         err = vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
238                             NULL);
239         if (err)
240                 GOTO(out_err, err);
241
242         RETURN(dchild);
243
244 out_err:
245         dput(dchild);
246         dchild = ERR_PTR(err);
247 out_up:
248         return dchild;
249 }
250 EXPORT_SYMBOL(simple_mknod);
251
252 /* utility to make a directory */
253 struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt, 
254                             const char *name, int mode, int fix)
255 {
256         struct dentry *dchild;
257         int err = 0;
258         ENTRY;
259
260         // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
261         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
262         dchild = ll_lookup_one_len(name, dir, strlen(name));
263         if (IS_ERR(dchild))
264                 GOTO(out_up, dchild);
265
266         if (dchild->d_inode) {
267                 int old_mode = dchild->d_inode->i_mode;
268                 if (!S_ISDIR(old_mode)) {
269                         CERROR("found %s (%lu/%u) is mode %o\n", name,
270                                dchild->d_inode->i_ino,
271                                dchild->d_inode->i_generation, old_mode);
272                         GOTO(out_err, err = -ENOTDIR);
273                 }
274
275                 /* Fixup directory permissions if necessary */
276                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
277                         CDEBUG(D_CONFIG,
278                                "fixing permissions on %s from %o to %o\n",
279                                name, old_mode, mode);
280                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
281                                                   (old_mode & ~S_IALLUGO);
282                         mark_inode_dirty(dchild->d_inode);
283                 }
284                 GOTO(out_up, dchild);
285         }
286
287         err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
288         if (err)
289                 GOTO(out_err, err);
290
291         RETURN(dchild);
292
293 out_err:
294         dput(dchild);
295         dchild = ERR_PTR(err);
296 out_up:
297         return dchild;
298 }
299 EXPORT_SYMBOL(simple_mkdir);
300
301 /* utility to rename a file */
302 int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
303                   char *oldname, char *newname)
304 {
305         struct dentry *dchild_old, *dchild_new;
306         int err = 0;
307         ENTRY;
308
309         ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
310         CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
311                (int)strlen(oldname), oldname, (int)strlen(newname), newname);
312
313         dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
314         if (IS_ERR(dchild_old))
315                 RETURN(PTR_ERR(dchild_old));
316
317         if (!dchild_old->d_inode)
318                 GOTO(put_old, err = -ENOENT);
319
320         dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
321         if (IS_ERR(dchild_new))
322                 GOTO(put_old, err = PTR_ERR(dchild_new));
323
324         err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
325                             dir->d_inode, dchild_new, mnt);
326
327         dput(dchild_new);
328 put_old:
329         dput(dchild_old);
330         RETURN(err);
331 }
332 EXPORT_SYMBOL(lustre_rename);
333
334 /*
335  * Read a file from within kernel context.  Prior to calling this
336  * function we should already have done a push_ctxt().
337  */
338 int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
339 {
340         ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
341         if (!file || !file->f_op || !file->f_op->read || !off)
342                 RETURN(-ENOSYS);
343
344         return file->f_op->read(file, buf, len, off);
345 }
346 EXPORT_SYMBOL(lustre_fread);
347
348 /*
349  * Write a file from within kernel context.  Prior to calling this
350  * function we should already have done a push_ctxt().
351  */
352 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
353 {
354         ENTRY;
355         ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
356         if (!file)
357                 RETURN(-ENOENT);
358         if (!file->f_op)
359                 RETURN(-ENOSYS);
360         if (!off)
361                 RETURN(-EINVAL);
362
363         if (!file->f_op->write)
364                 RETURN(-EROFS);
365
366         RETURN(file->f_op->write(file, buf, len, off));
367 }
368 EXPORT_SYMBOL(lustre_fwrite);
369
370 /*
371  * Sync a file from within kernel context.  Prior to calling this
372  * function we should already have done a push_ctxt().
373  */
374 int lustre_fsync(struct file *file)
375 {
376         ENTRY;
377         ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
378         if (!file || !file->f_op || !file->f_op->fsync)
379                 RETURN(-ENOSYS);
380
381         RETURN(cfs_do_fsync(file, 0));
382 }
383 EXPORT_SYMBOL(lustre_fsync);
384
385 /* Note: dput(dchild) will be called if there is an error */
386 struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
387                              int flags)
388 {
389         mntget(ctxt->pwdmnt);
390         return ll_dentry_open(de, ctxt->pwdmnt, flags, current_cred());
391 }
392 EXPORT_SYMBOL(l_dentry_open);
393
394 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
395                      u64 ino, unsigned int d_type)
396 {
397         struct l_linux_dirent *dirent;
398         struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
399
400         dirent = buf->lrc_dirent;
401         if (dirent)
402                dirent->lld_off = offset;
403
404         OBD_ALLOC(dirent, sizeof(*dirent));
405
406         if (!dirent)
407                 return -ENOMEM;
408
409         cfs_list_add_tail(&dirent->lld_list, buf->lrc_list);
410
411         buf->lrc_dirent = dirent;
412         dirent->lld_ino = ino;
413         LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
414         memcpy(dirent->lld_name, name, namlen);
415
416         return 0;
417 }
418
419 long l_readdir(struct file *file, cfs_list_t *dentry_list)
420 {
421         struct l_linux_dirent *lastdirent;
422         struct l_readdir_callback buf;
423         int error;
424
425         buf.lrc_dirent = NULL;
426         buf.lrc_list = dentry_list;
427
428         error = vfs_readdir(file, l_filldir, &buf);
429         if (error < 0)
430                 return error;
431
432         lastdirent = buf.lrc_dirent;
433         if (lastdirent)
434                 lastdirent->lld_off = file->f_pos;
435
436         return 0;
437 }
438 EXPORT_SYMBOL(l_readdir);
439
440 int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
441                     struct iattr *newattrs)
442 {
443         int rc;
444
445         mutex_lock(&dchild->d_inode->i_mutex);
446 #ifdef HAVE_SECURITY_PLUG
447         rc = notify_change(dchild, mnt, newattrs);
448 #else
449         rc = notify_change(dchild, newattrs);
450 #endif
451         mutex_unlock(&dchild->d_inode->i_mutex);
452         return rc;
453 }
454 EXPORT_SYMBOL(l_notify_change);
455
456 /* utility to truncate a file */
457 int simple_truncate(struct dentry *dir, struct vfsmount *mnt, 
458                  char *name, loff_t length)
459 {
460         struct dentry *dchild;
461         struct iattr newattrs;
462         int err = 0;
463         ENTRY;
464
465         CDEBUG(D_INODE, "truncating file %.*s to %lld\n", (int)strlen(name),
466                name, (long long)length);
467         dchild = ll_lookup_one_len(name, dir, strlen(name));
468         if (IS_ERR(dchild))
469                 GOTO(out, err = PTR_ERR(dchild));
470
471         if (dchild->d_inode) {
472                 int old_mode = dchild->d_inode->i_mode;
473                 if (S_ISDIR(old_mode)) {
474                         CERROR("found %s (%lu/%u) is mode %o\n", name,
475                                dchild->d_inode->i_ino,
476                                dchild->d_inode->i_generation, old_mode);
477                         GOTO(out_dput, err = -EISDIR);
478                 }
479
480                 newattrs.ia_size = length;
481                 newattrs.ia_valid = ATTR_SIZE;
482                 err = l_notify_change(mnt, dchild, &newattrs);
483         }
484         EXIT;
485 out_dput:
486         dput(dchild);
487 out:
488         return err;
489 }
490 EXPORT_SYMBOL(simple_truncate);
491
492 int __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
493 {
494 #ifdef HAVE_DEV_SET_RDONLY
495         if (jdev && (jdev != dev)) {
496                 CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
497                        (long)jdev);
498                 dev_set_rdonly(jdev);
499         }
500         CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
501         dev_set_rdonly(dev);
502
503         return 0;
504 #else
505         CERROR("DEV %lx CANNOT BE SET READONLY\n", (long)dev);
506
507         return -EOPNOTSUPP;
508 #endif
509 }
510 EXPORT_SYMBOL(__lvfs_set_rdonly);
511
512 int lvfs_check_rdonly(lvfs_sbdev_type dev)
513 {
514 #ifdef HAVE_DEV_SET_RDONLY
515         return dev_check_rdonly(dev);
516 #else
517         return 0;
518 #endif
519 }
520 EXPORT_SYMBOL(lvfs_check_rdonly);
521
522 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
523 {
524         char *write_page = NULL;
525         loff_t offset = 0;
526         int rc = 0;
527         ENTRY;
528
529         OBD_ALLOC(write_page, CFS_PAGE_SIZE);
530         if (!write_page)
531                 RETURN(-ENOMEM);
532
533         rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
534
535         OBD_FREE(write_page, CFS_PAGE_SIZE);
536
537         CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
538         RETURN(rc);
539 }
540 EXPORT_SYMBOL(lvfs_check_io_health);
541
542 void obd_update_maxusage()
543 {
544         __u64 max1, max2;
545
546         max1 = obd_pages_sum();
547         max2 = obd_memory_sum();
548
549         spin_lock(&obd_updatemax_lock);
550         if (max1 > obd_max_pages)
551                 obd_max_pages = max1;
552         if (max2 > obd_max_alloc)
553                 obd_max_alloc = max2;
554         spin_unlock(&obd_updatemax_lock);
555
556 }
557 EXPORT_SYMBOL(obd_update_maxusage);
558
559 __u64 obd_memory_max(void)
560 {
561         __u64 ret;
562
563         spin_lock(&obd_updatemax_lock);
564         ret = obd_max_alloc;
565         spin_unlock(&obd_updatemax_lock);
566
567         return ret;
568 }
569 EXPORT_SYMBOL(obd_memory_max);
570
571 __u64 obd_pages_max(void)
572 {
573         __u64 ret;
574
575         spin_lock(&obd_updatemax_lock);
576         ret = obd_max_pages;
577         spin_unlock(&obd_updatemax_lock);
578
579         return ret;
580 }
581 EXPORT_SYMBOL(obd_pages_max);
582
583 #ifdef LPROCFS
584 __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
585                           enum lprocfs_fields_flags field)
586 {
587         __s64 ret = 0;
588
589         if (lc == NULL)
590                 RETURN(0);
591
592         switch (field) {
593                 case LPROCFS_FIELDS_FLAGS_CONFIG:
594                         ret = lc->lc_config;
595                         break;
596                 case LPROCFS_FIELDS_FLAGS_SUM:
597                         ret = lc->lc_sum + lc->lc_sum_irq;
598                         break;
599                 case LPROCFS_FIELDS_FLAGS_MIN:
600                         ret = lc->lc_min;
601                         break;
602                 case LPROCFS_FIELDS_FLAGS_MAX:
603                         ret = lc->lc_max;
604                         break;
605                 case LPROCFS_FIELDS_FLAGS_AVG:
606                         ret = (lc->lc_max - lc->lc_min) / 2;
607                         break;
608                 case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
609                         ret = lc->lc_sumsquare;
610                         break;
611                 case LPROCFS_FIELDS_FLAGS_COUNT:
612                         ret = lc->lc_count;
613                         break;
614                 default:
615                         break;
616         };
617
618         RETURN(ret);
619 }
620 EXPORT_SYMBOL(lprocfs_read_helper);
621 #endif /* LPROCFS */
622
623 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
624 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
625 MODULE_LICENSE("GPL");