1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
6 * Author: Phil Schwan <phil@clusterfs.com>
7 * Author: Oleg Drokin <green@clusterfs.com>
8 * Author: Yury Umanets <yury@clusterfs.com>
9 * Review: Nikita Danilov <nikita@clusterfs.com>
11 * This file is part of Lustre, http://www.lustre.org.
13 * Lustre is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
17 * Lustre is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with Lustre; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #define DEBUG_SUBSYSTEM S_LLITE
30 #include <linux/version.h>
31 #include <asm/uaccess.h>
32 #include <linux/file.h>
33 #include <linux/kmod.h>
35 #include <linux/lustre_lite.h>
36 #include "llite_internal.h"
/*
 * list of superblock infos queued for a mount check: ll_gns_timer_callback()
 * links sbi->ll_gns_sbi_head onto it and ll_gns_thread() takes entries off.
 */
38 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
/* held around every visible access to gns_sbi_list in this file. */
39 static spinlock_t gns_lock = SPIN_LOCK_UNLOCKED;
/*
 * GNS control thread bookkeeping: t_flags carries the SVC_RUNNING /
 * SVC_STOPPING / SVC_STOPPED state and t_ctl_waitq is the queue the thread
 * sleeps on.
 */
40 static struct ptlrpc_thread gns_thread;
/*
 * carries the gc_starting and gc_finishing completions used to hand-shake
 * between ll_gns_thread_start()/ll_gns_thread_stop() and ll_gns_thread().
 */
41 static struct ll_gns_ctl gns_ctl;
44 * waits until passed dentry gets mountpoint or timeout and attempts are
45 * exhausted. Returns 1 if dentry became mountpoint and 0 otherwise.
/*
 * sleeps on sbi->ll_gns_waitq until @dentry becomes a mount point.
 *
 * @dentry:  dentry expected to turn into a mount point; must be a valid
 *           pointer (asserted not NULL and not an ERR_PTR value).
 * @timeout: per-attempt timeout; it is multiplied by HZ before being handed
 *           to LWI_TIMEOUT, so it is presumably expressed in seconds.
 * @tries:   maximum number of timed waits to perform.
 *
 * A pending signal ends the loop early with rc set to -EINTR. The last
 * visible statements mark GNS state as LL_GNS_FINISHED under
 * sbi->ll_gns_lock.
 *
 * NOTE(review): the declaration of rc, the statement guarded by the final
 * d_mountpoint() check, the out label and the return statement are not
 * visible in this listing, so return values other than -EINTR cannot be
 * confirmed from here.
 */
48 ll_gns_wait_for_mount(struct dentry *dentry,
49 int timeout, int tries)
51 struct l_wait_info lwi;
52 struct ll_sb_info *sbi;
/* callers must pass a real dentry */
56 LASSERT(dentry != NULL);
57 LASSERT(!IS_ERR(dentry));
58 sbi = ll_s2sbi(dentry->d_sb);
/* the same wait descriptor is reused for every attempt */
60 lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
/* one timed wait per attempt; stop as soon as the dentry is a mount point */
61 for (; !d_mountpoint(dentry) && tries > 0; tries--) {
62 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
/* interrupted by a signal: bail out with -EINTR */
63 if (signal_pending(current))
64 GOTO(out, rc = -EINTR);
67 if (!d_mountpoint(dentry))
/* record that the waiting stage is over; state is guarded by ll_gns_lock */
72 spin_lock(&sbi->ll_gns_lock);
73 sbi->ll_gns_state = LL_GNS_FINISHED;
74 spin_unlock(&sbi->ll_gns_lock);
79 * tries to mount the mount object under passed @dentry. In the case of success
80 * @dentry will become mount point and 0 will be returned. Error code will be
/*
 * drives a GNS mount on @dentry by running the user-space upcall.
 *
 * @dentry: directory dentry that should become a mount point; its inode must
 *          be present (asserted) and is checked with S_ISDIR.
 * @mnt:    vfsmount @dentry lives in; used for d_path() and dentry_open().
 *
 * Visible flow:
 *  - if sbi->ll_gns_state is not LL_GNS_IDLE another thread is mounting or
 *    has just finished: the caller checks whether @dentry is already a mount
 *    point, checks the pending dentry with is_subdir(), then blocks on
 *    sbi->ll_gns_mount_finished and re-checks;
 *  - otherwise state becomes LL_GNS_MOUNTING and @dentry is recorded in
 *    sbi->ll_gns_pending_dentry;
 *  - a page is allocated and d_path() builds the path of @dentry in it;
 *  - the mount object named sbi->ll_gns_oname is looked up under @dentry
 *    (with ll_gns_sem held), must exist (-ENOENT) and be a regular file
 *    (-EBADF), is opened, revalidated, rejected with -EFBIG when its size
 *    exceeds PAGE_SIZE - 1, and up to PAGE_SIZE - 1 bytes are read into a
 *    second page;
 *  - argv[0] is taken from sbi->ll_gns_upcall (with ll_gns_sem held) and the
 *    helper is started with call_usermodehelper(); a failure is only warned
 *    about;
 *  - ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS) waits for the mount
 *    to show up; if @dentry is a mount point afterwards, follow_down() is
 *    used and the resulting vfsmount is linked via mnt_lustre_list under
 *    dcache_lock and stamped with jiffies in mnt_last_used;
 *  - cleanup frees both pages, resets state to LL_GNS_IDLE, clears the
 *    pending dentry and wakes all waiters on ll_gns_mount_finished.
 *
 * Page allocation failures yield -ENOMEM; d_path() and dentry_open() errors
 * are passed on through PTR_ERR().
 *
 * NOTE(review): many lines of this function (braces, returns, labels, the
 * switch cases, remaining argv slots) are not visible in this listing, so the
 * description above covers only what can be read here.
 * NOTE(review): the messages using the star-width form (%*s) pass
 * strlen(sbi->ll_gns_oname) without a cast; a star width expects an int and
 * strlen() yields size_t. A star width also only pads, it does not limit the
 * printed length, so the precision form was possibly intended -- confirm
 * against the CERROR implementation.
 * NOTE(review): sbi->ll_gns_oname and sbi->ll_gns_upcall are also read in
 * log messages, and argv[0] is used by call_usermodehelper(), after
 * ll_gns_sem has been released; if the /proc writer can replace these
 * strings this looks racy -- verify.
 * NOTE(review): complete_all() is followed immediately by init_completion()
 * on the same completion; waiters that have not yet run may interact badly
 * with the re-initialisation -- confirm.
 */
84 ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
86 char *path, *pathpage, *datapage = NULL, *argv[4];
87 struct file *mntinfo_fd = NULL;
88 int cleanup_phase = 0, rc = 0;
89 struct ll_sb_info *sbi;
90 struct dentry *dchild = NULL;
93 LASSERT(dentry->d_inode != NULL);
95 if (!S_ISDIR(dentry->d_inode->i_mode))
98 sbi = ll_i2sbi(dentry->d_inode);
101 CERROR("suid directory found, but no "
102 "vfsmount available.\n");
/* a zero ll_gns_enabled is special-cased here (presumably GNS is disabled) */
106 if (atomic_read(&sbi->ll_gns_enabled) == 0)
/* ll_gns_lock guards ll_gns_state and ll_gns_pending_dentry */
109 spin_lock(&sbi->ll_gns_lock);
112 * another thead is in progress or just finished mounting the
113 * dentry. Handling that.
115 if (sbi->ll_gns_state != LL_GNS_IDLE) {
117 * another thread is trying to mount GNS dentry. We'd like to
120 spin_unlock(&sbi->ll_gns_lock);
124 * check if dentry is mount point already, if so, do not restart
127 if (d_mountpoint(dentry))
130 spin_lock(&sbi->ll_gns_lock);
131 if (sbi->ll_gns_pending_dentry &&
132 is_subdir(sbi->ll_gns_pending_dentry, dentry)) {
133 spin_unlock(&sbi->ll_gns_lock);
136 spin_unlock(&sbi->ll_gns_lock);
139 * waiting for GNS complete and check dentry again, it may be
142 wait_for_completion(&sbi->ll_gns_mount_finished);
143 if (d_mountpoint(dentry))
147 * check for he case when there are few waiters and all they are
148 * awakened, but only one will find GNS state LL_GNS_IDLE, and
149 * the rest will face with LL_GNS_MOUNTING. --umka
151 spin_lock(&sbi->ll_gns_lock);
152 if (sbi->ll_gns_state != LL_GNS_IDLE) {
153 spin_unlock(&sbi->ll_gns_lock);
156 spin_unlock(&sbi->ll_gns_lock);
158 LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
159 CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
161 /* mounting started */
162 sbi->ll_gns_state = LL_GNS_MOUNTING;
163 sbi->ll_gns_pending_dentry = dentry;
164 spin_unlock(&sbi->ll_gns_lock);
166 /* we need to build an absolute pathname to pass to mount */
167 pathpage = (char *)__get_free_page(GFP_KERNEL);
169 GOTO(cleanup, rc = -ENOMEM);
172 /* getting @dentry path stored in @pathpage. */
173 path = d_path(dentry, mnt, pathpage, PAGE_SIZE);
175 CERROR("can't build mount object path, err %d\n",
177 GOTO(cleanup, rc = PTR_ERR(path));
180 /* synchronizing with possible /proc/fs/...write */
181 down(&sbi->ll_gns_sem);
184 * mount object name is taken from sbi, where it is set in mount time or
185 * via /proc/fs... tunable. It may be ".mntinfo" or so.
189 * recursive lookup with trying to mount SUID bit marked directories on
190 * the way is not possible here, as lookup_one_len() does not pass @nd
191 * to ->lookup() and this is checked in ll_lookup_it().
193 dchild = ll_lookup_one_len(sbi->ll_gns_oname, dentry,
194 strlen(sbi->ll_gns_oname));
195 up(&sbi->ll_gns_sem);
197 if (IS_ERR(dchild)) {
198 rc = PTR_ERR(dchild);
199 CERROR("can't find mount object %*s/%*s err = %d.\n",
200 (int)dentry->d_name.len, dentry->d_name.name,
201 strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
206 /* mount object is not found */
207 if (!dchild->d_inode) {
209 GOTO(cleanup, rc = -ENOENT);
212 /* check if found child is regular file */
213 if (!S_ISREG(dchild->d_inode->i_mode)) {
215 GOTO(cleanup, rc = -EBADF);
218 /* ok, mount object if found, opening it. */
219 mntinfo_fd = dentry_open(dchild, mntget(mnt), 0);
220 if (IS_ERR(mntinfo_fd)) {
221 CERROR("can't open mount object %*s/%*s err = %d.\n",
222 (int)dentry->d_name.len, dentry->d_name.name,
223 strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
224 (int)PTR_ERR(mntinfo_fd));
227 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
231 /* make sure that inode size is up-to-date */
232 rc = ll_inode_revalidate_it(mntinfo_fd->f_dentry);
234 CERROR("can't revalidate mount object %*s/%*s, err %d\n",
235 (int)dentry->d_name.len, dentry->d_name.name,
236 strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
241 if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE - 1) {
242 CERROR("mount object %*s/%*s is too big (%Ld)\n",
243 (int)dentry->d_name.len, dentry->d_name.name,
244 strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
245 mntinfo_fd->f_dentry->d_inode->i_size);
246 GOTO(cleanup, rc = -EFBIG);
249 datapage = (char *)__get_free_page(GFP_KERNEL);
251 GOTO(cleanup, rc = -ENOMEM);
255 /* read data from mount object. */
256 rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE - 1);
258 CERROR("can't read mount object %*s/%*s data, err %d\n",
259 (int)dentry->d_name.len, dentry->d_name.name,
260 strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
265 /* no data in mount object? */
267 CERROR("mount object %*s/%*s is empty?\n",
268 (int)dentry->d_name.len, dentry->d_name.name,
269 strlen(sbi->ll_gns_oname), sbi->ll_gns_oname);
278 /* synchronizing with possible /proc/fs/...write */
279 down(&sbi->ll_gns_sem);
282 * upcall is initialized in mount time or via /proc/fs/... tuneable and
283 * may be /usr/lib/lustre/gns-upcall.sh
285 argv[0] = sbi->ll_gns_upcall;
290 up(&sbi->ll_gns_sem);
292 /* do not wait for helper complete here. */
293 rc = call_usermodehelper(argv[0], argv, NULL, 1);
295 CWARN("failed to call GNS upcall %s, err = %d, "
296 "checking for mount anyway\n", sbi->ll_gns_upcall, rc);
300 * waiting for dentry become mount point GNS_WAIT_ATTEMPTS times by 1
303 rc = ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS);
304 LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
306 /* checking for mount point anyway to not loss mounts */
307 if (d_mountpoint(dentry)) {
308 struct dentry *rdentry;
309 struct vfsmount *rmnt;
312 rdentry = dget(dentry);
314 if (follow_down(&rmnt, &rdentry)) {
316 * registering new mount in GNS mounts list and thus
317 * make it accessible from GNS control thread.
319 spin_lock(&dcache_lock);
320 LASSERT(list_empty(&rmnt->mnt_lustre_list));
321 list_add_tail(&rmnt->mnt_lustre_list,
323 spin_unlock(&dcache_lock);
324 rmnt->mnt_last_used = jiffies;
334 CERROR("usermode upcall %s failed to mount %s, err %d\n",
335 sbi->ll_gns_upcall, path, rc);
340 switch (cleanup_phase) {
342 free_page((unsigned long)datapage);
344 if (mntinfo_fd != NULL) {
349 free_page((unsigned long)pathpage);
351 spin_lock(&sbi->ll_gns_lock);
352 sbi->ll_gns_state = LL_GNS_IDLE;
353 sbi->ll_gns_pending_dentry = NULL;
354 spin_unlock(&sbi->ll_gns_lock);
356 /* waking up all waiters after GNS state is LL_GNS_IDLE */
357 complete_all(&sbi->ll_gns_mount_finished);
358 init_completion(&sbi->ll_gns_mount_finished);
363 /* tries to umount passed @mnt. */
/*
 * unmounts @mnt by calling do_umount(mnt, 0) and logs the attempt, plus a
 * failure message, at D_INODE debug level.
 *
 * @mnt: vfsmount to unmount.
 *
 * NOTE(review): the declaration of rc, the condition guarding the failure
 * message and the return statement are not visible in this listing;
 * presumably the do_umount() result is returned -- confirm.
 */
364 int ll_gns_umount_object(struct vfsmount *mnt)
369 CDEBUG(D_INODE, "unmounting mnt %p\n", mnt);
/* second argument 0: no umount flags are passed */
370 rc = do_umount(mnt, 0);
372 CDEBUG(D_INODE, "can't umount 0x%p, err = %d\n",
/*
 * walks the GNS mounts of @sbi and tries to unmount them.
 *
 * @sbi:   superblock info whose ll_mnt_list is processed.
 * @flags: when equal to LL_GNS_CHECK the idle time of each mount
 *         (jiffies - mnt_last_used) is compared with
 *         sbi->ll_gns_timeout * HZ before going further.
 *
 * The whole ll_mnt_list is first spliced onto a private list under
 * dcache_lock. Entries are then taken from the tail of that private list,
 * unlinked and linked again with list_add(). dcache_lock is released around
 * each ll_gns_umount_object() call and taken again afterwards.
 *
 * NOTE(review): the list_entry()/list_add() trailing arguments, the statement
 * taken when a mount has not timed out yet and the return statement are not
 * visible in this listing. The surviving comment fragment says entries go
 * back on the front of the sbi list, and the timeout test presumably skips
 * mounts that are still fresh -- confirm against the full source.
 */
379 int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
381 struct list_head check_list = LIST_HEAD_INIT(check_list);
382 struct vfsmount *mnt;
/* detach the complete mount list so it can be walked privately */
386 spin_lock(&dcache_lock);
387 list_splice_init(&sbi->ll_mnt_list, &check_list);
390 * walk the list in reverse order, and put them on the front of the sbi
391 * list each iteration; this avoids list-ordering problems if we race
392 * with another gns-mounting thread.
394 while (!list_empty(&check_list)) {
/* check_list.prev is the tail element */
395 mnt = list_entry(check_list.prev,
401 list_del_init(&mnt->mnt_lustre_list);
403 list_add(&mnt->mnt_lustre_list,
406 /* check for timeout if needed */
407 pass = jiffies - mnt->mnt_last_used;
409 if (flags == LL_GNS_CHECK &&
410 pass < sbi->ll_gns_timeout * HZ)
/* the umount itself runs without dcache_lock held */
415 spin_unlock(&dcache_lock);
418 ll_gns_umount_object(mnt);
421 spin_lock(&dcache_lock);
423 spin_unlock(&dcache_lock);
428 * GNS timer callback function. It restarts gns timer and wakes up GNS control
429 * thread to process mounts list.
/*
 * timer handler for sbi->ll_gns_timer.
 *
 * @data: the struct ll_sb_info pointer, passed as an unsigned long.
 *
 * Queues the superblock info on gns_sbi_list (unless its ll_gns_sbi_head is
 * already linked somewhere), wakes the control thread waiting on
 * gns_thread.t_ctl_waitq and re-arms the timer to fire again
 * sbi->ll_gns_tick * HZ jiffies from now.
 */
431 void ll_gns_timer_callback(unsigned long data)
433 struct ll_sb_info *sbi = (void *)data;
/* list_empty() on the entry itself: queue it only when it is not linked yet */
436 spin_lock(&gns_lock);
437 if (list_empty(&sbi->ll_gns_sbi_head))
438 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
439 spin_unlock(&gns_lock);
441 wake_up(&gns_thread.t_ctl_waitq);
/* periodic timer: schedule the next tick */
442 mod_timer(&sbi->ll_gns_timer,
443 jiffies + sbi->ll_gns_tick * HZ);
446 /* this function checks whether the gns list currently has any entries queued. */
/*
 * samples, under gns_lock, whether gns_sbi_list is non-empty; rc is 1 when at
 * least one superblock info is queued and 0 otherwise.
 *
 * NOTE(review): the declaration of rc and the return statement are not
 * visible in this listing; presumably rc is returned -- confirm.
 */
447 static int inline ll_gns_check_event(void)
451 spin_lock(&gns_lock);
452 rc = !list_empty(&gns_sbi_list);
453 spin_unlock(&gns_lock);
458 /* should we stop GNS control thread? */
/*
 * returns 1 when the SVC_STOPPING bit is set in gns_thread.t_flags and 0
 * otherwise.
 *
 * NOTE(review): original lines between the signature and the return are not
 * visible in this listing, so any statement preceding the flag test cannot
 * be described here.
 */
459 static int inline ll_gns_check_stop(void)
462 return (gns_thread.t_flags & SVC_STOPPING) ? 1 : 0;
465 /* GNS control thread function. */
/*
 * body of the GNS control thread.
 *
 * @arg: pointer to the struct ll_gns_ctl whose gc_starting and gc_finishing
 *       completions are used to report start-up and termination.
 *
 * The thread daemonizes itself under the name "ll_gns", fills its blocked
 * signal set, marks itself SVC_RUNNING and completes gc_starting. It then
 * loops until ll_gns_check_stop() reports a stop request: each round sleeps
 * on gns_thread.t_ctl_waitq until there is a queued superblock info or a stop
 * request, then takes entries off the tail of gns_sbi_list one by one and
 * runs ll_gns_check_mounts(sbi, LL_GNS_CHECK) for each with gns_lock
 * released. On exit it sets SVC_STOPPED and finishes through
 * complete_and_exit() on gc_finishing with exit code 0.
 *
 * NOTE(review): several lines (braces, the declaration of flags, the trailing
 * list_entry() arguments and others) are not visible in this listing.
 * NOTE(review): the argument on the sigfillset line contains a mis-decoded
 * character; it most likely was the address-of operator applied to
 * current->blocked -- confirm against the original file.
 */
466 static int ll_gns_thread(void *arg)
468 struct ll_gns_ctl *ctl = arg;
/* buffer sized like the task comm field */
473 char name[sizeof(current->comm)];
474 snprintf(name, sizeof(name) - 1, "ll_gns");
475 kportal_daemonize(name);
/* change the blocked signal set with the signal mask lock held */
478 SIGNAL_MASK_LOCK(current, flags);
479 sigfillset(¤t->blocked);
481 SIGNAL_MASK_UNLOCK(current, flags);
484 * letting starting function know, that we are ready and control may be
487 gns_thread.t_flags = SVC_RUNNING;
488 complete(&ctl->gc_starting);
490 while (!ll_gns_check_stop()) {
/* zeroed wait info; presumably this means waiting without a timeout */
491 struct l_wait_info lwi = { 0 };
493 l_wait_event(gns_thread.t_ctl_waitq,
494 (ll_gns_check_event() ||
495 ll_gns_check_stop()), &lwi);
/* drain the queue; gns_lock is not held while mounts are checked */
497 spin_lock(&gns_lock);
498 while (!list_empty(&gns_sbi_list)) {
499 struct ll_sb_info *sbi;
501 sbi = list_entry(gns_sbi_list.prev,
505 list_del_init(&sbi->ll_gns_sbi_head);
506 spin_unlock(&gns_lock);
507 ll_gns_check_mounts(sbi, LL_GNS_CHECK);
508 spin_lock(&gns_lock);
510 spin_unlock(&gns_lock);
514 gns_thread.t_flags = SVC_STOPPED;
516 /* this is SMP-safe way to finish thread. */
517 complete_and_exit(&ctl->gc_finishing, 0);
/*
 * arms (or re-arms) sbi->ll_gns_timer so that it expires
 * sbi->ll_gns_tick * HZ jiffies from now.
 *
 * @sbi: superblock info owning the timer.
 */
520 void ll_gns_add_timer(struct ll_sb_info *sbi)
522 mod_timer(&sbi->ll_gns_timer,
523 jiffies + sbi->ll_gns_tick * HZ);
/*
 * deactivates sbi->ll_gns_timer with del_timer().
 *
 * @sbi: superblock info owning the timer.
 *
 * NOTE(review): ll_gns_timer_callback() re-arms this timer from inside the
 * handler, and del_timer() does not wait for a handler that is already
 * running; a synchronous variant may be needed here -- confirm.
 */
526 void ll_gns_del_timer(struct ll_sb_info *sbi)
528 del_timer(&sbi->ll_gns_timer);
532 * starts GNS control thread and waits for a signal it is up and work may be
/*
 * starts the GNS control thread and blocks until it reports that it is up.
 *
 * Asserts that no thread state is left over (t_flags == 0), initialises both
 * hand-shake completions and the thread wait queue, spawns ll_gns_thread()
 * with kernel_thread() using CLONE_VM | CLONE_FILES, then waits on
 * gc_starting and asserts that the thread has set SVC_RUNNING.
 *
 * NOTE(review): the declaration of rc, the condition guarding the error
 * message and the return statements are not visible in this listing.
 */
535 int ll_gns_thread_start(void)
540 LASSERT(gns_thread.t_flags == 0);
541 init_completion(&gns_ctl.gc_starting);
542 init_completion(&gns_ctl.gc_finishing);
543 init_waitqueue_head(&gns_thread.t_ctl_waitq);
/* gns_ctl is handed to the thread as its argument */
545 rc = kernel_thread(ll_gns_thread, &gns_ctl,
546 (CLONE_VM | CLONE_FILES));
548 CERROR("cannot start GNS control thread, "
/* the thread completes gc_starting once it has set SVC_RUNNING */
552 wait_for_completion(&gns_ctl.gc_starting);
553 LASSERT(gns_thread.t_flags == SVC_RUNNING);
557 /* asks the GNS control thread to stop and waits until it has actually stopped. */
/*
 * requests termination of the GNS control thread and waits for it.
 *
 * Sets t_flags to SVC_STOPPING, wakes the thread sleeping on t_ctl_waitq,
 * waits on gc_finishing, asserts the thread left SVC_STOPPED behind and
 * finally resets t_flags to 0, the value ll_gns_thread_start() asserts on
 * entry.
 */
558 void ll_gns_thread_stop(void)
561 gns_thread.t_flags = SVC_STOPPING;
562 wake_up(&gns_thread.t_ctl_waitq);
563 wait_for_completion(&gns_ctl.gc_finishing);
564 LASSERT(gns_thread.t_flags == SVC_STOPPED);
565 gns_thread.t_flags = 0;