1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Phil Schwan <phil@clusterfs.com>
7 * This file is part of Lustre, http://www.lustre.org.
9 * Lustre is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
13 * Lustre is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Lustre; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_LLITE
26 #include <linux/version.h>
27 #include <asm/uaccess.h>
28 #include <linux/file.h>
29 #include <linux/kmod.h>
31 #include <linux/lustre_lite.h>
32 #include "llite_internal.h"
/* sbi's queued (through their ll_gns_sbi_head) for the GNS control thread */
34 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
/* taken around every access to gns_sbi_list in this file */
35 static spinlock_t gns_lock = SPIN_LOCK_UNLOCKED;
/* control thread descriptor: t_flags holds SVC_* state, t_ctl_waitq is its waitq */
36 static struct ptlrpc_thread gns_thread;
/* gc_starting / gc_finishing completions used to start and stop the thread */
37 static struct ll_gns_ctl gns_ctl;
40 * waits until passed dentry gets mountpoint or timeout and attempts are
41 * exhausted. Returns 1 if dentry became mountpoint and 0 otherwise.
/*
 * @dentry:  dentry expected to become a mountpoint; must be a valid pointer.
 * @timeout: multiplied by HZ and handed to LWI_TIMEOUT() for each wait.
 * @tries:   upper bound on the number of waits.
 */
44 ll_gns_wait_for_mount(struct dentry *dentry,
45 int timeout, int tries)
47 struct l_wait_info lwi;
48 struct ll_sb_info *sbi;
52 LASSERT(dentry != NULL);
53 LASSERT(!IS_ERR(dentry));
54 sbi = ll_s2sbi(dentry->d_sb);
/* sleep on ll_gns_waitq until @dentry is a mountpoint or @tries runs out */
56 for (; !d_mountpoint(dentry) && tries > 0; tries--) {
57 lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
58 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
/* rc is 1 if mounted; in that case state goes MOUNTING -> FINISHED under lock */
61 if ((rc = d_mountpoint(dentry) ? 1 : 0)) {
62 spin_lock(&sbi->ll_gns_lock);
63 LASSERT(sbi->ll_gns_state == LL_GNS_MOUNTING);
64 sbi->ll_gns_state = LL_GNS_FINISHED;
65 spin_unlock(&sbi->ll_gns_lock);
/* signals ll_gns_mount_finished, which ll_gns_mount_object() may be waiting on */
68 complete(&sbi->ll_gns_mount_finished);
73 * tries to mount the mount object under passed @dentry. In the case of success
74 * @dentry will become mount point and 0 will be returned. Error code will be
/*
 * @dentry: directory to mount on; its inode must be a directory and its
 *          d_fsdata must be set (both are asserted below).
 * @mnt:    vfsmount used to build the path of @dentry and to open the mount
 *          object.
 *
 * Error codes visible here: -ENOMEM (path page allocation), -ENOENT (after
 * the mount object lookup), -EFBIG (mount object bigger than one page) and
 * the errors reported by d_path(), ll_d_lookup(), dentry_open(),
 * read_cache_page() and the upcall.
 */
77 int ll_gns_mount_object(struct dentry *dentry,
80 struct ll_dentry_data *lld = dentry->d_fsdata;
81 char *p, *path, *pathpage, *argv[4];
82 struct file *mntinfo_fd = NULL;
83 struct address_space *mapping;
84 int cleanup_phase = 0, rc = 0;
85 struct ll_sb_info *sbi;
86 struct dentry *dchild;
87 struct page *datapage;
92 CERROR("suid directory found, but no "
93 "vfsmount available.\n");
97 CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
99 LASSERT(dentry->d_inode != NULL);
100 LASSERT(S_ISDIR(dentry->d_inode->i_mode));
101 LASSERT(lld != NULL);
103 sbi = ll_i2sbi(dentry->d_inode);
104 LASSERT(sbi != NULL);
106 /* another thread is in progress of mounting some entry */
107 spin_lock(&sbi->ll_gns_lock);
108 if (sbi->ll_gns_state == LL_GNS_MOUNTING) {
109 spin_unlock(&sbi->ll_gns_lock);
/*
 * NOTE(review): ll_gns_lock is dropped before this wait and no re-acquire is
 * visible before the state checks and unlocks further down -- confirm the
 * fall-through path does not unlock a lock it does not hold.
 */
111 wait_for_completion(&sbi->ll_gns_mount_finished);
112 if (d_mountpoint(dentry))
116 /* another thread mounted it already */
117 if (sbi->ll_gns_state == LL_GNS_FINISHED) {
118 spin_unlock(&sbi->ll_gns_lock);
120 /* we lost a race; just return */
121 if (d_mountpoint(dentry))
124 LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
/* mark the dentry itself as being mounted; d_flags is guarded by d_lock */
126 spin_lock(&dentry->d_lock);
127 dentry->d_flags |= DCACHE_GNS_MOUNTING;
128 spin_unlock(&dentry->d_lock);
130 /* mounting started */
131 sbi->ll_gns_state = LL_GNS_MOUNTING;
132 spin_unlock(&sbi->ll_gns_lock);
134 /* we need to build an absolute pathname to pass to mount */
135 pathpage = (char *)__get_free_page(GFP_KERNEL);
137 GOTO(cleanup, rc = -ENOMEM);
140 /* getting @dentry path stored in @pathpage. */
141 path = d_path(dentry, mnt, pathpage, PAGE_SIZE);
/* the failed value is @path; dchild is not assigned until the lookup below */
143 CERROR("can't build mount object path, err %d\n",
144 (int)PTR_ERR(path));
145 GOTO(cleanup, rc = PTR_ERR(path));
148 /* synchronizing with possible /proc/fs/...write */
149 down(&sbi->ll_gns_sem);
152 * mount object name is taken from sbi, where it is set in mount time or
153 * via /proc/fs... tunable. It may be ".mntinfo" or so.
155 dchild = ll_d_lookup(sbi->ll_gns_oname, dentry,
156 strlen(sbi->ll_gns_oname));
157 up(&sbi->ll_gns_sem);
160 GOTO(cleanup, rc = -ENOENT);
162 if (IS_ERR(dchild)) {
/* dchild is an error pointer here and must not be dereferenced for its name */
163 CERROR("can't find mount object in %*s err = %d.\n",
164 (int)dentry->d_name.len, dentry->d_name.name,
166 (int)PTR_ERR(dchild));
167 GOTO(cleanup, rc = PTR_ERR(dchild));
172 /* ok, mount object is found, opening it. */
173 mntinfo_fd = dentry_open(dchild, mnt, 0);
174 if (IS_ERR(mntinfo_fd)) {
175 CERROR("can't open mount object %*s/%*s err = %d.\n",
176 (int)dentry->d_name.len, dentry->d_name.name,
177 (int)dchild->d_name.len, dchild->d_name.name,
178 (int)PTR_ERR(mntinfo_fd));
181 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
/* only page 0 of the mount object is read below, so larger files are refused */
185 if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
186 CERROR("mount object %*s/%*s is too big (%Ld)\n",
187 (int)dentry->d_name.len, dentry->d_name.name,
188 (int)dchild->d_name.len, dchild->d_name.name,
189 mntinfo_fd->f_dentry->d_inode->i_size);
190 GOTO(cleanup, rc = -EFBIG);
193 /* read data from mount object. */
194 mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
195 filler = (filler_t *)mapping->a_ops->readpage;
196 datapage = read_cache_page(mapping, 0, filler,
198 if (IS_ERR(datapage)) {
199 CERROR("can't read data from mount object %*s/%*s\n",
200 (int)dentry->d_name.len, dentry->d_name.name,
201 (int)dchild->d_name.len, dchild->d_name.name);
202 GOTO(cleanup, rc = PTR_ERR(datapage));
/*
 * force NUL termination at the end of the page.
 * NOTE(review): the assignment of p is not visible in this listing;
 * presumably it maps @datapage -- confirm.
 */
207 p[PAGE_SIZE - 1] = '\0';
213 /* synchronizing with possible /proc/fs/...write */
214 down(&sbi->ll_gns_sem);
217 * upcall is initialized in mount time or via /proc/fs/... tunable and
218 * may be /usr/lib/lustre/gns-upcall.sh
220 argv[0] = sbi->ll_gns_upcall;
225 up(&sbi->ll_gns_sem);
227 rc = USERMODEHELPER(argv[0], argv, NULL);
229 CERROR("failed to call GNS upcall %s, err = %d\n",
230 sbi->ll_gns_upcall, rc);
235 * wait for mount completion. This is actually not needed, because
236 * USERMODEHELPER() returns only when usermode process finishes. But we
237 * are doing this just in case USERMODEHELPER() semantics are changed
238 * or usermode upcall program starts mounting in background and
239 * returns instantly. --umka
241 if (ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS)) {
242 struct dentry *rdentry;
243 struct vfsmount *rmnt;
245 /* mount is successful */
246 LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
249 rdentry = dget(dentry);
251 if (follow_down(&rmnt, &rdentry)) {
253 * registering new mount in GNS mounts list and thus
254 * make it accessible from GNS control thread.
256 spin_lock(&dcache_lock);
257 LASSERT(list_empty(&rmnt->mnt_lustre_list));
258 list_add_tail(&rmnt->mnt_lustre_list,
260 spin_unlock(&dcache_lock);
/* timestamp that ll_gns_check_mounts() compares against jiffies */
261 rmnt->mnt_last_used = jiffies;
268 spin_lock(&dentry->d_lock);
269 dentry->d_flags &= ~DCACHE_GNS_PENDING;
270 spin_unlock(&dentry->d_lock);
272 CERROR("usermode upcall %s failed to mount %s\n",
273 sbi->ll_gns_upcall, path);
/*
 * cleanup: drops the data page, the mount object file if it is still open
 * and the path page, then puts the sbi state back to IDLE and clears
 * DCACHE_GNS_MOUNTING on @dentry.
 * NOTE(review): the case labels are not visible in this listing; presumably
 * cleanup_phase selects how much of this runs -- confirm.
 */
279 switch (cleanup_phase) {
282 page_cache_release(datapage);
284 if (mntinfo_fd != NULL)
287 free_page((unsigned long)pathpage);
289 spin_lock(&sbi->ll_gns_lock);
290 sbi->ll_gns_state = LL_GNS_IDLE;
291 spin_unlock(&sbi->ll_gns_lock);
293 spin_lock(&dentry->d_lock);
294 dentry->d_flags &= ~DCACHE_GNS_MOUNTING;
295 spin_unlock(&dentry->d_lock);
300 /* tries to umount passed @mnt. */
301 int ll_gns_umount_object(struct vfsmount *mnt)
306 CDEBUG(D_INODE, "unmounting mnt %p\n", mnt);
/* rc receives the result of do_umount() called with flags 0 */
307 rc = do_umount(mnt, 0);
/* the failure is reported at D_INODE debug level */
309 CDEBUG(D_INODE, "can't umount 0x%p, err = %d\n",
/*
 * walks the GNS mounts of @sbi (sbi->ll_mnt_list) and hands them to
 * ll_gns_umount_object(). With @flags == LL_GNS_CHECK the time since
 * mnt_last_used is compared against sbi->ll_gns_timeout * HZ first.
 * NOTE(review): what happens to a mount that has not timed out is not
 * visible in this listing; presumably it is skipped -- confirm.
 */
316 int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
318 struct list_head check_list = LIST_HEAD_INIT(check_list);
319 struct vfsmount *mnt;
/* move the whole sbi list onto the private check_list under dcache_lock */
323 spin_lock(&dcache_lock);
324 list_splice_init(&sbi->ll_mnt_list, &check_list);
327 * walk the list in reverse order, and put them on the front of the sbi
328 * list each iteration; this avoids list-ordering problems if we race
329 * with another gns-mounting thread.
331 while (!list_empty(&check_list)) {
332 mnt = list_entry(check_list.prev,
338 list_del_init(&mnt->mnt_lustre_list);
340 list_add(&mnt->mnt_lustre_list,
343 /* check for timeout if needed */
344 pass = jiffies - mnt->mnt_last_used;
346 if (flags == LL_GNS_CHECK &&
347 pass < sbi->ll_gns_timeout * HZ)
/* dcache_lock is released around the umount call and re-taken afterwards */
352 spin_unlock(&dcache_lock);
355 ll_gns_umount_object(mnt);
358 spin_lock(&dcache_lock);
360 spin_unlock(&dcache_lock);
365 * GNS timer callback function. It restarts gns timer and wakes up GNS control
366 * thread to process mounts list.
/* @data carries the ll_sb_info pointer this timer belongs to */
368 void ll_gns_timer_callback(unsigned long data)
370 struct ll_sb_info *sbi = (void *)data;
/* queue @sbi for the control thread unless it is already queued */
373 spin_lock(&gns_lock);
374 if (list_empty(&sbi->ll_gns_sbi_head))
375 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
376 spin_unlock(&gns_lock);
/* wake the control thread, then re-arm the timer ll_gns_tick * HZ jiffies ahead */
378 wake_up(&gns_thread.t_ctl_waitq);
379 mod_timer(&sbi->ll_gns_timer,
380 jiffies + sbi->ll_gns_tick * HZ);
383 /* this function checks if something new happened to exist in gns list. */
384 static int inline ll_gns_check_event(void)
/* rc is non-zero when gns_sbi_list has entries; sampled under gns_lock */
388 spin_lock(&gns_lock);
389 rc = !list_empty(&gns_sbi_list);
390 spin_unlock(&gns_lock);
395 /* should we stop GNS control thread? */
396 static int inline ll_gns_check_stop(void)
/* 1 when SVC_STOPPING is set in gns_thread.t_flags, 0 otherwise */
399 return (gns_thread.t_flags & SVC_STOPPING) ? 1 : 0;
402 /* GNS control thread function. */
/* @arg is the struct ll_gns_ctl whose completions are signalled below */
403 static int ll_gns_thread_main(void *arg)
405 struct ll_gns_ctl *ctl = arg;
410 char name[sizeof(current->comm)];
411 snprintf(name, sizeof(name) - 1, "ll_gns");
412 kportal_daemonize(name);
/* block every signal for this thread; the mask is changed under the mask lock */
415 SIGNAL_MASK_LOCK(current, flags);
416 sigfillset(&current->blocked);
418 SIGNAL_MASK_UNLOCK(current, flags);
421 * letting starting function know, that we are ready and control may be
424 gns_thread.t_flags = SVC_RUNNING;
425 complete(&ctl->gc_starting);
/* sleep until an sbi is queued or a stop is requested, then drain the queue */
427 while (!ll_gns_check_stop()) {
428 struct l_wait_info lwi = { 0 };
430 l_wait_event(gns_thread.t_ctl_waitq,
431 (ll_gns_check_event() ||
432 ll_gns_check_stop()), &lwi);
434 spin_lock(&gns_lock);
435 while (!list_empty(&gns_sbi_list)) {
436 struct ll_sb_info *sbi;
438 sbi = list_entry(gns_sbi_list.prev,
/* unlink the sbi, then check its mounts with gns_lock released */
442 list_del_init(&sbi->ll_gns_sbi_head);
443 spin_unlock(&gns_lock);
444 ll_gns_check_mounts(sbi, LL_GNS_CHECK);
445 spin_lock(&gns_lock);
447 spin_unlock(&gns_lock);
451 * letting stop function know that thread is stopped and it may
455 gns_thread.t_flags = SVC_STOPPED;
457 /* this is SMP-safe way to finish thread. */
458 complete_and_exit(&ctl->gc_finishing, 0);
/* (re)arms the GNS timer of @sbi to fire ll_gns_tick * HZ jiffies from now */
461 void ll_gns_add_timer(struct ll_sb_info *sbi)
463 mod_timer(&sbi->ll_gns_timer,
464 jiffies + sbi->ll_gns_tick * HZ);
/* removes the GNS timer of @sbi by calling del_timer() on it */
467 void ll_gns_del_timer(struct ll_sb_info *sbi)
469 del_timer(&sbi->ll_gns_timer);
473 * starts GNS control thread and waits for a signal it is up and work may be
476 int ll_gns_start_thread(void)
/* the thread must not be running: t_flags is 0 before start and after stop */
481 LASSERT(gns_thread.t_flags == 0);
482 init_completion(&gns_ctl.gc_starting);
483 init_completion(&gns_ctl.gc_finishing);
484 init_waitqueue_head(&gns_thread.t_ctl_waitq);
/* gns_ctl is passed as the thread argument so it can signal the completions */
486 rc = kernel_thread(ll_gns_thread_main, &gns_ctl,
487 (CLONE_VM | CLONE_FILES));
489 CERROR("cannot start GNS control thread, "
/* the thread sets SVC_RUNNING before it completes gc_starting */
493 wait_for_completion(&gns_ctl.gc_starting);
494 LASSERT(gns_thread.t_flags == SVC_RUNNING);
498 /* stops GNS control thread and waits for its actual stop. */
499 void ll_gns_stop_thread(void)
/* request the stop, wake the thread so it notices, then wait for gc_finishing */
502 gns_thread.t_flags = SVC_STOPPING;
503 wake_up(&gns_thread.t_ctl_waitq);
504 wait_for_completion(&gns_ctl.gc_finishing);
505 LASSERT(gns_thread.t_flags == SVC_STOPPED);
/* back to 0 so that ll_gns_start_thread() may be called again */
506 gns_thread.t_flags = 0;