1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
6 * Author: Phil Schwan <phil@clusterfs.com>
7 * Author: Oleg Drokin <green@clusterfs.com>
8 * Author: Yury Umanets <yury@clusterfs.com>
9 * Review: Nikita Danilov <nikita@clusterfs.com>
11 * This file is part of Lustre, http://www.lustre.org.
13 * Lustre is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
17 * Lustre is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with Lustre; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #define DEBUG_SUBSYSTEM S_LLITE
30 #include <linux/version.h>
31 #include <asm/uaccess.h>
32 #include <linux/file.h>
33 #include <linux/kmod.h>
35 #include <linux/lustre_lite.h>
36 #include "llite_internal.h"
/* superblock infos queued for the GNS control thread; accessed under gns_lock */
38 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
/* protects gns_sbi_list */
39 static spinlock_t gns_lock = SPIN_LOCK_UNLOCKED;
/* control thread state: t_flags (running/stopping/stopped) and its wait queue */
40 static struct ptlrpc_thread gns_thread;
/* gc_starting / gc_finishing completions used to hand-shake with the thread */
41 static struct ll_gns_ctl gns_ctl;
44 * waits until passed dentry gets mountpoint or timeout and attempts are
45 * exhausted. Returns 1 if dentry became mountpoint and 0 otherwise.
/*
 * Wait for @dentry to become a mount point.
 *
 * Sleeps on sbi->ll_gns_waitq at most @tries times; every sleep is bounded
 * by a timeout of @timeout * HZ and ends early once d_mountpoint(@dentry)
 * holds.  rc ends up 1 when the dentry is a mount point and 0 otherwise.
 */
48 ll_gns_wait_for_mount(struct dentry *dentry,
49 int timeout, int tries)
51 struct l_wait_info lwi;
52 struct ll_sb_info *sbi;
56 LASSERT(dentry != NULL);
57 LASSERT(!IS_ERR(dentry));
58 sbi = ll_s2sbi(dentry->d_sb);
/* one bounded sleep per remaining attempt; stop as soon as it is mounted */
60 for (; !d_mountpoint(dentry) && tries > 0; tries--) {
61 lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
62 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
/* on success advance the GNS state MOUNTING -> FINISHED under ll_gns_lock */
65 if ((rc = d_mountpoint(dentry) ? 1 : 0)) {
66 spin_lock(&sbi->ll_gns_lock);
67 LASSERT(sbi->ll_gns_state == LL_GNS_MOUNTING);
68 sbi->ll_gns_state = LL_GNS_FINISHED;
69 spin_unlock(&sbi->ll_gns_lock);
/*
 * release everybody blocked on ll_gns_mount_finished.
 * NOTE(review): presumably executed for both outcomes - confirm that this
 * call sits after the closing brace of the if above.
 */
72 complete_all(&sbi->ll_gns_mount_finished);
77 * tries to mount the mount object under passed @dentry. In the case of success
78 * @dentry will become mount point and 0 will be returned. Error code will be
/*
 * Mount the GNS mount object that lives under @dentry by running the
 * user-mode upcall.
 *
 * Steps as far as they can be read here:
 *  - under ll_gns_lock, give up with -ERESTARTSYS when the state is already
 *    LL_GNS_MOUNTING or LL_GNS_FINISHED, otherwise flag the dentry with
 *    DCACHE_GNS_MOUNTING and switch the state to LL_GNS_MOUNTING;
 *  - build the path of @dentry relative to @mnt into a freshly allocated page;
 *  - look up the child named sbi->ll_gns_oname, open it and read at most
 *    one page of data from it;
 *  - run sbi->ll_gns_upcall through USERMODEHELPER();
 *  - wait for @dentry to become a mount point and, on success, link the new
 *    vfsmount into a list through mnt_lustre_list and stamp mnt_last_used;
 *  - in the tail, free what was allocated according to cleanup_phase, reset
 *    the state to LL_GNS_IDLE and clear DCACHE_GNS_MOUNTING.
 */
82 ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
84 struct ll_dentry_data *lld = dentry->d_fsdata;
85 char *path, *pathpage, *datapage, *argv[4];
86 struct file *mntinfo_fd = NULL;
87 int cleanup_phase = 0, rc = 0;
88 struct ll_sb_info *sbi;
89 struct dentry *dchild;
93 CERROR("suid directory found, but no "
94 "vfsmount available.\n");
98 CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
100 LASSERT(dentry->d_inode != NULL);
101 LASSERT(S_ISDIR(dentry->d_inode->i_mode));
102 LASSERT(lld != NULL);
104 sbi = ll_i2sbi(dentry->d_inode);
105 LASSERT(sbi != NULL);
108 * another thead is in progress or just finished mounting the
109 * dentry. Handling that.
/* ll_gns_lock is held from here until the state is set to LL_GNS_MOUNTING */
111 spin_lock(&sbi->ll_gns_lock);
112 if (sbi->ll_gns_state == LL_GNS_MOUNTING ||
113 sbi->ll_gns_state == LL_GNS_FINISHED)
115 spin_unlock(&sbi->ll_gns_lock);
116 CDEBUG(D_INODE, "GNS is in progress now, throwing "
117 "-ERESTARTSYS to restart syscall and let "
119 RETURN(-ERESTARTSYS);
121 LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
/* d_lock is taken while ll_gns_lock is still held */
123 spin_lock(&dentry->d_lock);
124 dentry->d_flags |= DCACHE_GNS_MOUNTING;
125 spin_unlock(&dentry->d_lock);
127 /* mounting started */
128 sbi->ll_gns_state = LL_GNS_MOUNTING;
129 spin_unlock(&sbi->ll_gns_lock);
131 /* we need to build an absolute pathname to pass to mount */
132 pathpage = (char *)__get_free_page(GFP_KERNEL);
134 GOTO(cleanup, rc = -ENOMEM);
137 /* getting @dentry path stored in @pathpage. */
138 path = d_path(dentry, mnt, pathpage, PAGE_SIZE);
/*
 * NOTE(review): the error reported and returned below is PTR_ERR(dchild),
 * but dchild has not been assigned yet at this point (its first assignment
 * is the lookup_one_len() call further down).  This path follows d_path(),
 * so PTR_ERR(path) looks like what was meant - confirm and fix.
 */
140 CERROR("can't build mount object path, err %d\n",
141 (int)PTR_ERR(dchild));
142 GOTO(cleanup, rc = PTR_ERR(dchild));
145 /* synchronizing with possible /proc/fs/...write */
146 down(&sbi->ll_gns_sem);
149 * mount object name is taken from sbi, where it is set in mount time or
150 * via /proc/fs... tunable. It may be ".mntinfo" or so.
152 dchild = lookup_one_len(sbi->ll_gns_oname, dentry,
153 strlen(sbi->ll_gns_oname));
154 up(&sbi->ll_gns_sem);
157 GOTO(cleanup, rc = -ENOENT);
/*
 * NOTE(review): inside this IS_ERR(dchild) branch the message arguments
 * dereference dchild->d_name, i.e. they read through an error pointer.
 * The name that was looked up is sbi->ll_gns_oname - confirm and fix.
 */
159 if (IS_ERR(dchild)) {
160 CERROR("can't find mount object %*s/%*s err = %d.\n",
161 (int)dentry->d_name.len, dentry->d_name.name,
162 (int)dchild->d_name.len, dchild->d_name.name,
163 (int)PTR_ERR(dchild));
164 GOTO(cleanup, rc = PTR_ERR(dchild));
167 /* mount object is not found */
168 if (!dchild->d_inode)
169 GOTO(cleanup, rc = -ENOENT);
173 /* ok, mount object is found, opening it. */
174 mntinfo_fd = dentry_open(dchild, mnt, 0);
175 if (IS_ERR(mntinfo_fd)) {
176 CERROR("can't open mount object %*s/%*s err = %d.\n",
177 (int)dentry->d_name.len, dentry->d_name.name,
178 (int)dchild->d_name.len, dchild->d_name.name,
179 (int)PTR_ERR(mntinfo_fd));
182 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
/* the object is read into a single page below, so larger files are refused */
186 if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
187 CERROR("mount object %*s/%*s is too big (%Ld)\n",
188 (int)dentry->d_name.len, dentry->d_name.name,
189 (int)dchild->d_name.len, dchild->d_name.name,
190 mntinfo_fd->f_dentry->d_inode->i_size);
191 GOTO(cleanup, rc = -EFBIG);
194 datapage = (char *)__get_free_page(GFP_KERNEL);
196 GOTO(cleanup, rc = -ENOMEM);
/*
 * up to PAGE_SIZE bytes are read starting at offset 0; afterwards the last
 * byte of the page is forced to NUL.
 * NOTE(review): that terminates the buffer only at the end of the page -
 * confirm that a shorter read is also terminated at the length returned.
 */
200 /* read data from mount object. */
201 rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE);
203 CERROR("can't read mount object %*s/%*s data, err %d\n",
204 (int)dentry->d_name.len, dentry->d_name.name,
205 (int)dchild->d_name.len, dchild->d_name.name,
210 datapage[PAGE_SIZE - 1] = '\0';
215 /* synchronizing with possible /proc/fs/...write */
216 down(&sbi->ll_gns_sem);
219 * upcall is initialized in mount time or via /proc/fs/... tuneable and
220 * may be /usr/lib/lustre/gns-upcall.sh
/* argv[0] is the upcall program itself; it is also what gets executed below */
222 argv[0] = sbi->ll_gns_upcall;
227 up(&sbi->ll_gns_sem);
229 rc = USERMODEHELPER(argv[0], argv, NULL);
231 CERROR("failed to call GNS upcall %s, err = %d\n",
232 sbi->ll_gns_upcall, rc);
237 * wait for mount completion. This is actually not need, because
238 * USERMODEHELPER() returns only when usermode process finishes. But we
239 * doing this just for case USERMODEHELPER() semantics will be changed
240 * or usermode upcall program will start mounting in backgound and
241 * return instantly. --umka
/* GNS_WAIT_ATTEMPTS waits, each with a timeout argument of 1 (1 * HZ) */
243 if (ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS)) {
244 struct dentry *rdentry;
245 struct vfsmount *rmnt;
247 /* mount is successful */
248 LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
251 rdentry = dget(dentry);
253 if (follow_down(&rmnt, &rdentry)) {
255 * registering new mount in GNS mounts list and thus
256 * make it accessible from GNS control thread.
258 spin_lock(&dcache_lock);
259 LASSERT(list_empty(&rmnt->mnt_lustre_list));
260 list_add_tail(&rmnt->mnt_lustre_list,
262 spin_unlock(&dcache_lock);
/* timestamp later compared against the timeout in ll_gns_check_mounts() */
263 rmnt->mnt_last_used = jiffies;
/*
 * NOTE(review): the flag cleared here is DCACHE_GNS_PENDING, whereas the
 * flag this function sets and clears elsewhere is DCACHE_GNS_MOUNTING.
 * PENDING is not set anywhere in this function - presumably by the caller;
 * confirm this is intentional.
 */
270 spin_lock(&dentry->d_lock);
271 dentry->d_flags &= ~DCACHE_GNS_PENDING;
272 spin_unlock(&dentry->d_lock);
274 CERROR("usermode upcall %s failed to mount %s\n",
275 sbi->ll_gns_upcall, path);
/* staged cleanup: what gets released is selected by cleanup_phase */
281 switch (cleanup_phase) {
283 free_page((unsigned long)datapage);
285 if (mntinfo_fd != NULL)
288 free_page((unsigned long)pathpage);
291 * waking up all waiters after gns state is set to
/* waiters are only released when cleanup_phase was advanced past 0 */
294 if (cleanup_phase > 0)
295 complete_all(&sbi->ll_gns_mount_finished);
/* back to idle so that the next mount attempt may proceed */
297 spin_lock(&sbi->ll_gns_lock);
298 sbi->ll_gns_state = LL_GNS_IDLE;
299 spin_unlock(&sbi->ll_gns_lock);
301 spin_lock(&dentry->d_lock);
302 dentry->d_flags &= ~DCACHE_GNS_MOUNTING;
303 spin_unlock(&dentry->d_lock);
308 /* tries to umount passed @mnt. */
/*
 * Unmount @mnt with do_umount(mnt, 0).  The attempt and a failure are both
 * logged at D_INODE debug level; rc holds the do_umount() result.
 */
309 int ll_gns_umount_object(struct vfsmount *mnt)
314 CDEBUG(D_INODE, "unmounting mnt %p\n", mnt);
315 rc = do_umount(mnt, 0);
317 CDEBUG(D_INODE, "can't umount 0x%p, err = %d\n",
/*
 * Walk the GNS mounts recorded in @sbi->ll_mnt_list and try to unmount them.
 *
 * The whole list is first moved to a private list under dcache_lock.  When
 * @flags is LL_GNS_CHECK, the idle time of a mount (jiffies - mnt_last_used)
 * is compared against ll_gns_timeout * HZ before it is unmounted.
 * dcache_lock is dropped around ll_gns_umount_object() and re-taken after.
 */
324 int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
326 struct list_head check_list = LIST_HEAD_INIT(check_list);
327 struct vfsmount *mnt;
/* detach every entry from the sbi list in one step */
331 spin_lock(&dcache_lock);
332 list_splice_init(&sbi->ll_mnt_list, &check_list);
335 * walk the list in reverse order, and put them on the front of the sbi
336 * list each iteration; this avoids list-ordering problems if we race
337 * with another gns-mounting thread.
339 while (!list_empty(&check_list)) {
/* take the tail entry of the private list and re-link it with list_add() */
340 mnt = list_entry(check_list.prev,
346 list_del_init(&mnt->mnt_lustre_list);
348 list_add(&mnt->mnt_lustre_list,
351 /* check for timeout if needed */
352 pass = jiffies - mnt->mnt_last_used;
/*
 * NOTE(review): presumably a mount that has been idle for less than the
 * timeout is skipped here - confirm against the statement guarded by this
 * condition.
 */
354 if (flags == LL_GNS_CHECK &&
355 pass < sbi->ll_gns_timeout * HZ)
/* the umount itself runs without dcache_lock held */
360 spin_unlock(&dcache_lock);
363 ll_gns_umount_object(mnt);
366 spin_lock(&dcache_lock);
368 spin_unlock(&dcache_lock);
373 * GNS timer callback function. It restarts gns timer and wakes up GNS control
374 * thread to process mounts list.
/*
 * Timer handler.  @data is the struct ll_sb_info pointer the timer belongs
 * to.  Queues that sbi on gns_sbi_list unless it is already linked, wakes
 * the control thread and re-arms the timer ll_gns_tick * HZ jiffies ahead.
 */
376 void ll_gns_timer_callback(unsigned long data)
378 struct ll_sb_info *sbi = (void *)data;
/* an empty ll_gns_sbi_head means the sbi is not queued yet */
381 spin_lock(&gns_lock);
382 if (list_empty(&sbi->ll_gns_sbi_head))
383 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
384 spin_unlock(&gns_lock);
386 wake_up(&gns_thread.t_ctl_waitq);
/* periodic behaviour: the handler schedules its own next run */
387 mod_timer(&sbi->ll_gns_timer,
388 jiffies + sbi->ll_gns_tick * HZ);
391 /* this function checks if something new happened to exist in gns list. */
/*
 * Wake-up condition of the control thread: rc is non-zero when gns_sbi_list
 * holds at least one queued sbi.  The list is sampled under gns_lock.
 */
392 static int inline ll_gns_check_event(void)
396 spin_lock(&gns_lock);
397 rc = !list_empty(&gns_sbi_list);
398 spin_unlock(&gns_lock);
403 /* should we stop GNS control thread? */
/* Returns 1 when SVC_STOPPING is set in gns_thread.t_flags, 0 otherwise. */
404 static int inline ll_gns_check_stop(void)
407 return (gns_thread.t_flags & SVC_STOPPING) ? 1 : 0;
410 /* GNS control thread function. */
/*
 * Body of the GNS control thread.  @arg is the struct ll_gns_ctl used for
 * the start/finish hand-shake.
 *
 * After daemonizing as "ll_gns" and filling its blocked-signal set, the
 * thread marks itself SVC_RUNNING and completes gc_starting.  It then sleeps
 * on gns_thread.t_ctl_waitq until an sbi is queued or a stop is requested,
 * and runs ll_gns_check_mounts(sbi, LL_GNS_CHECK) for every queued sbi.
 * On exit it sets SVC_STOPPED and completes gc_finishing.
 */
411 static int ll_gns_thread_main(void *arg)
413 struct ll_gns_ctl *ctl = arg;
/* buffer sized like current->comm, filled with the thread name */
418 char name[sizeof(current->comm)];
419 snprintf(name, sizeof(name) - 1, "ll_gns");
420 kportal_daemonize(name);
/*
 * NOTE(review): the sigfillset() argument below reads as a currency sign
 * followed by 't'; this is almost certainly a mis-encoded '&current'
 * (i.e. &current->blocked) - confirm against a pristine copy and repair.
 */
423 SIGNAL_MASK_LOCK(current, flags);
424 sigfillset(¤t->blocked);
426 SIGNAL_MASK_UNLOCK(current, flags);
429 * letting starting function know, that we are ready and control may be
432 gns_thread.t_flags = SVC_RUNNING;
433 complete(&ctl->gc_starting);
435 while (!ll_gns_check_stop()) {
436 struct l_wait_info lwi = { 0 };
/* sleep until there is a queued sbi or a stop request */
438 l_wait_event(gns_thread.t_ctl_waitq,
439 (ll_gns_check_event() ||
440 ll_gns_check_stop()), &lwi);
/*
 * drain the queue from its tail; gns_lock is released while an sbi is
 * processed and re-taken before the list is examined again
 */
442 spin_lock(&gns_lock);
443 while (!list_empty(&gns_sbi_list)) {
444 struct ll_sb_info *sbi;
446 sbi = list_entry(gns_sbi_list.prev,
450 list_del_init(&sbi->ll_gns_sbi_head);
451 spin_unlock(&gns_lock);
452 ll_gns_check_mounts(sbi, LL_GNS_CHECK);
453 spin_lock(&gns_lock);
455 spin_unlock(&gns_lock);
/* ll_gns_stop_thread() asserts this value after gc_finishing completes */
459 gns_thread.t_flags = SVC_STOPPED;
461 /* this is SMP-safe way to finish thread. */
462 complete_and_exit(&ctl->gc_finishing, 0);
/*
 * Arm (or re-arm) the GNS timer of @sbi so that it fires ll_gns_tick * HZ
 * jiffies from now.
 */
465 void ll_gns_add_timer(struct ll_sb_info *sbi)
467 mod_timer(&sbi->ll_gns_timer,
468 jiffies + sbi->ll_gns_tick * HZ);
/*
 * Deactivate the GNS timer of @sbi.
 * NOTE(review): ll_gns_timer_callback() re-arms this timer with mod_timer();
 * confirm that plain del_timer() cannot race with a handler that is running
 * at this moment (del_timer_sync() may be what is needed).
 */
471 void ll_gns_del_timer(struct ll_sb_info *sbi)
473 del_timer(&sbi->ll_gns_timer);
477 * starts GNS control thread and waits for a signal it is up and work may be
/*
 * Start the GNS control thread.
 *
 * Expects gns_thread.t_flags to be 0 (asserted).  Initialises both
 * completions and the thread wait queue, spawns ll_gns_thread_main() with
 * gns_ctl as its argument, then blocks on gc_starting until the thread has
 * marked itself SVC_RUNNING.  A spawn failure is reported through CERROR.
 */
480 int ll_gns_start_thread(void)
485 LASSERT(gns_thread.t_flags == 0);
486 init_completion(&gns_ctl.gc_starting);
487 init_completion(&gns_ctl.gc_finishing);
488 init_waitqueue_head(&gns_thread.t_ctl_waitq);
/* rc receives the kernel_thread() result */
490 rc = kernel_thread(ll_gns_thread_main, &gns_ctl,
491 (CLONE_VM | CLONE_FILES));
493 CERROR("cannot start GNS control thread, "
497 wait_for_completion(&gns_ctl.gc_starting);
498 LASSERT(gns_thread.t_flags == SVC_RUNNING);
502 /* stops GNS control thread and waits its actual stop. */
/*
 * Stop the GNS control thread: request the stop through t_flags, wake the
 * thread, wait for gc_finishing, then reset t_flags to 0, which is the value
 * ll_gns_start_thread() asserts on entry.
 */
503 void ll_gns_stop_thread(void)
/* the thread loop tests this flag via ll_gns_check_stop() */
506 gns_thread.t_flags = SVC_STOPPING;
507 wake_up(&gns_thread.t_ctl_waitq);
508 wait_for_completion(&gns_ctl.gc_finishing);
509 LASSERT(gns_thread.t_flags == SVC_STOPPED);
510 gns_thread.t_flags = 0;