/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (C) 2004 Cluster File Systems, Inc. * Author: Phil Schwan * * This file is part of Lustre, http://www.lustre.org. * * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define DEBUG_SUBSYSTEM S_LLITE #include #include #include #include #include #include #include "llite_internal.h" /* After roughly how long should we remove an inactive mount? */ #define GNS_MOUNT_TIMEOUT 120 /* How often should the GNS timer look for mounts to cleanup? */ #define GNS_TICK 30 int ll_finish_gns(struct ll_sb_info *sbi) { down(&sbi->ll_gns_sem); if (sbi->ll_gns_state != LL_GNS_STATE_MOUNTING) { up(&sbi->ll_gns_sem); CERROR("FINISH_GNS called on mount which was not expecting " "completion.\n"); return -EINVAL; } sbi->ll_gns_state = LL_GNS_STATE_FINISHED; up(&sbi->ll_gns_sem); complete(&sbi->ll_gns_completion); return 0; } /* Pass exactly one (1) page in; when this function returns "page" will point * somewhere into the middle of the page. */ int fill_page_with_path(struct dentry *dentry, struct vfsmount *mnt, char **pagep) { char *path = *pagep, *p; path[PAGE_SIZE - 1] = '\0'; p = path + PAGE_SIZE - 1; while (1) { if (p - path < dentry->d_name.len + 1) return -ENAMETOOLONG; if (dentry->d_name.name[0] != '/') { p -= dentry->d_name.len; memcpy(p, dentry->d_name.name, dentry->d_name.len); p--; *p = '/'; } dentry = dentry->d_parent; if (dentry->d_parent == dentry) { if (mnt->mnt_parent == mnt) break; /* finished walking up */ mnt = mntget(mnt); dget(dentry); while (dentry->d_parent == dentry && follow_up(&mnt, &dentry)) ; mntput(mnt); dput(dentry); } } *pagep = p; return 0; } int ll_dir_process_mount_object(struct dentry *dentry, struct vfsmount *mnt) { struct ll_sb_info *sbi; struct file *mntinfo_fd = NULL; struct page *datapage = NULL, *pathpage; struct address_space *mapping; struct ll_dentry_data *lld = dentry->d_fsdata; struct dentry *dchild, *tmp_dentry; struct vfsmount *tmp_mnt; char *p, *path, *argv[4]; int stage = 0, rc = 0; ENTRY; if (mnt == NULL) { CERROR("suid directory found, but no vfsmount available.\n"); RETURN(-1); } LASSERT(dentry->d_inode != NULL); LASSERT(lld != NULL); sbi = ll_i2sbi(dentry->d_inode); LASSERT(sbi != NULL); down(&sbi->ll_gns_sem); if (sbi->ll_gns_state == LL_GNS_STATE_MOUNTING) { up(&sbi->ll_gns_sem); wait_for_completion(&sbi->ll_gns_completion); if (d_mountpoint(dentry)) RETURN(0); RETURN(-1); } if (sbi->ll_gns_state == LL_GNS_STATE_FINISHED) { /* we lost a race; just return */ up(&sbi->ll_gns_sem); if (d_mountpoint(dentry)) RETURN(0); RETURN(-1); } LASSERT(sbi->ll_gns_state == LL_GNS_STATE_IDLE); sbi->ll_gns_state = LL_GNS_STATE_MOUNTING; up(&sbi->ll_gns_sem); /* We need to build an absolute pathname to pass to mount */ pathpage = alloc_pages(GFP_HIGHUSER, 0); if (pathpage == NULL) GOTO(cleanup, rc = -ENOMEM); path = kmap(pathpage); LASSERT(path != NULL); stage = 1; fill_page_with_path(dentry, mnt, &path); dchild = lookup_one_len(".mntinfo", dentry, strlen(".mntinfo")); if (dchild == NULL || IS_ERR(dchild)) { CERROR("Directory %*s is setuid, but without a mount object.\n", dentry->d_name.len, dentry->d_name.name); GOTO(cleanup, rc = -1); } mntget(mnt); mntinfo_fd = dentry_open(dchild, mnt, 0); if (IS_ERR(mntinfo_fd)) { dput(dchild); mntput(mnt); GOTO(cleanup, rc = PTR_ERR(mntinfo_fd)); } stage = 2; if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) { CERROR("Mount object file is too big (%Ld)\n", mntinfo_fd->f_dentry->d_inode->i_size); GOTO(cleanup, rc = -1); } mapping = mntinfo_fd->f_dentry->d_inode->i_mapping; datapage = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, mntinfo_fd); if (IS_ERR(datapage)) GOTO(cleanup, rc = PTR_ERR(datapage)); p = kmap(datapage); LASSERT(p != NULL); stage = 3; p[PAGE_SIZE - 1] = '\0'; fput(mntinfo_fd); mntinfo_fd = NULL; argv[0] = "/usr/lib/lustre/gns-upcall.sh"; argv[1] = p; argv[2] = path; argv[3] = NULL; rc = USERMODEHELPER(argv[0], argv, NULL); if (rc != 0) { CERROR("GNS mount failed: %d\n", rc); GOTO(cleanup, rc); } wait_for_completion(&sbi->ll_gns_completion); LASSERT(sbi->ll_gns_state == LL_GNS_STATE_FINISHED); if (d_mountpoint(dentry)) { /* successful follow_down will mntput and dput */ tmp_mnt = mntget(mnt); tmp_dentry = dget(dentry); rc = follow_down(&tmp_mnt, &tmp_dentry); if (rc == 1) { struct ll_sb_info *sbi = ll_s2sbi(dentry->d_sb); spin_lock(&dcache_lock); LASSERT(list_empty(&tmp_mnt->mnt_lustre_list)); list_add_tail(&tmp_mnt->mnt_lustre_list, &sbi->ll_mnt_list); spin_unlock(&dcache_lock); tmp_mnt->mnt_last_used = jiffies; mntput(tmp_mnt); dput(tmp_dentry); rc = 0; } else { mntput(mnt); dput(dentry); } } else { CERROR("Woke up from GNS mount, but no mountpoint in place.\n"); rc = -1; } EXIT; cleanup: switch (stage) { case 3: kunmap(datapage); page_cache_release(datapage); case 2: if (mntinfo_fd != NULL) fput(mntinfo_fd); case 1: kunmap(pathpage); __free_pages(pathpage, 0); case 0: down(&sbi->ll_gns_sem); sbi->ll_gns_state = LL_GNS_STATE_IDLE; up(&sbi->ll_gns_sem); } return rc; } /* If timeout == 1, only remove the mounts which are properly aged. * * If timeout == 0, we are unmounting -- remove them all. */ int ll_gns_umount_all(struct ll_sb_info *sbi, int timeout) { struct list_head kill_list = LIST_HEAD_INIT(kill_list); struct page *page = NULL; char *kpage = NULL, *path; int rc; ENTRY; if (timeout == 0) { page = alloc_pages(GFP_HIGHUSER, 0); if (page == NULL) RETURN(-ENOMEM); kpage = kmap(page); LASSERT(kpage != NULL); } spin_lock(&dcache_lock); list_splice_init(&sbi->ll_mnt_list, &kill_list); /* Walk the list in reverse order, and put them on the front of the * sbi list each iteration; this avoids list-ordering problems if we * race with another gns-mounting thread */ while (!list_empty(&kill_list)) { struct vfsmount *mnt = list_entry(kill_list.prev, struct vfsmount, mnt_lustre_list); mntget(mnt); list_del_init(&mnt->mnt_lustre_list); list_add(&mnt->mnt_lustre_list, &sbi->ll_mnt_list); if (timeout && jiffies - mnt->mnt_last_used < GNS_MOUNT_TIMEOUT * HZ) { mntput(mnt); continue; } spin_unlock(&dcache_lock); CDEBUG(D_INODE, "unmounting mnt %p from sbi %p\n", mnt, sbi); rc = do_umount(mnt, 0); if (rc != 0 && page != NULL) { int rc2; path = kpage; rc2 = fill_page_with_path(mnt->mnt_root, mnt, &path); CERROR("GNS umount(%s): %d\n", rc2 == 0 ? path : "", rc); } mntput(mnt); spin_lock(&dcache_lock); } spin_unlock(&dcache_lock); if (page != NULL) { kunmap(page); __free_pages(page, 0); } RETURN(0); } static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list); static struct semaphore gns_sem; static struct ptlrpc_thread gns_thread; void ll_gns_timer_callback(unsigned long data) { struct ll_sb_info *sbi = (void *)data; ENTRY; down(&gns_sem); if (list_empty(&sbi->ll_gns_sbi_head)) list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list); up(&gns_sem); wake_up(&gns_thread.t_ctl_waitq); mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ); } static int gns_check_event(void) { int rc; down(&gns_sem); rc = !list_empty(&gns_sbi_list); up(&gns_sem); return rc; } static int ll_gns_thread_main(void *arg) { unsigned long flags; ENTRY; { char name[sizeof(current->comm)]; snprintf(name, sizeof(name) - 1, "ll_gns"); kportal_daemonize(name); } SIGNAL_MASK_LOCK(current, flags); sigfillset(¤t->blocked); RECALC_SIGPENDING; SIGNAL_MASK_UNLOCK(current, flags); gns_thread.t_flags = SVC_RUNNING; wake_up(&gns_thread.t_ctl_waitq); while ((gns_thread.t_flags & SVC_STOPPING) == 0) { struct l_wait_info lwi = { 0 }; l_wait_event(gns_thread.t_ctl_waitq, gns_check_event() || gns_thread.t_flags & SVC_STOPPING, &lwi); down(&gns_sem); while (!list_empty(&gns_sbi_list)) { struct ll_sb_info *sbi = list_entry(gns_sbi_list.prev, struct ll_sb_info, ll_gns_sbi_head); list_del_init(&sbi->ll_gns_sbi_head); ll_gns_umount_all(sbi, 1); } up(&gns_sem); } gns_thread.t_flags = SVC_STOPPED; wake_up(&gns_thread.t_ctl_waitq); RETURN(0); } void ll_gns_add_timer(struct ll_sb_info *sbi) { mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ); } void ll_gns_del_timer(struct ll_sb_info *sbi) { del_timer(&sbi->ll_gns_timer); } int ll_gns_start_thread(void) { struct l_wait_info lwi = { 0 }; int rc; LASSERT(gns_thread.t_flags == 0); sema_init(&gns_sem, 1); init_waitqueue_head(&gns_thread.t_ctl_waitq); rc = kernel_thread(ll_gns_thread_main, NULL, CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("cannot start thread: %d\n", rc); return rc; } l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_RUNNING, &lwi); return 0; } void ll_gns_stop_thread(void) { struct l_wait_info lwi = { 0 }; down(&gns_sem); gns_thread.t_flags = SVC_STOPPING; up(&gns_sem); wake_up(&gns_thread.t_ctl_waitq); l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_STOPPED, &lwi); gns_thread.t_flags = 0; }