Whamcloud - gitweb
land b_gns onto HEAD. If you are working on CMD, you MUST UPDATE YOUR
[fs/lustre-release.git] / lustre / llite / llite_gns.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Phil Schwan <phil@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22
23 #define DEBUG_SUBSYSTEM S_LLITE
24
25 #include <linux/fs.h>
26 #include <linux/version.h>
27 #include <asm/uaccess.h>
28 #include <linux/file.h>
29 #include <linux/kmod.h>
30
31 #include <linux/lustre_lite.h>
32 #include "llite_internal.h"
33
/* Interval, in seconds, between runs of the GNS cleanup timer. */
#define GNS_TICK 30
/* Idle time, in seconds, after which a GNS mount becomes eligible for
 * automatic removal. */
#define GNS_MOUNT_TIMEOUT 120
38
39 int ll_finish_gns(struct ll_sb_info *sbi)
40 {
41         down(&sbi->ll_gns_sem);
42         if (sbi->ll_gns_state != LL_GNS_STATE_MOUNTING) {
43                 up(&sbi->ll_gns_sem);
44                 CERROR("FINISH_GNS called on mount which was not expecting "
45                        "completion.\n");
46                 return -EINVAL;
47         }
48
49         sbi->ll_gns_state = LL_GNS_STATE_FINISHED;
50         up(&sbi->ll_gns_sem);
51         complete(&sbi->ll_gns_completion);
52
53         return 0;
54 }
55
56 /* Pass exactly one (1) page in; when this function returns "page" will point
57  * somewhere into the middle of the page. */
58 int fill_page_with_path(struct dentry *dentry, struct vfsmount *mnt,
59                         char **pagep)
60 {
61         char *path = *pagep, *p;
62
63         path[PAGE_SIZE - 1] = '\0';
64         p = path + PAGE_SIZE - 1;
65
66         while (1) {
67                 if (p - path < dentry->d_name.len + 1)
68                         return -ENAMETOOLONG;
69                 if (dentry->d_name.name[0] != '/') {
70                         p -= dentry->d_name.len;
71                         memcpy(p, dentry->d_name.name, dentry->d_name.len);
72                         p--;
73                         *p = '/';
74                 }
75
76                 dentry = dentry->d_parent;
77                 if (dentry->d_parent == dentry) {
78                         if (mnt->mnt_parent == mnt)
79                                 break; /* finished walking up */
80                         mnt = mntget(mnt);
81                         dget(dentry);
82                         while (dentry->d_parent == dentry &&
83                                follow_up(&mnt, &dentry))
84                                 ;
85                         mntput(mnt);
86                         dput(dentry);
87                 }
88         }
89         *pagep = p;
90         return 0;
91 }
92
93 int ll_dir_process_mount_object(struct dentry *dentry, struct vfsmount *mnt)
94 {
95         struct ll_sb_info *sbi;
96         struct file *mntinfo_fd = NULL;
97         struct page *datapage = NULL, *pathpage;
98         struct address_space *mapping;
99         struct ll_dentry_data *lld = dentry->d_fsdata;
100         struct dentry *dchild, *tmp_dentry;
101         struct vfsmount *tmp_mnt;
102         char *p, *path, *argv[4];
103         int stage = 0, rc = 0;
104         ENTRY;
105
106         if (mnt == NULL) {
107                 CERROR("suid directory found, but no vfsmount available.\n");
108                 RETURN(-1);
109         }
110
111         LASSERT(dentry->d_inode != NULL);
112         LASSERT(lld != NULL);
113         sbi = ll_i2sbi(dentry->d_inode);
114         LASSERT(sbi != NULL);
115
116         down(&sbi->ll_gns_sem);
117         if (sbi->ll_gns_state == LL_GNS_STATE_MOUNTING) {
118                 up(&sbi->ll_gns_sem);
119                 wait_for_completion(&sbi->ll_gns_completion);
120                 if (d_mountpoint(dentry))
121                         RETURN(0);
122                 RETURN(-1);
123         }
124         if (sbi->ll_gns_state == LL_GNS_STATE_FINISHED) {
125                 /* we lost a race; just return */
126                 up(&sbi->ll_gns_sem);
127                 if (d_mountpoint(dentry))
128                         RETURN(0);
129                 RETURN(-1);
130         }
131         LASSERT(sbi->ll_gns_state == LL_GNS_STATE_IDLE);
132         sbi->ll_gns_state = LL_GNS_STATE_MOUNTING;
133         up(&sbi->ll_gns_sem);
134
135         /* We need to build an absolute pathname to pass to mount */
136         pathpage = alloc_pages(GFP_HIGHUSER, 0);
137         if (pathpage == NULL)
138                 GOTO(cleanup, rc = -ENOMEM);
139         path = kmap(pathpage);
140         LASSERT(path != NULL);
141         stage = 1;
142         fill_page_with_path(dentry, mnt, &path);
143
144         dchild = lookup_one_len(".mntinfo", dentry, strlen(".mntinfo"));
145         if (dchild == NULL || IS_ERR(dchild)) {
146                 CERROR("Directory %*s is setuid, but without a mount object.\n",
147                        dentry->d_name.len, dentry->d_name.name);
148                 GOTO(cleanup, rc = -1);
149         }
150
151         mntget(mnt);
152
153         mntinfo_fd = dentry_open(dchild, mnt, 0);
154         if (IS_ERR(mntinfo_fd)) {
155                 dput(dchild);
156                 mntput(mnt);
157                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
158         }
159         stage = 2;
160
161         if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
162                 CERROR("Mount object file is too big (%Ld)\n",
163                        mntinfo_fd->f_dentry->d_inode->i_size);
164                 GOTO(cleanup, rc = -1);
165         }
166         mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
167         datapage = read_cache_page(mapping, 0,
168                                    (filler_t *)mapping->a_ops->readpage,
169                                    mntinfo_fd);
170         if (IS_ERR(datapage))
171                 GOTO(cleanup, rc = PTR_ERR(datapage));
172
173         p = kmap(datapage);
174         LASSERT(p != NULL);
175         stage = 3;
176
177         p[PAGE_SIZE - 1] = '\0';
178
179         fput(mntinfo_fd);
180         mntinfo_fd = NULL;
181
182         argv[0] = "/usr/lib/lustre/gns-upcall.sh";
183         argv[1] = p;
184         argv[2] = path;
185         argv[3] = NULL;
186         rc = USERMODEHELPER(argv[0], argv, NULL);
187
188         if (rc != 0) {
189                 CERROR("GNS mount failed: %d\n", rc);
190                 GOTO(cleanup, rc);
191         }
192
193         wait_for_completion(&sbi->ll_gns_completion);
194         LASSERT(sbi->ll_gns_state == LL_GNS_STATE_FINISHED);
195
196         if (d_mountpoint(dentry)) {
197                 /* successful follow_down will mntput and dput */
198                 tmp_mnt = mntget(mnt);
199                 tmp_dentry = dget(dentry);
200                 rc = follow_down(&tmp_mnt, &tmp_dentry);
201                 if (rc == 1) {
202                         struct ll_sb_info *sbi = ll_s2sbi(dentry->d_sb);
203                         spin_lock(&dcache_lock);
204                         LASSERT(list_empty(&tmp_mnt->mnt_lustre_list));
205                         list_add_tail(&tmp_mnt->mnt_lustre_list,
206                                       &sbi->ll_mnt_list);
207                         spin_unlock(&dcache_lock);
208
209                         tmp_mnt->mnt_last_used = jiffies;
210
211                         mntput(tmp_mnt);
212                         dput(tmp_dentry);
213                         rc = 0;
214                 } else {
215                         mntput(mnt);
216                         dput(dentry);
217                 }
218         } else {
219                 CERROR("Woke up from GNS mount, but no mountpoint in place.\n");
220                 rc = -1;
221         }
222
223         EXIT;
224 cleanup:
225         switch (stage) {
226         case 3:
227                 kunmap(datapage);
228                 page_cache_release(datapage);
229         case 2:
230                 if (mntinfo_fd != NULL)
231                         fput(mntinfo_fd);
232         case 1:
233                 kunmap(pathpage);
234                 __free_pages(pathpage, 0);
235         case 0:
236                 down(&sbi->ll_gns_sem);
237                 sbi->ll_gns_state = LL_GNS_STATE_IDLE;
238                 up(&sbi->ll_gns_sem);
239         }
240         return rc;
241 }
242
243 /* If timeout == 1, only remove the mounts which are properly aged.
244  *
245  * If timeout == 0, we are unmounting -- remove them all. */
246 int ll_gns_umount_all(struct ll_sb_info *sbi, int timeout)
247 {
248         struct list_head kill_list = LIST_HEAD_INIT(kill_list);
249         struct page *page = NULL;
250         char *kpage = NULL, *path;
251         int rc;
252         ENTRY;
253
254         if (timeout == 0) {
255                 page = alloc_pages(GFP_HIGHUSER, 0);
256                 if (page == NULL)
257                         RETURN(-ENOMEM);
258                 kpage = kmap(page);
259                 LASSERT(kpage != NULL);
260         }
261
262         spin_lock(&dcache_lock);
263         list_splice_init(&sbi->ll_mnt_list, &kill_list);
264
265         /* Walk the list in reverse order, and put them on the front of the
266          * sbi list each iteration; this avoids list-ordering problems if we
267          * race with another gns-mounting thread */
268         while (!list_empty(&kill_list)) {
269                 struct vfsmount *mnt =
270                         list_entry(kill_list.prev, struct vfsmount,
271                                    mnt_lustre_list);
272                 mntget(mnt);
273                 list_del_init(&mnt->mnt_lustre_list);
274                 list_add(&mnt->mnt_lustre_list, &sbi->ll_mnt_list);
275
276                 if (timeout &&
277                     jiffies - mnt->mnt_last_used < GNS_MOUNT_TIMEOUT * HZ) {
278                         mntput(mnt);
279                         continue;
280                 }
281                 spin_unlock(&dcache_lock);
282
283                 CDEBUG(D_INODE, "unmounting mnt %p from sbi %p\n", mnt, sbi);
284
285                 rc = do_umount(mnt, 0);
286                 if (rc != 0 && page != NULL) {
287                         int rc2;
288                         path = kpage;
289                         rc2 = fill_page_with_path(mnt->mnt_root, mnt, &path);
290                         CERROR("GNS umount(%s): %d\n", rc2 == 0 ? path : "",
291                                rc);
292                 }
293                 mntput(mnt);
294                 spin_lock(&dcache_lock);
295         }
296         spin_unlock(&dcache_lock);
297
298         if (page != NULL) {
299                 kunmap(page);
300                 __free_pages(page, 0);
301         }
302         RETURN(0);
303 }
304
305 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
306 static struct semaphore gns_sem;
307 static struct ptlrpc_thread gns_thread;
308
309 void ll_gns_timer_callback(unsigned long data)
310 {
311         struct ll_sb_info *sbi = (void *)data;
312         ENTRY;
313
314         down(&gns_sem);
315         if (list_empty(&sbi->ll_gns_sbi_head))
316                 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
317         up(&gns_sem);
318         wake_up(&gns_thread.t_ctl_waitq);
319         mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ);
320 }
321
322 static int gns_check_event(void)
323 {
324         int rc;
325         down(&gns_sem);
326         rc = !list_empty(&gns_sbi_list);
327         up(&gns_sem);
328         return rc;
329 }
330
331 static int ll_gns_thread_main(void *arg)
332 {
333         unsigned long flags;
334         ENTRY;
335
336         {
337                 char name[sizeof(current->comm)];
338                 snprintf(name, sizeof(name) - 1, "ll_gns");
339                 kportal_daemonize(name);
340         }
341         SIGNAL_MASK_LOCK(current, flags);
342         sigfillset(&current->blocked);
343         RECALC_SIGPENDING;
344         SIGNAL_MASK_UNLOCK(current, flags);
345
346         gns_thread.t_flags = SVC_RUNNING;
347         wake_up(&gns_thread.t_ctl_waitq);
348
349         while ((gns_thread.t_flags & SVC_STOPPING) == 0) {
350                 struct l_wait_info lwi = { 0 };
351
352                 l_wait_event(gns_thread.t_ctl_waitq, gns_check_event() ||
353                              gns_thread.t_flags & SVC_STOPPING, &lwi);
354
355                 down(&gns_sem);
356                 while (!list_empty(&gns_sbi_list)) {
357                         struct ll_sb_info *sbi =
358                                 list_entry(gns_sbi_list.prev, struct ll_sb_info,
359                                            ll_gns_sbi_head);
360                         list_del_init(&sbi->ll_gns_sbi_head);
361                         ll_gns_umount_all(sbi, 1);
362                 }
363                 up(&gns_sem);
364         }
365
366         gns_thread.t_flags = SVC_STOPPED;
367         wake_up(&gns_thread.t_ctl_waitq);
368
369         RETURN(0);
370 }
371
372 void ll_gns_add_timer(struct ll_sb_info *sbi)
373 {
374         mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ);
375 }
376
377 void ll_gns_del_timer(struct ll_sb_info *sbi)
378 {
379         del_timer(&sbi->ll_gns_timer);
380 }
381
382 int ll_gns_start_thread(void)
383 {
384         struct l_wait_info lwi = { 0 };
385         int rc;
386
387         LASSERT(gns_thread.t_flags == 0);
388         sema_init(&gns_sem, 1);
389
390         init_waitqueue_head(&gns_thread.t_ctl_waitq);
391         rc = kernel_thread(ll_gns_thread_main, NULL, CLONE_VM | CLONE_FILES);
392         if (rc < 0) {
393                 CERROR("cannot start thread: %d\n", rc);
394                 return rc;
395         }
396         l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_RUNNING,
397                      &lwi);
398         return 0;
399 }
400
401 void ll_gns_stop_thread(void)
402 {
403         struct l_wait_info lwi = { 0 };
404
405         down(&gns_sem);
406         gns_thread.t_flags = SVC_STOPPING;
407         up(&gns_sem);
408
409         wake_up(&gns_thread.t_ctl_waitq);
410         l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_STOPPED,
411                      &lwi);
412         gns_thread.t_flags = 0;
413 }