Whamcloud - gitweb
b=3643
[fs/lustre-release.git] / lustre / llite / llite_gns.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Phil Schwan <phil@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22
23 #define DEBUG_SUBSYSTEM S_LLITE
24
25 #include <linux/fs.h>
26 #include <linux/version.h>
27 #include <asm/uaccess.h>
28 #include <linux/file.h>
29 #include <linux/kmod.h>
30
31 #include <linux/lustre_lite.h>
32 #include "llite_internal.h"
33
34 /* Seconds (scaled by HZ at use) a GNS mount may sit unused before removal. */
35 #define GNS_MOUNT_TIMEOUT 120
36 /* Seconds (scaled by HZ at use) between GNS timer runs that look for mounts to clean up. */
37 #define GNS_TICK 30
38
39 int ll_finish_gns(struct ll_sb_info *sbi)
40 {
41         down(&sbi->ll_gns_sem);
42         if (sbi->ll_gns_state != LL_GNS_STATE_MOUNTING) {
43                 up(&sbi->ll_gns_sem);
44                 CERROR("FINISH_GNS called on mount which was not expecting "
45                        "completion.\n");
46                 return -EINVAL;
47         }
48
49         sbi->ll_gns_state = LL_GNS_STATE_FINISHED;
50         up(&sbi->ll_gns_sem);
51         complete(&sbi->ll_gns_completion);
52
53         return 0;
54 }
55
56 /* Pass exactly one (1) page in; when this function returns "page" will point
57  * somewhere into the middle of the page. */
58 int fill_page_with_path(struct dentry *dentry, struct vfsmount *mnt,
59                         char **pagep)
60 {
61         char *path = *pagep, *p;
62
63         path[PAGE_SIZE - 1] = '\0';
64         p = path + PAGE_SIZE - 1;
65
66         while (1) {
67                 if (p - path < dentry->d_name.len + 1)
68                         return -ENAMETOOLONG;
69                 if (dentry->d_name.name[0] != '/') {
70                         p -= dentry->d_name.len;
71                         memcpy(p, dentry->d_name.name, dentry->d_name.len);
72                         p--;
73                         *p = '/';
74                 }
75
76                 dentry = dentry->d_parent;
77                 if (dentry->d_parent == dentry) {
78                         if (mnt->mnt_parent == mnt)
79                                 break; /* finished walking up */
80                         mnt = mntget(mnt);
81                         dget(dentry);
82                         while (dentry->d_parent == dentry &&
83                                follow_up(&mnt, &dentry))
84                                 ;
85                         mntput(mnt);
86                         dput(dentry);
87                 }
88         }
89         *pagep = p;
90         return 0;
91 }
92
93 int ll_dir_process_mount_object(struct dentry *dentry, struct vfsmount *mnt)
94 {
95         struct ll_sb_info *sbi;
96         struct file *mntinfo_fd = NULL;
97         struct page *datapage = NULL, *pathpage;
98         struct address_space *mapping;
99         struct ll_dentry_data *lld = dentry->d_fsdata;
100         struct dentry *dchild, *tmp_dentry;
101         struct vfsmount *tmp_mnt;
102         char *p, *path, *argv[4];
103         int stage = 0, rc = 0;
104         ENTRY;
105
106         if (mnt == NULL) {
107                 CERROR("suid directory found, but no vfsmount available.\n");
108                 RETURN(-1);
109         }
110
111         LASSERT(dentry->d_inode != NULL);
112         LASSERT(S_ISDIR(dentry->d_inode->i_mode));
113         LASSERT(lld != NULL);
114         sbi = ll_i2sbi(dentry->d_inode);
115         LASSERT(sbi != NULL);
116
117         down(&sbi->ll_gns_sem);
118         if (sbi->ll_gns_state == LL_GNS_STATE_MOUNTING) {
119                 up(&sbi->ll_gns_sem);
120                 wait_for_completion(&sbi->ll_gns_completion);
121                 if (d_mountpoint(dentry))
122                         RETURN(0);
123                 RETURN(-1);
124         }
125         if (sbi->ll_gns_state == LL_GNS_STATE_FINISHED) {
126                 /* we lost a race; just return */
127                 up(&sbi->ll_gns_sem);
128                 if (d_mountpoint(dentry))
129                         RETURN(0);
130                 RETURN(-1);
131         }
132         LASSERT(sbi->ll_gns_state == LL_GNS_STATE_IDLE);
133         sbi->ll_gns_state = LL_GNS_STATE_MOUNTING;
134         up(&sbi->ll_gns_sem);
135
136         /* We need to build an absolute pathname to pass to mount */
137         pathpage = alloc_pages(GFP_HIGHUSER, 0);
138         if (pathpage == NULL)
139                 GOTO(cleanup, rc = -ENOMEM);
140         path = kmap(pathpage);
141         LASSERT(path != NULL);
142         stage = 1;
143         fill_page_with_path(dentry, mnt, &path);
144
145         dchild = lookup_one_len(".mntinfo", dentry, strlen(".mntinfo"));
146         if (dchild == NULL || IS_ERR(dchild)) {
147                 CERROR("Directory %*s is setuid, but without a mount object.\n",
148                        dentry->d_name.len, dentry->d_name.name);
149                 GOTO(cleanup, rc = -1);
150         }
151
152         mntget(mnt);
153
154         mntinfo_fd = dentry_open(dchild, mnt, 0);
155         if (IS_ERR(mntinfo_fd)) {
156                 dput(dchild);
157                 mntput(mnt);
158                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
159         }
160         stage = 2;
161
162         if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
163                 CERROR("Mount object file is too big (%Ld)\n",
164                        mntinfo_fd->f_dentry->d_inode->i_size);
165                 GOTO(cleanup, rc = -1);
166         }
167         mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
168         datapage = read_cache_page(mapping, 0,
169                                    (filler_t *)mapping->a_ops->readpage,
170                                    mntinfo_fd);
171         if (IS_ERR(datapage))
172                 GOTO(cleanup, rc = PTR_ERR(datapage));
173
174         p = kmap(datapage);
175         LASSERT(p != NULL);
176         stage = 3;
177
178         p[PAGE_SIZE - 1] = '\0';
179
180         fput(mntinfo_fd);
181         mntinfo_fd = NULL;
182
183         argv[0] = "/usr/lib/lustre/gns-upcall.sh";
184         argv[1] = p;
185         argv[2] = path;
186         argv[3] = NULL;
187         rc = USERMODEHELPER(argv[0], argv, NULL);
188
189         if (rc != 0) {
190                 CERROR("GNS mount failed: %d\n", rc);
191                 GOTO(cleanup, rc);
192         }
193
194         wait_for_completion(&sbi->ll_gns_completion);
195         LASSERT(sbi->ll_gns_state == LL_GNS_STATE_FINISHED);
196
197         if (d_mountpoint(dentry)) {
198                 /* successful follow_down will mntput and dput */
199                 tmp_mnt = mntget(mnt);
200                 tmp_dentry = dget(dentry);
201                 rc = follow_down(&tmp_mnt, &tmp_dentry);
202                 if (rc == 1) {
203                         struct ll_sb_info *sbi = ll_s2sbi(dentry->d_sb);
204                         spin_lock(&dcache_lock);
205                         LASSERT(list_empty(&tmp_mnt->mnt_lustre_list));
206                         list_add_tail(&tmp_mnt->mnt_lustre_list,
207                                       &sbi->ll_mnt_list);
208                         spin_unlock(&dcache_lock);
209
210                         tmp_mnt->mnt_last_used = jiffies;
211
212                         mntput(tmp_mnt);
213                         dput(tmp_dentry);
214                         rc = 0;
215                 } else {
216                         mntput(mnt);
217                         dput(dentry);
218                 }
219         } else {
220                 CERROR("Woke up from GNS mount, but no mountpoint in place.\n");
221                 rc = -1;
222         }
223
224         EXIT;
225 cleanup:
226         switch (stage) {
227         case 3:
228                 kunmap(datapage);
229                 page_cache_release(datapage);
230         case 2:
231                 if (mntinfo_fd != NULL)
232                         fput(mntinfo_fd);
233         case 1:
234                 kunmap(pathpage);
235                 __free_pages(pathpage, 0);
236         case 0:
237                 down(&sbi->ll_gns_sem);
238                 sbi->ll_gns_state = LL_GNS_STATE_IDLE;
239                 up(&sbi->ll_gns_sem);
240         }
241         return rc;
242 }
243
244 /* If timeout == 1, only remove the mounts which are properly aged.
245  *
246  * If timeout == 0, we are unmounting -- remove them all. */
247 int ll_gns_umount_all(struct ll_sb_info *sbi, int timeout)
248 {
249         struct list_head kill_list = LIST_HEAD_INIT(kill_list);
250         struct page *page = NULL;
251         char *kpage = NULL, *path;
252         int rc;
253         ENTRY;
254
255         if (timeout == 0) {
256                 page = alloc_pages(GFP_HIGHUSER, 0);
257                 if (page == NULL)
258                         RETURN(-ENOMEM);
259                 kpage = kmap(page);
260                 LASSERT(kpage != NULL);
261         }
262
263         spin_lock(&dcache_lock);
264         list_splice_init(&sbi->ll_mnt_list, &kill_list);
265
266         /* Walk the list in reverse order, and put them on the front of the
267          * sbi list each iteration; this avoids list-ordering problems if we
268          * race with another gns-mounting thread */
269         while (!list_empty(&kill_list)) {
270                 struct vfsmount *mnt =
271                         list_entry(kill_list.prev, struct vfsmount,
272                                    mnt_lustre_list);
273                 mntget(mnt);
274                 list_del_init(&mnt->mnt_lustre_list);
275                 list_add(&mnt->mnt_lustre_list, &sbi->ll_mnt_list);
276
277                 if (timeout &&
278                     jiffies - mnt->mnt_last_used < GNS_MOUNT_TIMEOUT * HZ) {
279                         mntput(mnt);
280                         continue;
281                 }
282                 spin_unlock(&dcache_lock);
283
284                 CDEBUG(D_INODE, "unmounting mnt %p from sbi %p\n", mnt, sbi);
285
286                 rc = do_umount(mnt, 0);
287                 if (rc != 0 && page != NULL) {
288                         int rc2;
289                         path = kpage;
290                         rc2 = fill_page_with_path(mnt->mnt_root, mnt, &path);
291                         CERROR("GNS umount(%s): %d\n", rc2 == 0 ? path : "",
292                                rc);
293                 }
294                 mntput(mnt);
295                 spin_lock(&dcache_lock);
296         }
297         spin_unlock(&dcache_lock);
298
299         if (page != NULL) {
300                 kunmap(page);
301                 __free_pages(page, 0);
302         }
303         RETURN(0);
304 }
305
306 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
307 static struct semaphore gns_sem;
308 static struct ptlrpc_thread gns_thread;
309
310 void ll_gns_timer_callback(unsigned long data)
311 {
312         struct ll_sb_info *sbi = (void *)data;
313         ENTRY;
314
315         down(&gns_sem);
316         if (list_empty(&sbi->ll_gns_sbi_head))
317                 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
318         up(&gns_sem);
319         wake_up(&gns_thread.t_ctl_waitq);
320         mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ);
321 }
322
323 static int gns_check_event(void)
324 {
325         int rc;
326         down(&gns_sem);
327         rc = !list_empty(&gns_sbi_list);
328         up(&gns_sem);
329         return rc;
330 }
331
332 static int ll_gns_thread_main(void *arg)
333 {
334         unsigned long flags;
335         ENTRY;
336
337         {
338                 char name[sizeof(current->comm)];
339                 snprintf(name, sizeof(name) - 1, "ll_gns");
340                 kportal_daemonize(name);
341         }
342         SIGNAL_MASK_LOCK(current, flags);
343         sigfillset(&current->blocked);
344         RECALC_SIGPENDING;
345         SIGNAL_MASK_UNLOCK(current, flags);
346
347         gns_thread.t_flags = SVC_RUNNING;
348         wake_up(&gns_thread.t_ctl_waitq);
349
350         while ((gns_thread.t_flags & SVC_STOPPING) == 0) {
351                 struct l_wait_info lwi = { 0 };
352
353                 l_wait_event(gns_thread.t_ctl_waitq, gns_check_event() ||
354                              gns_thread.t_flags & SVC_STOPPING, &lwi);
355
356                 down(&gns_sem);
357                 while (!list_empty(&gns_sbi_list)) {
358                         struct ll_sb_info *sbi =
359                                 list_entry(gns_sbi_list.prev, struct ll_sb_info,
360                                            ll_gns_sbi_head);
361                         list_del_init(&sbi->ll_gns_sbi_head);
362                         ll_gns_umount_all(sbi, 1);
363                 }
364                 up(&gns_sem);
365         }
366
367         gns_thread.t_flags = SVC_STOPPED;
368         wake_up(&gns_thread.t_ctl_waitq);
369
370         RETURN(0);
371 }
372
373 void ll_gns_add_timer(struct ll_sb_info *sbi)
374 {
375         mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ);
376 }
377
378 void ll_gns_del_timer(struct ll_sb_info *sbi)
379 {
380         del_timer(&sbi->ll_gns_timer);
381 }
382
383 int ll_gns_start_thread(void)
384 {
385         struct l_wait_info lwi = { 0 };
386         int rc;
387
388         LASSERT(gns_thread.t_flags == 0);
389         sema_init(&gns_sem, 1);
390
391         init_waitqueue_head(&gns_thread.t_ctl_waitq);
392         rc = kernel_thread(ll_gns_thread_main, NULL, CLONE_VM | CLONE_FILES);
393         if (rc < 0) {
394                 CERROR("cannot start thread: %d\n", rc);
395                 return rc;
396         }
397         l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_RUNNING,
398                      &lwi);
399         return 0;
400 }
401
402 void ll_gns_stop_thread(void)
403 {
404         struct l_wait_info lwi = { 0 };
405
406         down(&gns_sem);
407         gns_thread.t_flags = SVC_STOPPING;
408         up(&gns_sem);
409
410         wake_up(&gns_thread.t_ctl_waitq);
411         l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_STOPPED,
412                      &lwi);
413         gns_thread.t_flags = 0;
414 }