Whamcloud - gitweb
- unland b_fid to HEAD
[fs/lustre-release.git] / lustre / llite / llite_gns.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Phil Schwan <phil@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22
23 #define DEBUG_SUBSYSTEM S_LLITE
24
25 #include <linux/fs.h>
26 #include <linux/version.h>
27 #include <asm/uaccess.h>
28 #include <linux/file.h>
29 #include <linux/kmod.h>
30
31 #include <linux/lustre_lite.h>
32 #include "llite_internal.h"
33
/* Idle time, in seconds, after which a GNS mount becomes eligible for
 * automatic removal (scaled by HZ where it is compared against jiffies). */
#define GNS_MOUNT_TIMEOUT 120
/* Interval, in seconds, between firings of the per-superblock GNS timer that
 * requests a scan for mounts to clean up (scaled by HZ when the timer is
 * armed). */
#define GNS_TICK 30
38
39 int ll_finish_gns(struct ll_sb_info *sbi)
40 {
41         down(&sbi->ll_gns_sem);
42         if (sbi->ll_gns_state != LL_GNS_STATE_MOUNTING) {
43                 up(&sbi->ll_gns_sem);
44                 CERROR("FINISH_GNS called on mount which was not expecting "
45                        "completion.\n");
46                 return -EINVAL;
47         }
48
49         sbi->ll_gns_state = LL_GNS_STATE_FINISHED;
50         up(&sbi->ll_gns_sem);
51         complete(&sbi->ll_gns_completion);
52
53         return 0;
54 }
55
56 /* Pass exactly one (1) page in; when this function returns "page" will point
57  * somewhere into the middle of the page. */
58 int fill_page_with_path(struct dentry *dentry, struct vfsmount *mnt,
59                         char **pagep)
60 {
61         char *path = *pagep, *p;
62
63         path[PAGE_SIZE - 1] = '\0';
64         p = path + PAGE_SIZE - 1;
65
66         while (1) {
67                 if (p - path < dentry->d_name.len + 1)
68                         return -ENAMETOOLONG;
69                 if (dentry->d_name.name[0] != '/') {
70                         p -= dentry->d_name.len;
71                         memcpy(p, dentry->d_name.name, dentry->d_name.len);
72                         p--;
73                         *p = '/';
74                 }
75
76                 dentry = dentry->d_parent;
77                 if (dentry->d_parent == dentry) {
78                         if (mnt->mnt_parent == mnt)
79                                 break; /* finished walking up */
80                         mnt = mntget(mnt);
81                         dget(dentry);
82                         while (dentry->d_parent == dentry &&
83                                follow_up(&mnt, &dentry))
84                                 ;
85                         mntput(mnt);
86                         dput(dentry);
87                 }
88         }
89         *pagep = p;
90         return 0;
91 }
92
93 int ll_dir_process_mount_object(struct dentry *dentry, struct vfsmount *mnt)
94 {
95         struct ll_sb_info *sbi;
96         struct file *mntinfo_fd = NULL;
97         struct page *datapage = NULL, *pathpage;
98         struct address_space *mapping;
99         struct ll_dentry_data *lld = dentry->d_fsdata;
100         struct dentry *dchild, *tmp_dentry;
101         struct vfsmount *tmp_mnt;
102         char *p, *path, *argv[4];
103         int stage = 0, rc = 0;
104         ENTRY;
105
106         if (mnt == NULL) {
107                 CERROR("suid directory found, but no vfsmount available.\n");
108                 RETURN(-1);
109         }
110
111         LASSERT(dentry->d_inode != NULL);
112         LASSERT(S_ISDIR(dentry->d_inode->i_mode));
113         LASSERT(lld != NULL);
114         sbi = ll_i2sbi(dentry->d_inode);
115         LASSERT(sbi != NULL);
116
117         down(&sbi->ll_gns_sem);
118         if (sbi->ll_gns_state == LL_GNS_STATE_MOUNTING) {
119                 up(&sbi->ll_gns_sem);
120                 wait_for_completion(&sbi->ll_gns_completion);
121                 if (d_mountpoint(dentry))
122                         RETURN(0);
123                 RETURN(-1);
124         }
125         if (sbi->ll_gns_state == LL_GNS_STATE_FINISHED) {
126                 /* we lost a race; just return */
127                 up(&sbi->ll_gns_sem);
128                 if (d_mountpoint(dentry))
129                         RETURN(0);
130                 RETURN(-1);
131         }
132         LASSERT(sbi->ll_gns_state == LL_GNS_STATE_IDLE);
133         sbi->ll_gns_state = LL_GNS_STATE_MOUNTING;
134         up(&sbi->ll_gns_sem);
135
136         /* We need to build an absolute pathname to pass to mount */
137         pathpage = alloc_pages(GFP_HIGHUSER, 0);
138         if (pathpage == NULL)
139                 GOTO(cleanup, rc = -ENOMEM);
140         path = kmap(pathpage);
141         LASSERT(path != NULL);
142         stage = 1;
143         fill_page_with_path(dentry, mnt, &path);
144
145         dchild = lookup_one_len(".mntinfo", dentry, strlen(".mntinfo"));
146         if (dchild == NULL || IS_ERR(dchild)) {
147                 CERROR("Directory %*s is setuid, but without a mount object.\n",
148                        dentry->d_name.len, dentry->d_name.name);
149                 GOTO(cleanup, rc = -1);
150         }
151
152         mntget(mnt);
153
154         mntinfo_fd = dentry_open(dchild, mnt, 0);
155         if (IS_ERR(mntinfo_fd)) {
156                 dput(dchild);
157                 mntput(mnt);
158                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
159         }
160         stage = 2;
161
162         if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
163                 CERROR("Mount object file is too big (%Ld)\n",
164                        mntinfo_fd->f_dentry->d_inode->i_size);
165                 GOTO(cleanup, rc = -1);
166         }
167         mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
168         datapage = read_cache_page(mapping, 0,
169                                    (filler_t *)mapping->a_ops->readpage,
170                                    mntinfo_fd);
171         if (IS_ERR(datapage))
172                 GOTO(cleanup, rc = PTR_ERR(datapage));
173
174         p = kmap(datapage);
175         LASSERT(p != NULL);
176         stage = 3;
177
178         p[PAGE_SIZE - 1] = '\0';
179
180         fput(mntinfo_fd);
181         mntinfo_fd = NULL;
182
183         argv[0] = "/usr/lib/lustre/gns-upcall.sh";
184         argv[1] = p;
185         argv[2] = path;
186         argv[3] = NULL;
187         rc = USERMODEHELPER(argv[0], argv, NULL);
188
189         if (rc != 0) {
190                 CERROR("GNS mount failed: %d\n", rc);
191                 GOTO(cleanup, rc);
192         }
193
194         wait_for_completion(&sbi->ll_gns_completion);
195         LASSERT(sbi->ll_gns_state == LL_GNS_STATE_FINISHED);
196
197         if (d_mountpoint(dentry)) {
198                 /* successful follow_down will mntput and dput */
199                 tmp_mnt = mntget(mnt);
200                 tmp_dentry = dget(dentry);
201                 rc = follow_down(&tmp_mnt, &tmp_dentry);
202                 if (rc == 1) {
203                         struct ll_sb_info *sbi = ll_s2sbi(dentry->d_sb);
204                         spin_lock(&dcache_lock);
205                         LASSERT(list_empty(&tmp_mnt->mnt_lustre_list));
206                         list_add_tail(&tmp_mnt->mnt_lustre_list,
207                                       &sbi->ll_mnt_list);
208                         spin_unlock(&dcache_lock);
209
210                         tmp_mnt->mnt_last_used = jiffies;
211
212                         mntput(tmp_mnt);
213                         dput(tmp_dentry);
214                         rc = 0;
215                 } else {
216                         mntput(mnt);
217                         dput(dentry);
218                 }
219         } else {
220                 CERROR("Woke up from GNS mount, but no mountpoint in place.\n");
221                 rc = -1;
222         }
223
224         EXIT;
225 cleanup:
226         switch (stage) {
227         case 3:
228                 kunmap(datapage);
229                 page_cache_release(datapage);
230         case 2:
231                 if (mntinfo_fd != NULL)
232                         fput(mntinfo_fd);
233         case 1:
234                 kunmap(pathpage);
235                 __free_pages(pathpage, 0);
236         case 0:
237                 down(&sbi->ll_gns_sem);
238                 sbi->ll_gns_state = LL_GNS_STATE_IDLE;
239                 up(&sbi->ll_gns_sem);
240         }
241         return rc;
242 }
243
244 /* If timeout == 1, only remove the mounts which are properly aged.
245  *
246  * If timeout == 0, we are unmounting -- remove them all. */
247 int ll_gns_umount_all(struct ll_sb_info *sbi, int timeout)
248 {
249         struct list_head kill_list = LIST_HEAD_INIT(kill_list);
250         struct page *page = NULL;
251         char *kpage = NULL, *path;
252         int rc;
253         ENTRY;
254
255         if (timeout == 0) {
256                 page = alloc_pages(GFP_HIGHUSER, 0);
257                 if (page == NULL)
258                         RETURN(-ENOMEM);
259                 kpage = kmap(page);
260                 LASSERT(kpage != NULL);
261         }
262
263         spin_lock(&dcache_lock);
264         list_splice_init(&sbi->ll_mnt_list, &kill_list);
265
266         /* Walk the list in reverse order, and put them on the front of the
267          * sbi list each iteration; this avoids list-ordering problems if we
268          * race with another gns-mounting thread */
269         while (!list_empty(&kill_list)) {
270                 struct vfsmount *mnt =
271                         list_entry(kill_list.prev, struct vfsmount,
272                                    mnt_lustre_list);
273                 mntget(mnt);
274                 list_del_init(&mnt->mnt_lustre_list);
275                 list_add(&mnt->mnt_lustre_list, &sbi->ll_mnt_list);
276
277                 if (timeout &&
278                     jiffies - mnt->mnt_last_used < GNS_MOUNT_TIMEOUT * HZ) {
279                         mntput(mnt);
280                         continue;
281                 }
282                 spin_unlock(&dcache_lock);
283
284                 CDEBUG(D_INODE, "unmounting mnt %p from sbi %p\n", mnt, sbi);
285
286                 rc = do_umount(mnt, 0);
287                 if (rc != 0 && page != NULL) {
288                         int rc2;
289                         path = kpage;
290                         rc2 = fill_page_with_path(mnt->mnt_root, mnt, &path);
291                         CERROR("GNS umount(%s): %d\n", rc2 == 0 ? path : "",
292                                rc);
293                 }
294                 mntput(mnt);
295                 spin_lock(&dcache_lock);
296         }
297         spin_unlock(&dcache_lock);
298
299         if (page != NULL) {
300                 kunmap(page);
301                 __free_pages(page, 0);
302         }
303         RETURN(0);
304 }
305
306 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
307 static spinlock_t gns_lock = SPIN_LOCK_UNLOCKED;
308 static struct ptlrpc_thread gns_thread;
309
310 void ll_gns_timer_callback(unsigned long data)
311 {
312         struct ll_sb_info *sbi = (void *)data;
313         ENTRY;
314
315         spin_lock(&gns_lock);
316         if (list_empty(&sbi->ll_gns_sbi_head))
317                 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
318         spin_unlock(&gns_lock);
319         wake_up(&gns_thread.t_ctl_waitq);
320         mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ);
321 }
322
323 static int gns_check_event(void)
324 {
325         int rc;
326         spin_lock(&gns_lock);
327         rc = !list_empty(&gns_sbi_list);
328         spin_unlock(&gns_lock);
329         return rc;
330 }
331
332 static int inline gns_check_stopping(void)
333 {
334         mb();
335         return (gns_thread.t_flags & SVC_STOPPING) ? 1 : 0;
336 }
337
338 static int ll_gns_thread_main(void *arg)
339 {
340         unsigned long flags;
341         ENTRY;
342
343         {
344                 char name[sizeof(current->comm)];
345                 snprintf(name, sizeof(name) - 1, "ll_gns");
346                 kportal_daemonize(name);
347         }
348         SIGNAL_MASK_LOCK(current, flags);
349         sigfillset(&current->blocked);
350         RECALC_SIGPENDING;
351         SIGNAL_MASK_UNLOCK(current, flags);
352
353         gns_thread.t_flags = SVC_RUNNING;
354         wake_up(&gns_thread.t_ctl_waitq);
355
356         while (!gns_check_stopping()) {
357                 struct l_wait_info lwi = { 0 };
358
359                 l_wait_event(gns_thread.t_ctl_waitq, gns_check_event() ||
360                              gns_check_stopping(), &lwi);
361
362                 spin_lock(&gns_lock);
363                 while (!list_empty(&gns_sbi_list)) {
364                         struct ll_sb_info *sbi =
365                                 list_entry(gns_sbi_list.prev, struct ll_sb_info,
366                                            ll_gns_sbi_head);
367                         list_del_init(&sbi->ll_gns_sbi_head);
368                         spin_unlock(&gns_lock);
369                         ll_gns_umount_all(sbi, 1);
370                         spin_lock(&gns_lock);
371                 }
372                 spin_unlock(&gns_lock);
373         }
374
375         gns_thread.t_flags = SVC_STOPPED;
376         wake_up(&gns_thread.t_ctl_waitq);
377
378         RETURN(0);
379 }
380
381 void ll_gns_add_timer(struct ll_sb_info *sbi)
382 {
383         mod_timer(&sbi->ll_gns_timer, jiffies + GNS_TICK * HZ);
384 }
385
386 void ll_gns_del_timer(struct ll_sb_info *sbi)
387 {
388         del_timer(&sbi->ll_gns_timer);
389 }
390
391 int ll_gns_start_thread(void)
392 {
393         struct l_wait_info lwi = { 0 };
394         int rc;
395
396         LASSERT(gns_thread.t_flags == 0);
397
398         init_waitqueue_head(&gns_thread.t_ctl_waitq);
399         rc = kernel_thread(ll_gns_thread_main, NULL, CLONE_VM | CLONE_FILES);
400         if (rc < 0) {
401                 CERROR("cannot start thread: %d\n", rc);
402                 return rc;
403         }
404         l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_RUNNING,
405                      &lwi);
406         return 0;
407 }
408
409 void ll_gns_stop_thread(void)
410 {
411         struct l_wait_info lwi = { 0 };
412
413         gns_thread.t_flags = SVC_STOPPING;
414
415         wake_up(&gns_thread.t_ctl_waitq);
416         l_wait_event(gns_thread.t_ctl_waitq, gns_thread.t_flags & SVC_STOPPED,
417                      &lwi);
418         gns_thread.t_flags = 0;
419 }