Whamcloud - gitweb
- check for GNS mount even if upcall returned an error (so as not to miss mounts)
[fs/lustre-release.git] / lustre / llite / llite_gns.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
5  *
6  * Author: Phil Schwan <phil@clusterfs.com>
7  * Author: Oleg Drokin <green@clusterfs.com>
8  * Author: Yury Umanets <yury@clusterfs.com>
9  * Review: Nikita Danilov <nikita@clusterfs.com>
10  *
11  *   This file is part of Lustre, http://www.lustre.org.
12  *
13  *   Lustre is free software; you can redistribute it and/or
14  *   modify it under the terms of version 2 of the GNU General Public
15  *   License as published by the Free Software Foundation.
16  *
17  *   Lustre is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *   GNU General Public License for more details.
21  *
22  *   You should have received a copy of the GNU General Public License
23  *   along with Lustre; if not, write to the Free Software
24  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #define DEBUG_SUBSYSTEM S_LLITE
28
29 #include <linux/fs.h>
30 #include <linux/version.h>
31 #include <asm/uaccess.h>
32 #include <linux/file.h>
33 #include <linux/kmod.h>
34
35 #include <linux/lustre_lite.h>
36 #include "llite_internal.h"
37
38 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
39 static spinlock_t gns_lock = SPIN_LOCK_UNLOCKED;
40 static struct ptlrpc_thread gns_thread;
41 static struct ll_gns_ctl gns_ctl;
42
43 /*
44  * waits until passed dentry gets mountpoint or timeout and attempts are
45  * exhausted. Returns 1 if dentry became mountpoint and 0 otherwise.
46  */
47 static int
48 ll_gns_wait_for_mount(struct dentry *dentry,
49                       int timeout, int tries)
50 {
51         struct l_wait_info lwi;
52         struct ll_sb_info *sbi;
53         ENTRY;
54
55         LASSERT(dentry != NULL);
56         LASSERT(!IS_ERR(dentry));
57         sbi = ll_s2sbi(dentry->d_sb);
58         
59         lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
60         for (; !d_mountpoint(dentry) && tries > 0; tries--)
61                 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
62
63         if (d_mountpoint(dentry)) {
64                 spin_lock(&sbi->ll_gns_lock);
65                 sbi->ll_gns_state = LL_GNS_FINISHED;
66                 spin_unlock(&sbi->ll_gns_lock);
67                 RETURN(0);
68         }
69         RETURN(-ETIME);
70 }
71
72 /* 
73  * sending a signal known to be ignored to cause restarting syscall if GNS mount
74  * function returns -ERESTARTSYS.
75  */
76 static void
77 ll_gns_send_signal(void)
78 {
79         struct task_struct *task = current;
80         int signal = SIGCONT;
81
82         read_lock(&tasklist_lock);
83         spin_lock_irq(&task->sighand->siglock);
84         sigaddset(&task->pending.signal, signal);
85         spin_unlock_irq(&task->sighand->siglock);
86         read_unlock(&tasklist_lock);
87         set_tsk_thread_flag(task, TIF_SIGPENDING);
88 }
89
90 /*
91  * tries to mount the mount object under passed @dentry. In the case of success
92  * @dentry will become mount point and 0 will be returned. Error code will be
93  * returned otherwise.
94  */
95 int
96 ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
97 {
98         char *path, *pathpage, *datapage, *argv[4];
99         struct file *mntinfo_fd = NULL;
100         int cleanup_phase = 0, rc = 0;
101         struct ll_sb_info *sbi;
102         struct dentry *dchild;
103         ENTRY;
104
105         LASSERT(dentry->d_inode != NULL);
106
107         if (!S_ISDIR(dentry->d_inode->i_mode))
108                 RETURN(-EINVAL);
109
110         sbi = ll_i2sbi(dentry->d_inode);
111         LASSERT(sbi != NULL);
112
113         spin_lock(&sbi->ll_gns_lock);
114
115         if (sbi->ll_gns_state == LL_GNS_DISABLED) {
116                 spin_unlock(&sbi->ll_gns_lock);
117                 RETURN(-EINVAL);
118         }
119         
120         if (mnt == NULL) {
121                 CERROR("suid directory found, but no "
122                        "vfsmount available.\n");
123                 RETURN(-EINVAL);
124         }
125
126         /* 
127          * another thead is in progress or just finished mounting the
128          * dentry. Handling that.
129          */
130         if (sbi->ll_gns_state == LL_GNS_MOUNTING ||
131             sbi->ll_gns_state == LL_GNS_FINISHED) {
132                 /* 
133                  * another thread is trying to mount GNS dentry. We'd like to
134                  * handling that.
135                  */
136                 spin_unlock(&sbi->ll_gns_lock);
137
138                 /* 
139                  * check if dentry is mount point already, if so, do not restart
140                  * syscal.
141                  */
142                 if (d_mountpoint(dentry))
143                         RETURN(0);
144
145                 /* 
146                  * causing syscall to restart and find this dentry already
147                  * mounted.
148                  */
149                 ll_gns_send_signal();
150                 RETURN(-ERESTARTSYS);
151
152 #if 0
153                 wait_for_completion(&sbi->ll_gns_mount_finished);
154                 if (d_mountpoint(dentry))
155                         RETURN(0);
156 #endif
157         }
158         LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
159
160         CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
161
162         /* mounting started */
163         sbi->ll_gns_state = LL_GNS_MOUNTING;
164         spin_unlock(&sbi->ll_gns_lock);
165
166         /* we need to build an absolute pathname to pass to mount */
167         pathpage = (char *)__get_free_page(GFP_KERNEL);
168         if (!pathpage)
169                 GOTO(cleanup, rc = -ENOMEM);
170         cleanup_phase = 1;
171
172         /* getting @dentry path stored in @pathpage. */
173         path = d_path(dentry, mnt, pathpage, PAGE_SIZE);
174         if (IS_ERR(path)) {
175                 CERROR("can't build mount object path, err %d\n",
176                        (int)PTR_ERR(dchild));
177                 GOTO(cleanup, rc = PTR_ERR(dchild));
178         }
179
180         /* synchronizing with possible /proc/fs/...write */
181         down(&sbi->ll_gns_sem);
182         
183         /* 
184          * mount object name is taken from sbi, where it is set in mount time or
185          * via /proc/fs... tunable. It may be ".mntinfo" or so.
186          */
187
188         /* 
189          * recursive lookup with trying to mount SUID bit marked directories on
190          * the way is not possible here, as lookup_one_len() does not pass @nd
191          * to ->lookup() and this is checked in ll_lookup_it(). So, do not
192          * handle possible -EAGAIN here.
193          */
194         dchild = ll_lookup_one_len(sbi->ll_gns_oname, dentry,
195                                    strlen(sbi->ll_gns_oname));
196         up(&sbi->ll_gns_sem);
197
198         cleanup_phase = 2;
199         
200         if (IS_ERR(dchild)) {
201                 rc = PTR_ERR(dchild);
202                 CERROR("can't find mount object %*s/%*s err = %d.\n",
203                        (int)dentry->d_name.len, dentry->d_name.name,
204                        strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
205                        rc);
206                 GOTO(cleanup, rc);
207         }
208
209         /* mount object is not found */
210         if (!dchild->d_inode)
211                 GOTO(cleanup, rc = -ENOENT);
212
213         /* check if found child is regular file */
214         if (!S_ISREG(dchild->d_inode->i_mode))
215                 GOTO(cleanup, rc = -EOPNOTSUPP);
216
217         mntget(mnt);
218
219         /* ok, mount object if found, opening it. */
220         mntinfo_fd = dentry_open(dchild, mnt, 0);
221         if (IS_ERR(mntinfo_fd)) {
222                 CERROR("can't open mount object %*s/%*s err = %d.\n",
223                        (int)dentry->d_name.len, dentry->d_name.name,
224                        strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
225                        (int)PTR_ERR(mntinfo_fd));
226                 mntput(mnt);
227                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
228         }
229         cleanup_phase = 3;
230
231         if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE - 1) {
232                 CERROR("mount object %*s/%*s is too big (%Ld)\n",
233                        (int)dentry->d_name.len, dentry->d_name.name,
234                        strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
235                        mntinfo_fd->f_dentry->d_inode->i_size);
236                 GOTO(cleanup, rc = -EFBIG);
237         }
238
239         datapage = (char *)__get_free_page(GFP_KERNEL);
240         if (!datapage)
241                 GOTO(cleanup, rc = -ENOMEM);
242
243         cleanup_phase = 4;
244         
245         /* read data from mount object. */
246         rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE - 1);
247         if (rc < 0) {
248                 CERROR("can't read mount object %*s/%*s data, err %d\n",
249                        (int)dentry->d_name.len, dentry->d_name.name,
250                        strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
251                        rc);
252                 GOTO(cleanup, rc);
253         }
254
255         /* no data in mount object? */
256         if (rc == 0) {
257                 CERROR("mount object %*s/%*s is empty?\n",
258                        (int)dentry->d_name.len, dentry->d_name.name,
259                        strlen(sbi->ll_gns_oname), sbi->ll_gns_oname);
260                 GOTO(cleanup, rc);
261         }
262
263         datapage[rc] = '\0';
264         fput(mntinfo_fd);
265         mntinfo_fd = NULL;
266         dchild = NULL;
267
268         /* synchronizing with possible /proc/fs/...write */
269         down(&sbi->ll_gns_sem);
270
271         /*
272          * upcall is initialized in mount time or via /proc/fs/... tuneable and
273          * may be /usr/lib/lustre/gns-upcall.sh
274          */
275         argv[0] = sbi->ll_gns_upcall;
276         argv[1] = datapage;
277         argv[2] = path;
278         argv[3] = NULL;
279         
280         up(&sbi->ll_gns_sem);
281
282         /* do not wait for helper complete here. */
283         rc = call_usermodehelper(argv[0], argv, NULL, 0);
284         if (rc) {
285                 CWARN("failed to call GNS upcall %s, err = %d, "
286                       "checking for mount anyway\n", sbi->ll_gns_upcall, rc);
287         }
288
289         /*
290          * waiting for dentry become mount point GNS_WAIT_ATTEMPTS times by 1
291          * second.
292          */
293         rc = ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS);
294         complete_all(&sbi->ll_gns_mount_finished);
295         if (rc == 0) {
296                 struct dentry *rdentry;
297                 struct vfsmount *rmnt;
298                 
299                 /* mount is successful */
300                 LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
301
302                 rmnt = mntget(mnt);
303                 rdentry = dget(dentry);
304                 
305                 if (follow_down(&rmnt, &rdentry)) {
306                         /* 
307                          * registering new mount in GNS mounts list and thus
308                          * make it accessible from GNS control thread.
309                          */
310                         spin_lock(&dcache_lock);
311                         LASSERT(list_empty(&rmnt->mnt_lustre_list));
312                         list_add_tail(&rmnt->mnt_lustre_list,
313                                       &sbi->ll_mnt_list);
314                         spin_unlock(&dcache_lock);
315                         rmnt->mnt_last_used = jiffies;
316                         mntput(rmnt);
317                         dput(rdentry);
318                 } else {
319                         mntput(mnt);
320                         dput(dentry);
321                 }
322         } else {
323                 CERROR("usermode upcall %s failed to mount %s, err %d\n",
324                        sbi->ll_gns_upcall, path, rc);
325         }
326                 
327         EXIT;
328 cleanup:
329         switch (cleanup_phase) {
330         case 4:
331                 free_page((unsigned long)datapage);
332         case 3:
333                 if (mntinfo_fd != NULL)
334                         fput(mntinfo_fd);
335         case 2:
336                 if (dchild != NULL)
337                         dput(dchild);
338         case 1:
339                 free_page((unsigned long)pathpage);
340                 complete_all(&sbi->ll_gns_mount_finished);
341         case 0:
342                 spin_lock(&sbi->ll_gns_lock);
343                 sbi->ll_gns_state = LL_GNS_IDLE;
344                 spin_unlock(&sbi->ll_gns_lock);
345         }
346         return rc;
347 }
348
349 /* tries to umount passed @mnt. */
350 int ll_gns_umount_object(struct vfsmount *mnt)
351 {
352         int rc = 0;
353         ENTRY;
354         
355         CDEBUG(D_INODE, "unmounting mnt %p\n", mnt);
356         rc = do_umount(mnt, 0);
357         if (rc) {
358                 CDEBUG(D_INODE, "can't umount 0x%p, err = %d\n",
359                        mnt, rc);
360         }
361         
362         RETURN(rc);
363 }
364
365 int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
366 {
367         struct list_head check_list = LIST_HEAD_INIT(check_list);
368         struct vfsmount *mnt;
369         unsigned long pass;
370         ENTRY;
371
372         spin_lock(&dcache_lock);
373         list_splice_init(&sbi->ll_mnt_list, &check_list);
374
375         /*
376          * walk the list in reverse order, and put them on the front of the sbi
377          * list each iteration; this avoids list-ordering problems if we race
378          * with another gns-mounting thread.
379          */
380         while (!list_empty(&check_list)) {
381                 mnt = list_entry(check_list.prev,
382                                  struct vfsmount,
383                                  mnt_lustre_list);
384
385                 mntget(mnt);
386
387                 list_del_init(&mnt->mnt_lustre_list);
388
389                 list_add(&mnt->mnt_lustre_list,
390                          &sbi->ll_mnt_list);
391
392                 /* check for timeout if needed */
393                 pass = jiffies - mnt->mnt_last_used;
394                 
395                 if (flags == LL_GNS_CHECK &&
396                     pass < sbi->ll_gns_timeout * HZ)
397                 {
398                         mntput(mnt);
399                         continue;
400                 }
401                 spin_unlock(&dcache_lock);
402
403                 /* umounting @mnt */
404                 ll_gns_umount_object(mnt);
405
406                 mntput(mnt);
407                 spin_lock(&dcache_lock);
408         }
409         spin_unlock(&dcache_lock);
410         RETURN(0);
411 }
412
413 /*
414  * GNS timer callback function. It restarts gns timer and wakes up GNS control
415  * thread to process mounts list.
416  */
417 void ll_gns_timer_callback(unsigned long data)
418 {
419         struct ll_sb_info *sbi = (void *)data;
420         ENTRY;
421
422         spin_lock(&gns_lock);
423         if (list_empty(&sbi->ll_gns_sbi_head))
424                 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
425         spin_unlock(&gns_lock);
426         
427         wake_up(&gns_thread.t_ctl_waitq);
428         mod_timer(&sbi->ll_gns_timer,
429                   jiffies + sbi->ll_gns_tick * HZ);
430 }
431
432 /* this function checks if something new happened to exist in gns list. */
433 static int inline ll_gns_check_event(void)
434 {
435         int rc;
436         
437         spin_lock(&gns_lock);
438         rc = !list_empty(&gns_sbi_list);
439         spin_unlock(&gns_lock);
440
441         return rc;
442 }
443
444 /* should we stop GNS control thread? */
445 static int inline ll_gns_check_stop(void)
446 {
447         mb();
448         return (gns_thread.t_flags & SVC_STOPPING) ? 1 : 0;
449 }
450
451 /* GNS control thread function. */
452 static int ll_gns_thread_main(void *arg)
453 {
454         struct ll_gns_ctl *ctl = arg;
455         unsigned long flags;
456         ENTRY;
457
458         {
459                 char name[sizeof(current->comm)];
460                 snprintf(name, sizeof(name) - 1, "ll_gns");
461                 kportal_daemonize(name);
462         }
463         
464         SIGNAL_MASK_LOCK(current, flags);
465         sigfillset(&current->blocked);
466         RECALC_SIGPENDING;
467         SIGNAL_MASK_UNLOCK(current, flags);
468
469         /*
470          * letting starting function know, that we are ready and control may be
471          * returned.
472          */
473         gns_thread.t_flags = SVC_RUNNING;
474         complete(&ctl->gc_starting);
475
476         while (!ll_gns_check_stop()) {
477                 struct l_wait_info lwi = { 0 };
478
479                 l_wait_event(gns_thread.t_ctl_waitq,
480                              (ll_gns_check_event() ||
481                               ll_gns_check_stop()), &lwi);
482                 
483                 spin_lock(&gns_lock);
484                 while (!list_empty(&gns_sbi_list)) {
485                         struct ll_sb_info *sbi;
486
487                         sbi = list_entry(gns_sbi_list.prev,
488                                          struct ll_sb_info,
489                                          ll_gns_sbi_head);
490                         
491                         list_del_init(&sbi->ll_gns_sbi_head);
492                         spin_unlock(&gns_lock);
493                         ll_gns_check_mounts(sbi, LL_GNS_CHECK);
494                         spin_lock(&gns_lock);
495                 }
496                 spin_unlock(&gns_lock);
497         }
498
499         EXIT;
500         gns_thread.t_flags = SVC_STOPPED;
501
502         /* this is SMP-safe way to finish thread. */
503         complete_and_exit(&ctl->gc_finishing, 0);
504 }
505
506 void ll_gns_add_timer(struct ll_sb_info *sbi)
507 {
508         mod_timer(&sbi->ll_gns_timer,
509                   jiffies + sbi->ll_gns_tick * HZ);
510 }
511
512 void ll_gns_del_timer(struct ll_sb_info *sbi)
513 {
514         del_timer(&sbi->ll_gns_timer);
515 }
516
517 /*
518  * starts GNS control thread and waits for a signal it is up and work may be
519  * continued.
520  */
521 int ll_gns_start_thread(void)
522 {
523         int rc;
524         ENTRY;
525
526         LASSERT(gns_thread.t_flags == 0);
527         init_completion(&gns_ctl.gc_starting);
528         init_completion(&gns_ctl.gc_finishing);
529         init_waitqueue_head(&gns_thread.t_ctl_waitq);
530         
531         rc = kernel_thread(ll_gns_thread_main, &gns_ctl,
532                            (CLONE_VM | CLONE_FILES));
533         if (rc < 0) {
534                 CERROR("cannot start GNS control thread, "
535                        "err = %d\n", rc);
536                 RETURN(rc);
537         }
538         wait_for_completion(&gns_ctl.gc_starting);
539         LASSERT(gns_thread.t_flags == SVC_RUNNING);
540         RETURN(0);
541 }
542
543 /* stops GNS control thread and waits its actual stop. */
544 void ll_gns_stop_thread(void)
545 {
546         ENTRY;
547         gns_thread.t_flags = SVC_STOPPING;
548         wake_up(&gns_thread.t_ctl_waitq);
549         wait_for_completion(&gns_ctl.gc_finishing);
550         LASSERT(gns_thread.t_flags == SVC_STOPPED);
551         gns_thread.t_flags = 0;
552         EXIT;
553 }