Whamcloud - gitweb
land lustre part of b_hd_sec on HEAD.
[fs/lustre-release.git] / lustre / llite / llite_gns.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Phil Schwan <phil@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22
23 #define DEBUG_SUBSYSTEM S_LLITE
24
25 #include <linux/fs.h>
26 #include <linux/version.h>
27 #include <asm/uaccess.h>
28 #include <linux/file.h>
29 #include <linux/kmod.h>
30
31 #include <linux/lustre_lite.h>
32 #include "llite_internal.h"
33
34 static struct list_head gns_sbi_list = LIST_HEAD_INIT(gns_sbi_list);
35 static spinlock_t gns_lock = SPIN_LOCK_UNLOCKED;
36 static struct ptlrpc_thread gns_thread;
37 static struct ll_gns_ctl gns_ctl;
38
39 /*
40  * waits until passed dentry gets mountpoint or timeout and attempts are
41  * exhausted. Returns 1 if dentry became mountpoint and 0 otherwise.
42  */
43 static int
44 ll_gns_wait_for_mount(struct dentry *dentry,
45                       int timeout, int tries)
46 {
47         struct l_wait_info lwi;
48         struct ll_sb_info *sbi;
49         int rc;
50         ENTRY;
51
52         LASSERT(dentry != NULL);
53         LASSERT(!IS_ERR(dentry));
54         sbi = ll_s2sbi(dentry->d_sb);
55         
56         for (; !d_mountpoint(dentry) && tries > 0; tries--) {
57                 lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
58                 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
59         }
60
61         if ((rc = d_mountpoint(dentry) ? 1 : 0)) {
62                 spin_lock(&sbi->ll_gns_lock);
63                 LASSERT(sbi->ll_gns_state == LL_GNS_MOUNTING);
64                 sbi->ll_gns_state = LL_GNS_FINISHED;
65                 spin_unlock(&sbi->ll_gns_lock);
66         }
67
68         complete(&sbi->ll_gns_mount_finished);
69         RETURN(rc);
70 }
71
72 /*
73  * tries to mount the mount object under passed @dentry. In the case of success
74  * @dentry will become mount point and 0 will be retuned. Error code will be
75  * returned otherwise.
76  */
77 int ll_gns_mount_object(struct dentry *dentry,
78                         struct vfsmount *mnt)
79 {
80         struct ll_dentry_data *lld = dentry->d_fsdata;
81         char *p, *path, *pathpage, *argv[4];
82         struct file *mntinfo_fd = NULL;
83         struct address_space *mapping;
84         int cleanup_phase = 0, rc = 0;
85         struct ll_sb_info *sbi;
86         struct dentry *dchild;
87         struct page *datapage;
88         filler_t *filler;
89         ENTRY;
90
91         if (mnt == NULL) {
92                 CERROR("suid directory found, but no "
93                        "vfsmount available.\n");
94                 RETURN(-EINVAL);
95         }
96
97         CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
98
99         LASSERT(dentry->d_inode != NULL);
100         LASSERT(S_ISDIR(dentry->d_inode->i_mode));
101         LASSERT(lld != NULL);
102         
103         sbi = ll_i2sbi(dentry->d_inode);
104         LASSERT(sbi != NULL);
105
106         /* another thead is in progress of mouning some entry */
107         spin_lock(&sbi->ll_gns_lock);
108         if (sbi->ll_gns_state == LL_GNS_MOUNTING) {
109                 spin_unlock(&sbi->ll_gns_lock);
110
111                 wait_for_completion(&sbi->ll_gns_mount_finished);
112                 if (d_mountpoint(dentry))
113                         RETURN(0);
114         }
115
116         /* another thread mounted it already */
117         if (sbi->ll_gns_state == LL_GNS_FINISHED) {
118                 spin_unlock(&sbi->ll_gns_lock);
119
120                 /* we lost a race; just return */
121                 if (d_mountpoint(dentry))
122                         RETURN(0);
123         }
124         LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
125
126         spin_lock(&dentry->d_lock);
127         dentry->d_flags |= DCACHE_GNS_MOUNTING;
128         spin_unlock(&dentry->d_lock);
129         
130         /* mounting started */
131         sbi->ll_gns_state = LL_GNS_MOUNTING;
132         spin_unlock(&sbi->ll_gns_lock);
133
134         /* we need to build an absolute pathname to pass to mount */
135         pathpage = (char *)__get_free_page(GFP_KERNEL);
136         if (!pathpage)
137                 GOTO(cleanup, rc = -ENOMEM);
138         cleanup_phase = 1;
139
140         /* getting @dentry path stored in @pathpage. */
141         path = d_path(dentry, mnt, pathpage, PAGE_SIZE);
142         if (IS_ERR(path)) {
143                 CERROR("can't build mount object path, err %d\n",
144                        (int)PTR_ERR(dchild));
145                 GOTO(cleanup, rc = PTR_ERR(dchild));
146         }
147
148         /* sychronizing with possible /proc/fs/...write */
149         down(&sbi->ll_gns_sem);
150         
151         /* 
152          * mount object name is taken from sbi, where it is set in mount time or
153          * via /proc/fs... tunable. It may be ".mntinfo" or so.
154          */
155         dchild = ll_d_lookup(sbi->ll_gns_oname, dentry,
156                              strlen(sbi->ll_gns_oname));
157         up(&sbi->ll_gns_sem);
158
159         if (!dchild)
160                 GOTO(cleanup, rc = -ENOENT);
161         
162         if (IS_ERR(dchild)) {
163                 CERROR("can't find mount object %*s/%*s err = %d.\n",
164                        (int)dentry->d_name.len, dentry->d_name.name,
165                        (int)dchild->d_name.len, dchild->d_name.name,
166                        (int)PTR_ERR(dchild));
167                 GOTO(cleanup, rc = PTR_ERR(dchild));
168         }
169
170         mntget(mnt);
171
172         /* ok, mount object if found, opening it. */
173         mntinfo_fd = dentry_open(dchild, mnt, 0);
174         if (IS_ERR(mntinfo_fd)) {
175                 CERROR("can't open mount object %*s/%*s err = %d.\n",
176                        (int)dentry->d_name.len, dentry->d_name.name,
177                        (int)dchild->d_name.len, dchild->d_name.name,
178                        (int)PTR_ERR(mntinfo_fd));
179                 dput(dchild);
180                 mntput(mnt);
181                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
182         }
183         cleanup_phase = 2;
184
185         if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
186                 CERROR("mount object %*s/%*s is too big (%Ld)\n",
187                        (int)dentry->d_name.len, dentry->d_name.name,
188                        (int)dchild->d_name.len, dchild->d_name.name,
189                        mntinfo_fd->f_dentry->d_inode->i_size);
190                 GOTO(cleanup, rc = -EFBIG);
191         }
192
193         /* read data from mount object. */
194         mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
195         filler = (filler_t *)mapping->a_ops->readpage;
196         datapage = read_cache_page(mapping, 0, filler,
197                                    mntinfo_fd);
198         if (IS_ERR(datapage)) {
199                 CERROR("can't read data from mount object %*s/%*s\n",
200                        (int)dentry->d_name.len, dentry->d_name.name,
201                        (int)dchild->d_name.len, dchild->d_name.name);
202                 GOTO(cleanup, rc = PTR_ERR(datapage));
203         }
204
205         p = kmap(datapage);
206         LASSERT(p != NULL);
207         p[PAGE_SIZE - 1] = '\0';
208         cleanup_phase = 3;
209
210         fput(mntinfo_fd);
211         mntinfo_fd = NULL;
212
213         /* sychronizing with possible /proc/fs/...write */
214         down(&sbi->ll_gns_sem);
215
216         /*
217          * upcall is initialized in mount time or via /proc/fs/... tuneable and
218          * may be /usr/lib/lustre/gns-upcall.sh
219          */
220         argv[0] = sbi->ll_gns_upcall;
221         argv[1] = p;
222         argv[2] = path;
223         argv[3] = NULL;
224         
225         up(&sbi->ll_gns_sem);
226
227         rc = USERMODEHELPER(argv[0], argv, NULL);
228         if (rc) {
229                 CERROR("failed to call GNS upcall %s, err = %d\n",
230                        sbi->ll_gns_upcall, rc);
231                 GOTO(cleanup, rc);
232         }
233
234         /*
235          * wait for mount completion. This is actually not need, because
236          * USERMODEHELPER() returns only when usermode process finishes. But we
237          * doing this just for case USERMODEHELPER() semanthics will be changed
238          * or usermode upcall program will start mounting in backgound and
239          * return instantly. --umka
240          */
241         if (ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS)) {
242                 struct dentry *rdentry;
243                 struct vfsmount *rmnt;
244                 
245                 /* mount is successful */
246                 LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
247
248                 rmnt = mntget(mnt);
249                 rdentry = dget(dentry);
250                 
251                 if (follow_down(&rmnt, &rdentry)) {
252                         /* 
253                          * registering new mount in GNS mounts list and thus
254                          * make it accessible from GNS control thread.
255                          */
256                         spin_lock(&dcache_lock);
257                         LASSERT(list_empty(&rmnt->mnt_lustre_list));
258                         list_add_tail(&rmnt->mnt_lustre_list,
259                                       &sbi->ll_mnt_list);
260                         spin_unlock(&dcache_lock);
261                         rmnt->mnt_last_used = jiffies;
262                         mntput(rmnt);
263                         dput(rdentry);
264                 } else {
265                         mntput(mnt);
266                         dput(dentry);
267                 }
268                 spin_lock(&dentry->d_lock);
269                 dentry->d_flags &= ~DCACHE_GNS_PENDING;
270                 spin_unlock(&dentry->d_lock);
271         } else {
272                 CERROR("usermode upcall %s failed to mount %s\n",
273                        sbi->ll_gns_upcall, path);
274                 rc = -ETIME;
275         }
276
277         EXIT;
278 cleanup:
279         switch (cleanup_phase) {
280         case 3:
281                 kunmap(datapage);
282                 page_cache_release(datapage);
283         case 2:
284                 if (mntinfo_fd != NULL)
285                         fput(mntinfo_fd);
286         case 1:
287                 free_page((unsigned long)pathpage);
288         case 0:
289                 spin_lock(&sbi->ll_gns_lock);
290                 sbi->ll_gns_state = LL_GNS_IDLE;
291                 spin_unlock(&sbi->ll_gns_lock);
292
293                 spin_lock(&dentry->d_lock);
294                 dentry->d_flags &= ~DCACHE_GNS_MOUNTING;
295                 spin_unlock(&dentry->d_lock);
296         }
297         return rc;
298 }
299
300 /* tries to umount passed @mnt. */
301 int ll_gns_umount_object(struct vfsmount *mnt)
302 {
303         int rc = 0;
304         ENTRY;
305         
306         CDEBUG(D_INODE, "unmounting mnt %p\n", mnt);
307         rc = do_umount(mnt, 0);
308         if (rc) {
309                 CDEBUG(D_INODE, "can't umount 0x%p, err = %d\n",
310                        mnt, rc);
311         }
312         
313         RETURN(rc);
314 }
315
316 int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
317 {
318         struct list_head check_list = LIST_HEAD_INIT(check_list);
319         struct vfsmount *mnt;
320         unsigned long pass;
321         ENTRY;
322
323         spin_lock(&dcache_lock);
324         list_splice_init(&sbi->ll_mnt_list, &check_list);
325
326         /*
327          * walk the list in reverse order, and put them on the front of the sbi
328          * list each iteration; this avoids list-ordering problems if we race
329          * with another gns-mounting thread.
330          */
331         while (!list_empty(&check_list)) {
332                 mnt = list_entry(check_list.prev,
333                                  struct vfsmount,
334                                  mnt_lustre_list);
335
336                 mntget(mnt);
337
338                 list_del_init(&mnt->mnt_lustre_list);
339
340                 list_add(&mnt->mnt_lustre_list,
341                          &sbi->ll_mnt_list);
342
343                 /* check for timeout if needed */
344                 pass = jiffies - mnt->mnt_last_used;
345                 
346                 if (flags == LL_GNS_CHECK &&
347                     pass < sbi->ll_gns_timeout * HZ)
348                 {
349                         mntput(mnt);
350                         continue;
351                 }
352                 spin_unlock(&dcache_lock);
353
354                 /* umounting @mnt */
355                 ll_gns_umount_object(mnt);
356
357                 mntput(mnt);
358                 spin_lock(&dcache_lock);
359         }
360         spin_unlock(&dcache_lock);
361         RETURN(0);
362 }
363
364 /*
365  * GNS timer callback function. It restarts gns timer and wakes up GNS cvontrol
366  * thread to process mounts list.
367  */
368 void ll_gns_timer_callback(unsigned long data)
369 {
370         struct ll_sb_info *sbi = (void *)data;
371         ENTRY;
372
373         spin_lock(&gns_lock);
374         if (list_empty(&sbi->ll_gns_sbi_head))
375                 list_add(&sbi->ll_gns_sbi_head, &gns_sbi_list);
376         spin_unlock(&gns_lock);
377         
378         wake_up(&gns_thread.t_ctl_waitq);
379         mod_timer(&sbi->ll_gns_timer,
380                   jiffies + sbi->ll_gns_tick * HZ);
381 }
382
383 /* this function checks if something new happened to exist in gns list. */
384 static int inline ll_gns_check_event(void)
385 {
386         int rc;
387         
388         spin_lock(&gns_lock);
389         rc = !list_empty(&gns_sbi_list);
390         spin_unlock(&gns_lock);
391
392         return rc;
393 }
394
395 /* should we staop GNS control thread? */
396 static int inline ll_gns_check_stop(void)
397 {
398         mb();
399         return (gns_thread.t_flags & SVC_STOPPING) ? 1 : 0;
400 }
401
402 /* GNS control thread function. */
403 static int ll_gns_thread_main(void *arg)
404 {
405         struct ll_gns_ctl *ctl = arg;
406         unsigned long flags;
407         ENTRY;
408
409         {
410                 char name[sizeof(current->comm)];
411                 snprintf(name, sizeof(name) - 1, "ll_gns");
412                 kportal_daemonize(name);
413         }
414         
415         SIGNAL_MASK_LOCK(current, flags);
416         sigfillset(&current->blocked);
417         RECALC_SIGPENDING;
418         SIGNAL_MASK_UNLOCK(current, flags);
419
420         /*
421          * letting starting function know, that we are ready and control may be
422          * returned.
423          */
424         gns_thread.t_flags = SVC_RUNNING;
425         complete(&ctl->gc_starting);
426
427         while (!ll_gns_check_stop()) {
428                 struct l_wait_info lwi = { 0 };
429
430                 l_wait_event(gns_thread.t_ctl_waitq,
431                              (ll_gns_check_event() ||
432                               ll_gns_check_stop()), &lwi);
433                 
434                 spin_lock(&gns_lock);
435                 while (!list_empty(&gns_sbi_list)) {
436                         struct ll_sb_info *sbi;
437
438                         sbi = list_entry(gns_sbi_list.prev,
439                                          struct ll_sb_info,
440                                          ll_gns_sbi_head);
441                         
442                         list_del_init(&sbi->ll_gns_sbi_head);
443                         spin_unlock(&gns_lock);
444                         ll_gns_check_mounts(sbi, LL_GNS_CHECK);
445                         spin_lock(&gns_lock);
446                 }
447                 spin_unlock(&gns_lock);
448         }
449
450         /* 
451          * letting know stop function know that thread is stoped and it may
452          * return.
453          */
454         EXIT;
455         gns_thread.t_flags = SVC_STOPPED;
456
457         /* this is SMP-safe way to finish thread. */
458         complete_and_exit(&ctl->gc_finishing, 0);
459 }
460
461 void ll_gns_add_timer(struct ll_sb_info *sbi)
462 {
463         mod_timer(&sbi->ll_gns_timer,
464                   jiffies + sbi->ll_gns_tick * HZ);
465 }
466
467 void ll_gns_del_timer(struct ll_sb_info *sbi)
468 {
469         del_timer(&sbi->ll_gns_timer);
470 }
471
472 /*
473  * starts GNS control thread and waits for a signal it is up and work may be
474  * continued.
475  */
476 int ll_gns_start_thread(void)
477 {
478         int rc;
479         ENTRY;
480
481         LASSERT(gns_thread.t_flags == 0);
482         init_completion(&gns_ctl.gc_starting);
483         init_completion(&gns_ctl.gc_finishing);
484         init_waitqueue_head(&gns_thread.t_ctl_waitq);
485         
486         rc = kernel_thread(ll_gns_thread_main, &gns_ctl,
487                            (CLONE_VM | CLONE_FILES));
488         if (rc < 0) {
489                 CERROR("cannot start GNS control thread, "
490                        "err = %d\n", rc);
491                 RETURN(rc);
492         }
493         wait_for_completion(&gns_ctl.gc_starting);
494         LASSERT(gns_thread.t_flags == SVC_RUNNING);
495         RETURN(0);
496 }
497
498 /* stops GNS control thread and waits its actual stop. */
499 void ll_gns_stop_thread(void)
500 {
501         ENTRY;
502         gns_thread.t_flags = SVC_STOPPING;
503         wake_up(&gns_thread.t_ctl_waitq);
504         wait_for_completion(&gns_ctl.gc_finishing);
505         LASSERT(gns_thread.t_flags == SVC_STOPPED);
506         gns_thread.t_flags = 0;
507         EXIT;
508 }