Whamcloud - gitweb
- remove non-needed check
[fs/lustre-release.git] / lustre / llite / llite_gns.c
index d3ae81c..0b88b4e 100644 (file)
@@ -1,8 +1,12 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Phil Schwan <phil@clusterfs.com>
+ * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Oleg Drokin <green@clusterfs.com>
+ * Author: Yury Umanets <yury@clusterfs.com>
+ * Review: Nikita Danilov <nikita@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -46,89 +50,117 @@ ll_gns_wait_for_mount(struct dentry *dentry,
 {
         struct l_wait_info lwi;
         struct ll_sb_info *sbi;
-        int rc;
+       int rc = 0;
         ENTRY;
 
         LASSERT(dentry != NULL);
         LASSERT(!IS_ERR(dentry));
         sbi = ll_s2sbi(dentry->d_sb);
         
+        lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
         for (; !d_mountpoint(dentry) && tries > 0; tries--) {
-                lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
                 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
-        }
-
-        if ((rc = d_mountpoint(dentry) ? 1 : 0)) {
-                spin_lock(&sbi->ll_gns_lock);
-                LASSERT(sbi->ll_gns_state == LL_GNS_MOUNTING);
-                sbi->ll_gns_state = LL_GNS_FINISHED;
-                spin_unlock(&sbi->ll_gns_lock);
-        }
-
-        complete(&sbi->ll_gns_mount_finished);
-        RETURN(rc);
+               if (signal_pending(current))
+                       GOTO(out, rc = -EINTR);
+       }
+
+        if (!d_mountpoint(dentry))
+               rc = -ETIME;
+       
+       EXIT;
+out:   
+        spin_lock(&sbi->ll_gns_lock);
+        sbi->ll_gns_state = LL_GNS_FINISHED;
+        spin_unlock(&sbi->ll_gns_lock);
+        return rc;
 }
 
 /*
  * tries to mount the mount object under passed @dentry. In the case of success
- * @dentry will become mount point and 0 will be retuned. Error code will be
+ * @dentry will become mount point and 0 will be returned. Error code will be
  * returned otherwise.
  */
-int ll_gns_mount_object(struct dentry *dentry,
-                        struct vfsmount *mnt)
+int
+ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
 {
-        struct ll_dentry_data *lld = dentry->d_fsdata;
-        char *p, *path, *pathpage, *argv[4];
+        char *path, *pathpage, *datapage, *argv[4];
         struct file *mntinfo_fd = NULL;
-        struct address_space *mapping;
         int cleanup_phase = 0, rc = 0;
         struct ll_sb_info *sbi;
         struct dentry *dchild;
-        struct page *datapage;
-        filler_t *filler;
         ENTRY;
 
+        LASSERT(dentry->d_inode != NULL);
+
+        if (!S_ISDIR(dentry->d_inode->i_mode))
+                RETURN(-EINVAL);
+
+        sbi = ll_i2sbi(dentry->d_inode);
+        
         if (mnt == NULL) {
                 CERROR("suid directory found, but no "
                        "vfsmount available.\n");
                 RETURN(-EINVAL);
         }
 
-        CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
-
-        LASSERT(dentry->d_inode != NULL);
-        LASSERT(S_ISDIR(dentry->d_inode->i_mode));
-        LASSERT(lld != NULL);
-        
-        sbi = ll_i2sbi(dentry->d_inode);
-        LASSERT(sbi != NULL);
+        if (atomic_read(&sbi->ll_gns_enabled) == 0)
+                RETURN(-EINVAL);
 
-        /* another thead is in progress of mouning some entry */
         spin_lock(&sbi->ll_gns_lock);
-        if (sbi->ll_gns_state == LL_GNS_MOUNTING) {
+       
+        /* 
+         * another thread is in progress or just finished mounting the
+         * dentry. Handling that.
+         */
+        if (sbi->ll_gns_state != LL_GNS_IDLE) {
+                /* 
+                 * another thread is trying to mount a GNS dentry. We need to
+                 * handle that.
+                 */
                 spin_unlock(&sbi->ll_gns_lock);
 
-                wait_for_completion(&sbi->ll_gns_mount_finished);
+        restart:
+                /* 
+                 * check if dentry is mount point already, if so, do not restart
+                 * syscall.
+                 */
                 if (d_mountpoint(dentry))
                         RETURN(0);
-        }
 
-        /* another thread mounted it already */
-        if (sbi->ll_gns_state == LL_GNS_FINISHED) {
+                spin_lock(&sbi->ll_gns_lock);
+               if (sbi->ll_gns_pending_dentry && 
+                   is_subdir(sbi->ll_gns_pending_dentry, dentry)) {
+                       spin_unlock(&sbi->ll_gns_lock);
+                       RETURN(-EAGAIN);
+               }
                 spin_unlock(&sbi->ll_gns_lock);
 
-                /* we lost a race; just return */
+                /* 
+                 * wait for GNS to complete and check dentry again, it may be
+                 * mounted already.
+                 */
+                wait_for_completion(&sbi->ll_gns_mount_finished);
                 if (d_mountpoint(dentry))
                         RETURN(0);
+
+                /* 
+                 * check for the case when there are a few waiters and all of
+                 * them are awakened, but only one will find GNS state
+                 * LL_GNS_IDLE, and the rest will face LL_GNS_MOUNTING.  --umka
+                 */
+                spin_lock(&sbi->ll_gns_lock);
+                if (sbi->ll_gns_state != LL_GNS_IDLE) {
+                        spin_unlock(&sbi->ll_gns_lock);
+                        goto restart;
+                }
+                spin_unlock(&sbi->ll_gns_lock);
         }
         LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
+        CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
 
-        spin_lock(&dentry->d_lock);
-        dentry->d_flags |= DCACHE_GNS_MOUNTING;
-        spin_unlock(&dentry->d_lock);
-        
         /* mounting started */
         sbi->ll_gns_state = LL_GNS_MOUNTING;
+       sbi->ll_gns_pending_dentry = dentry;
         spin_unlock(&sbi->ll_gns_lock);
 
         /* we need to build an absolute pathname to pass to mount */
@@ -145,28 +177,42 @@ int ll_gns_mount_object(struct dentry *dentry,
                 GOTO(cleanup, rc = PTR_ERR(dchild));
         }
 
-        /* sychronizing with possible /proc/fs/...write */
+        /* synchronizing with possible /proc/fs/...write */
         down(&sbi->ll_gns_sem);
         
         /* 
          * mount object name is taken from sbi, where it is set in mount time or
          * via /proc/fs... tunable. It may be ".mntinfo" or so.
          */
-        dchild = ll_d_lookup(sbi->ll_gns_oname, dentry,
-                             strlen(sbi->ll_gns_oname));
+
+        /* 
+         * recursive lookup with trying to mount SUID bit marked directories on
+         * the way is not possible here, as lookup_one_len() does not pass @nd
+         * to ->lookup() and this is checked in ll_lookup_it().
+         */
+        dchild = ll_lookup_one_len(sbi->ll_gns_oname, dentry,
+                                   strlen(sbi->ll_gns_oname));
         up(&sbi->ll_gns_sem);
 
-        if (!dchild)
-                GOTO(cleanup, rc = -ENOENT);
+        cleanup_phase = 2;
         
         if (IS_ERR(dchild)) {
+                rc = PTR_ERR(dchild);
                 CERROR("can't find mount object %*s/%*s err = %d.\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
-                       (int)PTR_ERR(dchild));
-                GOTO(cleanup, rc = PTR_ERR(dchild));
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
+                       rc);
+                GOTO(cleanup, rc);
         }
 
+        /* mount object is not found */
+        if (!dchild->d_inode)
+                GOTO(cleanup, rc = -ENOENT);
+
+        /* check if found child is regular file */
+        if (!S_ISREG(dchild->d_inode->i_mode))
+                GOTO(cleanup, rc = -EBADF);
+
         mntget(mnt);
 
         /* ok, mount object if found, opening it. */
@@ -174,43 +220,51 @@ int ll_gns_mount_object(struct dentry *dentry,
         if (IS_ERR(mntinfo_fd)) {
                 CERROR("can't open mount object %*s/%*s err = %d.\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        (int)PTR_ERR(mntinfo_fd));
-                dput(dchild);
                 mntput(mnt);
                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
         }
-        cleanup_phase = 2;
+        cleanup_phase = 3;
 
-        if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
+        if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE - 1) {
                 CERROR("mount object %*s/%*s is too big (%Ld)\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        mntinfo_fd->f_dentry->d_inode->i_size);
                 GOTO(cleanup, rc = -EFBIG);
         }
 
+        datapage = (char *)__get_free_page(GFP_KERNEL);
+        if (!datapage)
+                GOTO(cleanup, rc = -ENOMEM);
+
+        cleanup_phase = 4;
+        
         /* read data from mount object. */
-        mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
-        filler = (filler_t *)mapping->a_ops->readpage;
-        datapage = read_cache_page(mapping, 0, filler,
-                                   mntinfo_fd);
-        if (IS_ERR(datapage)) {
-                CERROR("can't read data from mount object %*s/%*s\n",
+        rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE - 1);
+        if (rc < 0) {
+                CERROR("can't read mount object %*s/%*s data, err %d\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name);
-                GOTO(cleanup, rc = PTR_ERR(datapage));
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
+                       rc);
+                GOTO(cleanup, rc);
         }
 
-        p = kmap(datapage);
-        LASSERT(p != NULL);
-        p[PAGE_SIZE - 1] = '\0';
-        cleanup_phase = 3;
+        /* no data in mount object? */
+        if (rc == 0) {
+                CERROR("mount object %*s/%*s is empty?\n",
+                       (int)dentry->d_name.len, dentry->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname);
+                GOTO(cleanup, rc);
+        }
 
+        datapage[rc] = '\0';
         fput(mntinfo_fd);
         mntinfo_fd = NULL;
+        dchild = NULL;
 
-        /* sychronizing with possible /proc/fs/...write */
+        /* synchronizing with possible /proc/fs/...write */
         down(&sbi->ll_gns_sem);
 
         /*
@@ -218,33 +272,31 @@ int ll_gns_mount_object(struct dentry *dentry,
          * may be /usr/lib/lustre/gns-upcall.sh
          */
         argv[0] = sbi->ll_gns_upcall;
-        argv[1] = p;
+        argv[1] = datapage;
         argv[2] = path;
         argv[3] = NULL;
         
         up(&sbi->ll_gns_sem);
 
-        rc = USERMODEHELPER(argv[0], argv, NULL);
+        /* do not wait for the helper to complete here. */
+        rc = call_usermodehelper(argv[0], argv, NULL, 1);
         if (rc) {
-                CERROR("failed to call GNS upcall %s, err = %d\n",
-                       sbi->ll_gns_upcall, rc);
-                GOTO(cleanup, rc);
+                CWARN("failed to call GNS upcall %s, err = %d, "
+                      "checking for mount anyway\n", sbi->ll_gns_upcall, rc);
         }
 
         /*
-         * wait for mount completion. This is actually not need, because
-         * USERMODEHELPER() returns only when usermode process finishes. But we
-         * doing this just for case USERMODEHELPER() semanthics will be changed
-         * or usermode upcall program will start mounting in backgound and
-         * return instantly. --umka
+         * wait for the dentry to become a mount point: GNS_WAIT_ATTEMPTS
+         * tries of 1 second each.
          */
-        if (ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS)) {
+        rc = ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS);
+        LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
+       
+       /* check for the mount point anyway so as not to lose mounts */
+        if (d_mountpoint(dentry)) {
                 struct dentry *rdentry;
                 struct vfsmount *rmnt;
-                
-                /* mount is successful */
-                LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
-
+               
                 rmnt = mntget(mnt);
                 rdentry = dget(dentry);
                 
@@ -265,34 +317,35 @@ int ll_gns_mount_object(struct dentry *dentry,
                         mntput(mnt);
                         dput(dentry);
                 }
-                spin_lock(&dentry->d_lock);
-                dentry->d_flags &= ~DCACHE_GNS_PENDING;
-                spin_unlock(&dentry->d_lock);
+               
+               rc = 0;
         } else {
-                CERROR("usermode upcall %s failed to mount %s\n",
-                       sbi->ll_gns_upcall, path);
-                rc = -ETIME;
+                CERROR("usermode upcall %s failed to mount %s, err %d\n",
+                       sbi->ll_gns_upcall, path, rc);
         }
-
+                
         EXIT;
 cleanup:
         switch (cleanup_phase) {
+        case 4:
+                free_page((unsigned long)datapage);
         case 3:
-                kunmap(datapage);
-                page_cache_release(datapage);
-        case 2:
                 if (mntinfo_fd != NULL)
                         fput(mntinfo_fd);
+        case 2:
+                if (dchild != NULL)
+                        dput(dchild);
         case 1:
                 free_page((unsigned long)pathpage);
         case 0:
                 spin_lock(&sbi->ll_gns_lock);
                 sbi->ll_gns_state = LL_GNS_IDLE;
+               sbi->ll_gns_pending_dentry = NULL;
                 spin_unlock(&sbi->ll_gns_lock);
 
-                spin_lock(&dentry->d_lock);
-                dentry->d_flags &= ~DCACHE_GNS_MOUNTING;
-                spin_unlock(&dentry->d_lock);
+                /* waking up all waiters after GNS state is LL_GNS_IDLE */
+                complete_all(&sbi->ll_gns_mount_finished);
+                init_completion(&sbi->ll_gns_mount_finished);
         }
         return rc;
 }
@@ -362,7 +415,7 @@ int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
 }
 
 /*
- * GNS timer callback function. It restarts gns timer and wakes up GNS cvontrol
+ * GNS timer callback function. It restarts gns timer and wakes up GNS control
  * thread to process mounts list.
  */
 void ll_gns_timer_callback(unsigned long data)
@@ -392,7 +445,7 @@ static int inline ll_gns_check_event(void)
         return rc;
 }
 
-/* should we staop GNS control thread? */
+/* should we stop GNS control thread? */
 static int inline ll_gns_check_stop(void)
 {
         mb();
@@ -447,10 +500,6 @@ static int ll_gns_thread_main(void *arg)
                 spin_unlock(&gns_lock);
         }
 
-        /* 
-         * letting know stop function know that thread is stoped and it may
-         * return.
-         */
         EXIT;
         gns_thread.t_flags = SVC_STOPPED;