Whamcloud - gitweb
- added revalidating inode as Alex advised before checking its size in ll_gns_mount_o...
[fs/lustre-release.git] / lustre / llite / llite_gns.c
index f28c11e..d9affef 100644 (file)
@@ -50,6 +50,7 @@ ll_gns_wait_for_mount(struct dentry *dentry,
 {
         struct l_wait_info lwi;
         struct ll_sb_info *sbi;
+       int rc = 0;
         ENTRY;
 
         LASSERT(dentry != NULL);
@@ -57,16 +58,21 @@ ll_gns_wait_for_mount(struct dentry *dentry,
         sbi = ll_s2sbi(dentry->d_sb);
         
         lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
-        for (; !d_mountpoint(dentry) && tries > 0; tries--)
+        for (; !d_mountpoint(dentry) && tries > 0; tries--) {
                 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
-
-        if (d_mountpoint(dentry)) {
-                spin_lock(&sbi->ll_gns_lock);
-                sbi->ll_gns_state = LL_GNS_FINISHED;
-                spin_unlock(&sbi->ll_gns_lock);
-                RETURN(0);
-        }
-        RETURN(-ETIME);
+               if (signal_pending(current))
+                       GOTO(out, rc = -EINTR);
+       }
+
+        if (!d_mountpoint(dentry))
+               rc = -ETIME;
+       
+       EXIT;
+out:   
+        spin_lock(&sbi->ll_gns_lock);
+        sbi->ll_gns_state = LL_GNS_FINISHED;
+        spin_unlock(&sbi->ll_gns_lock);
+        return rc;
 }
 
 /*
@@ -77,50 +83,84 @@ ll_gns_wait_for_mount(struct dentry *dentry,
 int
 ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
 {
-        struct ll_dentry_data *lld = dentry->d_fsdata;
-        char *path, *pathpage, *datapage, *argv[4];
+        char *path, *pathpage, *datapage = NULL, *argv[4];
         struct file *mntinfo_fd = NULL;
         int cleanup_phase = 0, rc = 0;
         struct ll_sb_info *sbi;
-        struct dentry *dchild;
+        struct dentry *dchild = NULL;
         ENTRY;
 
+        LASSERT(dentry->d_inode != NULL);
+
+        if (!S_ISDIR(dentry->d_inode->i_mode))
+                RETURN(-EINVAL);
+
+        sbi = ll_i2sbi(dentry->d_inode);
+        
         if (mnt == NULL) {
                 CERROR("suid directory found, but no "
                        "vfsmount available.\n");
                 RETURN(-EINVAL);
         }
 
-        CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
-
-        LASSERT(dentry->d_inode != NULL);
-        LASSERT(S_ISDIR(dentry->d_inode->i_mode));
-        LASSERT(lld != NULL);
-        
-        sbi = ll_i2sbi(dentry->d_inode);
-        LASSERT(sbi != NULL);
+        if (atomic_read(&sbi->ll_gns_enabled) == 0)
+                RETURN(-EINVAL);
 
+        spin_lock(&sbi->ll_gns_lock);
+       
         /* 
          * another thead is in progress or just finished mounting the
          * dentry. Handling that.
          */
-        spin_lock(&sbi->ll_gns_lock);
-        if (sbi->ll_gns_state == LL_GNS_MOUNTING ||
-            sbi->ll_gns_state == LL_GNS_FINISHED) {
+        if (sbi->ll_gns_state != LL_GNS_IDLE) {
+                /* 
+                 * another thread is trying to mount a GNS dentry; handle
+                 * that case here.
+                 */
+                spin_unlock(&sbi->ll_gns_lock);
+
+        restart:
+                /* 
+                 * check if dentry is a mount point already; if so, do not
+                 * restart the syscall.
+                 */
+                if (d_mountpoint(dentry))
+                        RETURN(0);
+
+                spin_lock(&sbi->ll_gns_lock);
+               if (sbi->ll_gns_pending_dentry && 
+                   is_subdir(sbi->ll_gns_pending_dentry, dentry)) {
+                       spin_unlock(&sbi->ll_gns_lock);
+                       RETURN(-EAGAIN);
+               }
+                spin_unlock(&sbi->ll_gns_lock);
+
+                /* 
+                 * wait for GNS to complete and check the dentry again; it may
+                 * be mounted already.
+                 */
+                wait_for_completion(&sbi->ll_gns_mount_finished);
+                if (d_mountpoint(dentry))
+                        RETURN(0);
+
+                /* 
+                 * check for the case when there are several waiters and all of
+                 * them are awakened, but only one will find GNS state
+                 * LL_GNS_IDLE and the rest will see LL_GNS_MOUNTING.  --umka
+                 */
+                spin_lock(&sbi->ll_gns_lock);
+                if (sbi->ll_gns_state != LL_GNS_IDLE) {
+                        spin_unlock(&sbi->ll_gns_lock);
+                        goto restart;
+                }
                 spin_unlock(&sbi->ll_gns_lock);
-                CDEBUG(D_INODE, "GNS is in progress now, throwing "
-                       "-ERESTARTSYS to restart syscall and let "
-                       "it finish.\n");
-                RETURN(-ERESTARTSYS);
         }
         LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
+        CDEBUG(D_INODE, "mounting dentry %p\n", dentry);
 
-        spin_lock(&dentry->d_lock);
-        dentry->d_flags |= DCACHE_GNS_MOUNTING;
-        spin_unlock(&dentry->d_lock);
-        
         /* mounting started */
         sbi->ll_gns_state = LL_GNS_MOUNTING;
+       sbi->ll_gns_pending_dentry = dentry;
         spin_unlock(&sbi->ll_gns_lock);
 
         /* we need to build an absolute pathname to pass to mount */
@@ -133,8 +173,8 @@ ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
         path = d_path(dentry, mnt, pathpage, PAGE_SIZE);
         if (IS_ERR(path)) {
                 CERROR("can't build mount object path, err %d\n",
-                       (int)PTR_ERR(dchild));
-                GOTO(cleanup, rc = PTR_ERR(dchild));
+                       (int)PTR_ERR(path));
+                GOTO(cleanup, rc = PTR_ERR(path));
         }
 
         /* synchronizing with possible /proc/fs/...write */
@@ -146,62 +186,62 @@ ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
          */
 
         /* 
-         * FIXME: lookup_one_len() requires dentry->d_inode->i_sem to be locked,
-         * but we can't use ll_lookup_one_len() as this function is called from
-         * different contol paths and some of them take dentry->d_inode->i_sem
-         * and others do not.
+         * recursive lookup with trying to mount SUID bit marked directories on
+         * the way is not possible here, as lookup_one_len() does not pass @nd
+         * to ->lookup() and this is checked in ll_lookup_it().
          */
-        dchild = lookup_one_len(sbi->ll_gns_oname, dentry,
-                                strlen(sbi->ll_gns_oname));
+        dchild = ll_lookup_one_len(sbi->ll_gns_oname, dentry,
+                                   strlen(sbi->ll_gns_oname));
         up(&sbi->ll_gns_sem);
 
-        if (!dchild)
-                GOTO(cleanup, rc = -ENOENT);
-        
         if (IS_ERR(dchild)) {
                 rc = PTR_ERR(dchild);
-                
-                if (rc == -ERESTARTSYS) {
-                        CDEBUG(D_INODE, "possible endless loop is detected "
-                               "due to mount object is directory marked by "
-                               "SUID bit.\n");
-                        GOTO(cleanup, rc = -ELOOP);
-                }
-
                 CERROR("can't find mount object %*s/%*s err = %d.\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        rc);
                 GOTO(cleanup, rc);
         }
 
         /* mount object is not found */
-        if (!dchild->d_inode)
+        if (!dchild->d_inode) {
+                dput(dchild);
                 GOTO(cleanup, rc = -ENOENT);
+        }
 
         /* check if found child is regular file */
-        if (!S_ISREG(dchild->d_inode->i_mode))
-                GOTO(cleanup, rc = -EOPNOTSUPP);
-
-        mntget(mnt);
+        if (!S_ISREG(dchild->d_inode->i_mode)) {
+                dput(dchild);
+                GOTO(cleanup, rc = -EBADF);
+        }
 
         /* ok, mount object if found, opening it. */
-        mntinfo_fd = dentry_open(dchild, mnt, 0);
+        mntinfo_fd = dentry_open(dchild, mntget(mnt), 0);
         if (IS_ERR(mntinfo_fd)) {
                 CERROR("can't open mount object %*s/%*s err = %d.\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        (int)PTR_ERR(mntinfo_fd));
-                dput(dchild);
                 mntput(mnt);
+                dput(dchild);
                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
         }
         cleanup_phase = 2;
 
-        if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
+        /* make sure that inode size is up-to-date */
+        rc = ll_inode_revalidate_it(mntinfo_fd->f_dentry);
+        if (rc < 0) {
+                CERROR("can't revalidate mount object %*s/%*s, err %d\n",
+                       (int)dentry->d_name.len, dentry->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
+                       rc);
+                GOTO(cleanup, rc);
+        }
+
+        if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE - 1) {
                 CERROR("mount object %*s/%*s is too big (%Ld)\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        mntinfo_fd->f_dentry->d_inode->i_size);
                 GOTO(cleanup, rc = -EFBIG);
         }
@@ -213,19 +253,27 @@ ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
         cleanup_phase = 3;
         
         /* read data from mount object. */
-        rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE);
+        rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE - 1);
         if (rc < 0) {
                 CERROR("can't read mount object %*s/%*s data, err %d\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        rc);
                 GOTO(cleanup, rc);
         }
 
-        datapage[PAGE_SIZE - 1] = '\0';
+        /* no data in mount object? */
+        if (rc == 0) {
+                CERROR("mount object %*s/%*s is empty?\n",
+                       (int)dentry->d_name.len, dentry->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname);
+                GOTO(cleanup, rc);
+        }
 
+        datapage[rc] = '\0';
         fput(mntinfo_fd);
         mntinfo_fd = NULL;
+        dchild = NULL;
 
         /* synchronizing with possible /proc/fs/...write */
         down(&sbi->ll_gns_sem);
@@ -241,30 +289,25 @@ ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
         
         up(&sbi->ll_gns_sem);
 
-        rc = USERMODEHELPER(argv[0], argv, NULL);
+        /* do not wait for helper complete here. */
+        rc = call_usermodehelper(argv[0], argv, NULL, 1);
         if (rc) {
-                CERROR("failed to call GNS upcall %s, err = %d\n",
-                       sbi->ll_gns_upcall, rc);
-                GOTO(cleanup, rc);
+                CWARN("failed to call GNS upcall %s, err = %d, "
+                      "checking for mount anyway\n", sbi->ll_gns_upcall, rc);
         }
 
         /*
-         * wait for mount completion. This is actually not need, because
-         * USERMODEHELPER() returns only when usermode process finishes. But we
-         * doing this just for case USERMODEHELPER() semantics will be changed
-         * or usermode upcall program will start mounting in backgound and
-         * return instantly. --umka
+         * wait for the dentry to become a mount point: up to
+         * GNS_WAIT_ATTEMPTS tries with a 1 second timeout each.
          */
-
         rc = ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS);
-        complete_all(&sbi->ll_gns_mount_finished);
-        if (rc == 0) {
+        LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
+       
+       /* check for the mount point anyway so as not to lose mounts */
+        if (d_mountpoint(dentry)) {
                 struct dentry *rdentry;
                 struct vfsmount *rmnt;
-                
-                /* mount is successful */
-                LASSERT(sbi->ll_gns_state == LL_GNS_FINISHED);
-
+               
                 rmnt = mntget(mnt);
                 rdentry = dget(dentry);
                 
@@ -285,9 +328,8 @@ ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
                         mntput(mnt);
                         dput(dentry);
                 }
-                spin_lock(&dentry->d_lock);
-                dentry->d_flags &= ~DCACHE_GNS_PENDING;
-                spin_unlock(&dentry->d_lock);
+               
+               rc = 0;
         } else {
                 CERROR("usermode upcall %s failed to mount %s, err %d\n",
                        sbi->ll_gns_upcall, path, rc);
@@ -299,24 +341,21 @@ cleanup:
         case 3:
                 free_page((unsigned long)datapage);
         case 2:
-                if (mntinfo_fd != NULL)
+                if (mntinfo_fd != NULL) {
                         fput(mntinfo_fd);
+                        dchild = NULL;
+                }
         case 1:
                 free_page((unsigned long)pathpage);
-                
-                /* 
-                 * waking up all waiters after gns state is set to
-                 * LL_GNS_MOUNTING
-                 */
-                complete_all(&sbi->ll_gns_mount_finished);
         case 0:
                 spin_lock(&sbi->ll_gns_lock);
                 sbi->ll_gns_state = LL_GNS_IDLE;
+               sbi->ll_gns_pending_dentry = NULL;
                 spin_unlock(&sbi->ll_gns_lock);
 
-                spin_lock(&dentry->d_lock);
-                dentry->d_flags &= ~DCACHE_GNS_MOUNTING;
-                spin_unlock(&dentry->d_lock);
+                /* waking up all waiters after GNS state is LL_GNS_IDLE */
+                complete_all(&sbi->ll_gns_mount_finished);
+                init_completion(&sbi->ll_gns_mount_finished);
         }
         return rc;
 }
@@ -424,7 +463,7 @@ static int inline ll_gns_check_stop(void)
 }
 
 /* GNS control thread function. */
-static int ll_gns_thread_main(void *arg)
+static int ll_gns_thread(void *arg)
 {
         struct ll_gns_ctl *ctl = arg;
         unsigned long flags;
@@ -493,7 +532,7 @@ void ll_gns_del_timer(struct ll_sb_info *sbi)
  * starts GNS control thread and waits for a signal it is up and work may be
  * continued.
  */
-int ll_gns_start_thread(void)
+int ll_gns_thread_start(void)
 {
         int rc;
         ENTRY;
@@ -503,7 +542,7 @@ int ll_gns_start_thread(void)
         init_completion(&gns_ctl.gc_finishing);
         init_waitqueue_head(&gns_thread.t_ctl_waitq);
         
-        rc = kernel_thread(ll_gns_thread_main, &gns_ctl,
+        rc = kernel_thread(ll_gns_thread, &gns_ctl,
                            (CLONE_VM | CLONE_FILES));
         if (rc < 0) {
                 CERROR("cannot start GNS control thread, "
@@ -516,7 +555,7 @@ int ll_gns_start_thread(void)
 }
 
 /* stops GNS control thread and waits its actual stop. */
-void ll_gns_stop_thread(void)
+void ll_gns_thread_stop(void)
 {
         ENTRY;
         gns_thread.t_flags = SVC_STOPPING;