Whamcloud - gitweb
- added comment in mds_open() and GNS mount points.
[fs/lustre-release.git] / lustre / llite / llite_gns.c
index d3ae81c..a8e73c3 100644 (file)
@@ -1,8 +1,12 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Phil Schwan <phil@clusterfs.com>
+ * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Oleg Drokin <green@clusterfs.com>
+ * Author: Yury Umanets <yury@clusterfs.com>
+ * Review: Nikita Danilov <nikita@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -46,46 +50,39 @@ ll_gns_wait_for_mount(struct dentry *dentry,
 {
         struct l_wait_info lwi;
         struct ll_sb_info *sbi;
-        int rc;
         ENTRY;
 
         LASSERT(dentry != NULL);
         LASSERT(!IS_ERR(dentry));
         sbi = ll_s2sbi(dentry->d_sb);
         
-        for (; !d_mountpoint(dentry) && tries > 0; tries--) {
-                lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
+        lwi = LWI_TIMEOUT(timeout * HZ, NULL, NULL);
+        for (; !d_mountpoint(dentry) && tries > 0; tries--)
                 l_wait_event(sbi->ll_gns_waitq, d_mountpoint(dentry), &lwi);
-        }
 
-        if ((rc = d_mountpoint(dentry) ? 1 : 0)) {
+        if (d_mountpoint(dentry)) {
                 spin_lock(&sbi->ll_gns_lock);
-                LASSERT(sbi->ll_gns_state == LL_GNS_MOUNTING);
                 sbi->ll_gns_state = LL_GNS_FINISHED;
                 spin_unlock(&sbi->ll_gns_lock);
+                RETURN(0);
         }
-
-        complete(&sbi->ll_gns_mount_finished);
-        RETURN(rc);
+        RETURN(-ETIME);
 }
 
 /*
  * tries to mount the mount object under passed @dentry. In the case of success
- * @dentry will become mount point and 0 will be retuned. Error code will be
+ * @dentry will become mount point and 0 will be returned. Error code will be
  * returned otherwise.
  */
-int ll_gns_mount_object(struct dentry *dentry,
-                        struct vfsmount *mnt)
+int
+ll_gns_mount_object(struct dentry *dentry, struct vfsmount *mnt)
 {
         struct ll_dentry_data *lld = dentry->d_fsdata;
-        char *p, *path, *pathpage, *argv[4];
+        char *path, *pathpage, *datapage, *argv[4];
         struct file *mntinfo_fd = NULL;
-        struct address_space *mapping;
         int cleanup_phase = 0, rc = 0;
         struct ll_sb_info *sbi;
         struct dentry *dchild;
-        struct page *datapage;
-        filler_t *filler;
         ENTRY;
 
         if (mnt == NULL) {
@@ -103,23 +100,18 @@ int ll_gns_mount_object(struct dentry *dentry,
         sbi = ll_i2sbi(dentry->d_inode);
         LASSERT(sbi != NULL);
 
-        /* another thead is in progress of mouning some entry */
+        /* 
+         * another thread is in progress or just finished mounting the
+         * dentry. Handling that.
+         */
         spin_lock(&sbi->ll_gns_lock);
-        if (sbi->ll_gns_state == LL_GNS_MOUNTING) {
+        if (sbi->ll_gns_state == LL_GNS_MOUNTING ||
+            sbi->ll_gns_state == LL_GNS_FINISHED) {
                 spin_unlock(&sbi->ll_gns_lock);
-
-                wait_for_completion(&sbi->ll_gns_mount_finished);
-                if (d_mountpoint(dentry))
-                        RETURN(0);
-        }
-
-        /* another thread mounted it already */
-        if (sbi->ll_gns_state == LL_GNS_FINISHED) {
-                spin_unlock(&sbi->ll_gns_lock);
-
-                /* we lost a race; just return */
-                if (d_mountpoint(dentry))
-                        RETURN(0);
+                CDEBUG(D_INODE, "GNS is in progress now, throwing "
+                       "-ERESTARTSYS to restart syscall and let "
+                       "it finish.\n");
+                RETURN(-ERESTARTSYS);
         }
         LASSERT(sbi->ll_gns_state == LL_GNS_IDLE);
 
@@ -145,28 +137,51 @@ int ll_gns_mount_object(struct dentry *dentry,
                 GOTO(cleanup, rc = PTR_ERR(dchild));
         }
 
-        /* sychronizing with possible /proc/fs/...write */
+        /* synchronizing with possible /proc/fs/...write */
         down(&sbi->ll_gns_sem);
         
         /* 
          * mount object name is taken from sbi, where it is set in mount time or
          * via /proc/fs... tunable. It may be ".mntinfo" or so.
          */
-        dchild = ll_d_lookup(sbi->ll_gns_oname, dentry,
-                             strlen(sbi->ll_gns_oname));
+
+        /* 
+         * FIXME: lookup_one_len() requires dentry->d_inode->i_sem to be locked,
+         * but we can't use ll_lookup_one_len() as this function is called from
+         * different control paths and some of them take dentry->d_inode->i_sem
+         * and others do not.
+         */
+        dchild = lookup_one_len(sbi->ll_gns_oname, dentry,
+                                strlen(sbi->ll_gns_oname));
         up(&sbi->ll_gns_sem);
 
-        if (!dchild)
-                GOTO(cleanup, rc = -ENOENT);
+        cleanup_phase = 2;
         
         if (IS_ERR(dchild)) {
+                rc = PTR_ERR(dchild);
+                
+                if (rc == -ERESTARTSYS) {
+                        CDEBUG(D_INODE, "possible endless loop is detected "
+                               "due to mount object is directory marked by "
+                               "SUID bit.\n");
+                        GOTO(cleanup, rc = -ELOOP);
+                }
+
                 CERROR("can't find mount object %*s/%*s err = %d.\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
-                       (int)PTR_ERR(dchild));
-                GOTO(cleanup, rc = PTR_ERR(dchild));
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
+                       rc);
+                GOTO(cleanup, rc);
         }
 
+        /* mount object is not found */
+        if (!dchild->d_inode)
+                GOTO(cleanup, rc = -ENOENT);
+
+        /* check if found child is regular file */
+        if (!S_ISREG(dchild->d_inode->i_mode))
+                GOTO(cleanup, rc = -EOPNOTSUPP);
+
         mntget(mnt);
 
         /* ok, mount object if found, opening it. */
@@ -174,43 +189,44 @@ int ll_gns_mount_object(struct dentry *dentry,
         if (IS_ERR(mntinfo_fd)) {
                 CERROR("can't open mount object %*s/%*s err = %d.\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        (int)PTR_ERR(mntinfo_fd));
-                dput(dchild);
                 mntput(mnt);
                 GOTO(cleanup, rc = PTR_ERR(mntinfo_fd));
         }
-        cleanup_phase = 2;
+        cleanup_phase = 3;
 
         if (mntinfo_fd->f_dentry->d_inode->i_size > PAGE_SIZE) {
                 CERROR("mount object %*s/%*s is too big (%Ld)\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name,
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
                        mntinfo_fd->f_dentry->d_inode->i_size);
                 GOTO(cleanup, rc = -EFBIG);
         }
 
+        datapage = (char *)__get_free_page(GFP_KERNEL);
+        if (!datapage)
+                GOTO(cleanup, rc = -ENOMEM);
+
+        cleanup_phase = 4;
+        
         /* read data from mount object. */
-        mapping = mntinfo_fd->f_dentry->d_inode->i_mapping;
-        filler = (filler_t *)mapping->a_ops->readpage;
-        datapage = read_cache_page(mapping, 0, filler,
-                                   mntinfo_fd);
-        if (IS_ERR(datapage)) {
-                CERROR("can't read data from mount object %*s/%*s\n",
+        rc = kernel_read(mntinfo_fd, 0, datapage, PAGE_SIZE);
+        if (rc < 0) {
+                CERROR("can't read mount object %*s/%*s data, err %d\n",
                        (int)dentry->d_name.len, dentry->d_name.name,
-                       (int)dchild->d_name.len, dchild->d_name.name);
-                GOTO(cleanup, rc = PTR_ERR(datapage));
+                       strlen(sbi->ll_gns_oname), sbi->ll_gns_oname,
+                       rc);
+                GOTO(cleanup, rc);
         }
 
-        p = kmap(datapage);
-        LASSERT(p != NULL);
-        p[PAGE_SIZE - 1] = '\0';
-        cleanup_phase = 3;
+        datapage[PAGE_SIZE - 1] = '\0';
 
         fput(mntinfo_fd);
         mntinfo_fd = NULL;
+        dchild = NULL;
 
-        /* sychronizing with possible /proc/fs/...write */
+        /* synchronizing with possible /proc/fs/...write */
         down(&sbi->ll_gns_sem);
 
         /*
@@ -218,7 +234,7 @@ int ll_gns_mount_object(struct dentry *dentry,
          * may be /usr/lib/lustre/gns-upcall.sh
          */
         argv[0] = sbi->ll_gns_upcall;
-        argv[1] = p;
+        argv[1] = datapage;
         argv[2] = path;
         argv[3] = NULL;
         
@@ -234,11 +250,13 @@ int ll_gns_mount_object(struct dentry *dentry,
         /*
          * wait for mount completion. This is actually not need, because
          * USERMODEHELPER() returns only when usermode process finishes. But we
-         * doing this just for case USERMODEHELPER() semanthics will be changed
+         * doing this just for case USERMODEHELPER() semantics will be changed
          * or usermode upcall program will start mounting in backgound and
          * return instantly. --umka
          */
-        if (ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS)) {
+        rc = ll_gns_wait_for_mount(dentry, 1, GNS_WAIT_ATTEMPTS);
+        complete_all(&sbi->ll_gns_mount_finished);
+        if (rc == 0) {
                 struct dentry *rdentry;
                 struct vfsmount *rmnt;
                 
@@ -269,22 +287,29 @@ int ll_gns_mount_object(struct dentry *dentry,
                 dentry->d_flags &= ~DCACHE_GNS_PENDING;
                 spin_unlock(&dentry->d_lock);
         } else {
-                CERROR("usermode upcall %s failed to mount %s\n",
-                       sbi->ll_gns_upcall, path);
-                rc = -ETIME;
+                CERROR("usermode upcall %s failed to mount %s, err %d\n",
+                       sbi->ll_gns_upcall, path, rc);
         }
-
+                
         EXIT;
 cleanup:
         switch (cleanup_phase) {
+        case 4:
+                free_page((unsigned long)datapage);
         case 3:
-                kunmap(datapage);
-                page_cache_release(datapage);
-        case 2:
                 if (mntinfo_fd != NULL)
                         fput(mntinfo_fd);
+        case 2:
+                if (dchild != NULL)
+                        dput(dchild);
         case 1:
                 free_page((unsigned long)pathpage);
+                
+                /* 
+                 * waking up all waiters after gns state is set to
+                 * LL_GNS_MOUNTING
+                 */
+                complete_all(&sbi->ll_gns_mount_finished);
         case 0:
                 spin_lock(&sbi->ll_gns_lock);
                 sbi->ll_gns_state = LL_GNS_IDLE;
@@ -362,7 +387,7 @@ int ll_gns_check_mounts(struct ll_sb_info *sbi, int flags)
 }
 
 /*
- * GNS timer callback function. It restarts gns timer and wakes up GNS cvontrol
+ * GNS timer callback function. It restarts gns timer and wakes up GNS control
  * thread to process mounts list.
  */
 void ll_gns_timer_callback(unsigned long data)
@@ -392,7 +417,7 @@ static int inline ll_gns_check_event(void)
         return rc;
 }
 
-/* should we staop GNS control thread? */
+/* should we stop GNS control thread? */
 static int inline ll_gns_check_stop(void)
 {
         mb();
@@ -447,10 +472,6 @@ static int ll_gns_thread_main(void *arg)
                 spin_unlock(&gns_lock);
         }
 
-        /* 
-         * letting know stop function know that thread is stoped and it may
-         * return.
-         */
         EXIT;
         gns_thread.t_flags = SVC_STOPPED;