Whamcloud - gitweb
b=19706
authorzhanghc <zhanghc>
Tue, 16 Jun 2009 10:49:14 +0000 (10:49 +0000)
committerzhanghc <zhanghc>
Tue, 16 Jun 2009 10:49:14 +0000 (10:49 +0000)
fix a deadlock bug introduced with kernel 2.6.23

i=adilger@sun.com
i=johann@sun.com

lustre/autoconf/lustre-core.m4
lustre/include/lprocfs_status.h
lustre/obdclass/lprocfs_status.c

index 8281a12..d59b6a0 100644 (file)
@@ -1376,6 +1376,24 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
+# 2.6.23 added code to wait for other users to finish before removing a procfs entry
+AC_DEFUN([LC_PROCFS_USERS],
+[AC_MSG_CHECKING([if kernel has pde_users member in procfs entry struct])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/proc_fs.h>
+],[
+        struct proc_dir_entry pde;
+
+        pde.pde_users   = 0;
+],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_PROCFS_USERS, 1, 
+                [kernel has pde_users member in procfs entry struct])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
 # 2.6.24 has bio_endio with 2 args
 AC_DEFUN([LC_BIO_ENDIO_2ARG],
 [AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
@@ -1872,6 +1890,7 @@ AC_DEFUN([LC_PROG_LINUX],
           LC_HAVE_EXPORTFS_H
           LC_VM_OP_FAULT
           LC_REGISTER_SHRINKER
+          LC_PROCFS_USERS
 
           # 2.6.25
           LC_MAPPING_CAP_WRITEBACK_DIRTY
index 70b6fb1..7106e91 100644 (file)
@@ -532,6 +532,17 @@ extern int lprocfs_seq_release(struct inode *, struct file *);
 
 /* in lprocfs_stat.c, to protect the private data for proc entries */
 extern struct rw_semaphore _lprocfs_lock;
+
+/* Starting with 2.6.23, Linux defines its own file_operations
+ * (proc_reg_file_ops) in procfs. The proc file_operations defined by Lustre
+ * (lprocfs_generic_fops) are wrapped by proc_reg_file_ops, which introduces
+ * a user count in proc_dir_entry (pde_users) to protect the proc entry from
+ * being deleted. The protection lock (_lprocfs_lock) defined by Lustre is
+ * therefore no longer necessary for lprocfs_generic_fops (e.g.
+ * lprocfs_fops_read). See bug 19706 for detailed information.
+ */
+#ifndef HAVE_PROCFS_USERS
+
 #define LPROCFS_ENTRY()           do {  \
         down_read(&_lprocfs_lock);      \
 } while(0)
@@ -539,7 +550,18 @@ extern struct rw_semaphore _lprocfs_lock;
         up_read(&_lprocfs_lock);        \
 } while(0)
 
+#else
+
+#define LPROCFS_ENTRY()
+#define LPROCFS_EXIT()
+#endif
+
 #ifdef HAVE_PROCFS_DELETED
+
+#ifdef HAVE_PROCFS_USERS
+#error proc_dir_entry->deleted is conflicted with proc_dir_entry->pde_users
+#endif
+
 #define LPROCFS_ENTRY_AND_CHECK(dp) do {        \
         typecheck(struct proc_dir_entry *, dp); \
         LPROCFS_ENTRY();                        \
@@ -549,13 +571,38 @@ extern struct rw_semaphore _lprocfs_lock;
         }                                       \
 } while(0)
 #define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
-#else
 
+#elif defined(HAVE_PROCFS_USERS)
+/* evaluates to 1 if dp->proc_fops is NULL (read under pde_unload_lock) */
+#define LPROCFS_CHECK_DELETED(dp) ({            \
+        int deleted = 0;                        \
+        spin_lock(&(dp)->pde_unload_lock);      \
+        if ((dp)->proc_fops == NULL)            \
+                deleted = 1;                    \
+        spin_unlock(&(dp)->pde_unload_lock);    \
+        deleted;                                \
+})
+/* NOTE: makes the *calling* function return -ENODEV when dp is deleted */
+#define LPROCFS_ENTRY_AND_CHECK(dp) do {        \
+        if (LPROCFS_CHECK_DELETED(dp))          \
+                return -ENODEV;                 \
+} while(0)
+
+#else
+        
 #define LPROCFS_ENTRY_AND_CHECK(dp) \
         LPROCFS_ENTRY();
 #define LPROCFS_CHECK_DELETED(dp) (0)
 #endif
 
+#define LPROCFS_SRCH_ENTRY()      do {  \
+        down_read(&_lprocfs_lock);      \
+} while(0)
+
+#define LPROCFS_SRCH_EXIT()       do {  \
+        up_read(&_lprocfs_lock);        \
+} while(0)
+
 #define LPROCFS_WRITE_ENTRY()     do {  \
         down_write(&_lprocfs_lock);     \
 } while(0)
index 59f981d..d4ab8c0 100644 (file)
@@ -74,17 +74,17 @@ struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
         if (head == NULL)
                 return NULL;
 
-        LPROCFS_ENTRY();
+        LPROCFS_SRCH_ENTRY();
         temp = head->subdir;
         while (temp != NULL) {
                 if (strcmp(temp->name, name) == 0) {
-                        LPROCFS_EXIT();
+                        LPROCFS_SRCH_EXIT();
                         return temp;
                 }
 
                 temp = temp->next;
         }
-        LPROCFS_EXIT();
+        LPROCFS_SRCH_EXIT();
         return NULL;
 }
 
@@ -351,9 +351,23 @@ void lprocfs_remove(struct proc_dir_entry **rooth)
                          "0x%p  %s/%s len %d\n", rm_entry, temp->name,
                          rm_entry->name, (int)strlen(rm_entry->name));
 
+#ifdef HAVE_PROCFS_USERS
+                /* If procfs uses a user count to synchronize deletion of
+                 * a proc entry, rm_entry->data is not protected, so
+                 * lprocfs_fops_read and lprocfs_fops_write may call
+                 * proc_dir_entry->read_proc (or write_proc) with
+                 * proc_dir_entry->data == NULL and cause a kernel Oops.
+                 * See bug 19706 for detailed information. */
+
+                /* procfs won't free rm_entry->data if it isn't a LINK,
+                 * and Lustre won't use rm_entry->data if it is a LINK */
+                if (S_ISLNK(rm_entry->mode))
+                        rm_entry->data = NULL;
+#else
                 /* Now, the rm_entry->deleted flags is protected
                  * by _lprocfs_lock. */
                 rm_entry->data = NULL;
+#endif
                 remove_proc_entry(rm_entry->name, temp);
                 if (temp == parent)
                         break;