Whamcloud - gitweb
LU-4563 Fix unsafe userspace access in many proc files
[fs/lustre-release.git] / lustre / obdclass / linux / linux-module.c
index 4b6ca83..1642123 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -28,6 +26,8 @@
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 
 #ifdef __KERNEL__
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h> /* for CONFIG_PROC_FS */
-#endif
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/highmem.h>
 #include <asm/io.h>
 #include <asm/ioctls.h>
-#include <asm/system.h>
 #include <asm/poll.h>
 #include <asm/uaccess.h>
 #include <linux/miscdevice.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #else
 # include <liblustre.h>
@@ -86,7 +78,6 @@
 #include <lustre_ver.h>
 #include <lustre/lustre_build_version.h>
 #ifdef __KERNEL__
-#include <linux/lustre_version.h>
 
 int proc_version;
 
@@ -99,7 +90,7 @@ int obd_ioctl_getdata(char **buf, int *len, void *arg)
         int offset = 0;
         ENTRY;
 
-        err = cfs_copy_from_user(&hdr, (void *)arg, sizeof(hdr));
+       err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
         if ( err )
                 RETURN(err);
 
@@ -120,9 +111,11 @@ int obd_ioctl_getdata(char **buf, int *len, void *arg)
                 RETURN(-EINVAL);
         }
 
-        /* XXX allocate this more intelligently, using kmalloc when
-         * appropriate */
-        OBD_VMALLOC(*buf, hdr.ioc_len);
+        /* vmalloc suffers heavy lock contention when many processes call
+         * it on a multi-core system, and that hurts performance badly;
+         * obdfilter-survey, which relies on ioctl, is one example. So
+         * avoid vmalloc on the ioctl path. LU-66 */
+        OBD_ALLOC_LARGE(*buf, hdr.ioc_len);
         if (*buf == NULL) {
                 CERROR("Cannot allocate control buffer of len %d\n",
                        hdr.ioc_len);
@@ -131,15 +124,15 @@ int obd_ioctl_getdata(char **buf, int *len, void *arg)
         *len = hdr.ioc_len;
         data = (struct obd_ioctl_data *)*buf;
 
-        err = cfs_copy_from_user(*buf, (void *)arg, hdr.ioc_len);
+       err = copy_from_user(*buf, (void *)arg, hdr.ioc_len);
         if ( err ) {
-                OBD_VFREE(*buf, hdr.ioc_len);
+                OBD_FREE_LARGE(*buf, hdr.ioc_len);
                 RETURN(err);
         }
 
         if (obd_ioctl_is_invalid(data)) {
                 CERROR("ioctl not correctly formatted\n");
-                OBD_VFREE(*buf, hdr.ioc_len);
+                OBD_FREE_LARGE(*buf, hdr.ioc_len);
                 RETURN(-EINVAL);
         }
 
@@ -165,41 +158,40 @@ int obd_ioctl_getdata(char **buf, int *len, void *arg)
         EXIT;
         return 0;
 }
+EXPORT_SYMBOL(obd_ioctl_getdata);
 
 int obd_ioctl_popdata(void *arg, void *data, int len)
 {
-        int err;
+       int err;
 
-        err = cfs_copy_to_user(arg, data, len);
-        if (err)
-                err = -EFAULT;
-        return err;
+       err = copy_to_user(arg, data, len);
+       if (err)
+               err = -EFAULT;
+       return err;
 }
-
-EXPORT_SYMBOL(obd_ioctl_getdata);
 EXPORT_SYMBOL(obd_ioctl_popdata);
 
 /*  opening /dev/obd */
 static int obd_class_open(struct inode * inode, struct file * file)
 {
-        ENTRY;
+       ENTRY;
 
-        PORTAL_MODULE_USE;
-        RETURN(0);
+       try_module_get(THIS_MODULE);
+       RETURN(0);
 }
 
 /*  closing /dev/obd */
 static int obd_class_release(struct inode * inode, struct file * file)
 {
-        ENTRY;
+       ENTRY;
 
-        PORTAL_MODULE_UNUSE;
-        RETURN(0);
+       module_put(THIS_MODULE);
+       RETURN(0);
 }
 
 /* to control /dev/obd */
-static int obd_class_ioctl(struct inode *inode, struct file *filp,
-                           unsigned int cmd, unsigned long arg)
+static long obd_class_ioctl(struct file *filp, unsigned int cmd,
+                           unsigned long arg)
 {
         int err = 0;
         ENTRY;
@@ -217,14 +209,14 @@ static int obd_class_ioctl(struct inode *inode, struct file *filp,
 
 /* declare character device */
 static struct file_operations obd_psdev_fops = {
-        .owner   = THIS_MODULE,
-        .ioctl   = obd_class_ioctl,     /* ioctl */
-        .open    = obd_class_open,      /* open */
-        .release = obd_class_release,   /* release */
+       .owner          = THIS_MODULE,
+       .unlocked_ioctl = obd_class_ioctl, /* unlocked_ioctl */
+       .open           = obd_class_open,      /* open */
+       .release        = obd_class_release,   /* release */
 };
 
 /* modules setup */
-cfs_psdev_t obd_psdev = {
+struct miscdevice obd_psdev = {
         .minor = OBD_DEV_MINOR,
         .name  = OBD_DEV_NAME,
         .fops  = &obd_psdev_fops,
@@ -233,98 +225,115 @@ cfs_psdev_t obd_psdev = {
 #endif
 
 #ifdef LPROCFS
-int obd_proc_read_version(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
+static int obd_proc_version_seq_show(struct seq_file *m, void *v)
 {
-        *eof = 1;
-#ifdef HAVE_VFS_INTENT_PATCHES
-        return snprintf(page, count, "lustre: %s\nkernel: %u\nbuild:  %s\n",
-                        LUSTRE_VERSION_STRING, LUSTRE_KERNEL_VERSION,
-                        BUILD_VERSION);
-#else
-        return snprintf(page, count, "lustre: %s\nkernel: %s\nbuild:  %s\n",
-                        LUSTRE_VERSION_STRING, "patchless_client",
-                        BUILD_VERSION);
-#endif
+       return seq_printf(m, "lustre: %s\nkernel: %s\nbuild:  %s\n",
+                         LUSTRE_VERSION_STRING, "patchless_client",
+                         BUILD_VERSION);
 }
+LPROC_SEQ_FOPS_RO(obd_proc_version);
 
-int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
-                         int *eof, void *data)
+static int obd_proc_pinger_seq_show(struct seq_file *m, void *v)
 {
-        *eof = 1;
-        return snprintf(page, count, "%s\n",
+       return seq_printf(m, "%s\n",
 #ifdef ENABLE_PINGER
-                        "on"
+                            "on"
 #else
-                        "off"
+                            "off"
 #endif
-                       );
+                        );
 }
+LPROC_SEQ_FOPS_RO(obd_proc_pinger);
 
 /**
  * Check all obd devices health
  *
- * \param page
- * \param start
- * \param off
- * \param count
- * \param eof
- * \param data
- *                  proc read function parameters, please refer to kernel
- *                  code fs/proc/generic.c proc_file_read()
+ * \param m    [in] seq_file that the health report is printed to
  * \param data [in] unused
  *
- * \retval number of characters printed
+ * \retval 0 if any device is unhealthy, else the seq_printf() result
  */
-static int obd_proc_read_health(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
+static int obd_proc_health_seq_show(struct seq_file *m, void *data)
 {
-        int rc = 0, i;
-        *eof = 1;
-
-        if (libcfs_catastrophe)
-                rc += snprintf(page + rc, count - rc, "LBUG\n");
-
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd;
-
-                obd = class_num2obd(i);
-                if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
-                        continue;
-
-                LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-                if (obd->obd_stopping)
-                        continue;
-
-                class_incref(obd, __FUNCTION__, cfs_current());
-                cfs_spin_unlock(&obd_dev_lock);
-
-                if (obd_health_check(obd)) {
-                        rc += snprintf(page + rc, count - rc,
-                                       "device %s reported unhealthy\n",
-                                       obd->obd_name);
-                }
-                class_decref(obd, __FUNCTION__, cfs_current());
-                cfs_spin_lock(&obd_dev_lock);
-        }
-        cfs_spin_unlock(&obd_dev_lock);
+       bool healthy = true;
+       int i;
+
+       if (libcfs_catastrophe)
+               seq_printf(m, "LBUG\n");
+
+       read_lock(&obd_dev_lock);
+       for (i = 0; i < class_devno_max(); i++) {
+               struct obd_device *obd;
+
+               obd = class_num2obd(i);
+               if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
+                       continue;
+
+               LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+               if (obd->obd_stopping)
+                       continue;
+
+               class_incref(obd, __FUNCTION__, current);
+               read_unlock(&obd_dev_lock);
+
+               if (obd_health_check(NULL, obd)) {
+                       seq_printf(m, "device %s reported unhealthy\n",
+                                       obd->obd_name);
+                       healthy = false;
+               }
+               class_decref(obd, __FUNCTION__, current);
+               read_lock(&obd_dev_lock);
+       }
+       read_unlock(&obd_dev_lock);
+
+       if (healthy)
+               return seq_printf(m, "healthy\n");
+
+       seq_printf(m, "NOT HEALTHY\n");
+       return 0;
+}
+LPROC_SEQ_FOPS_RO(obd_proc_health);
 
-        if (rc == 0)
-                return snprintf(page, count, "healthy\n");
+static int obd_proc_jobid_var_seq_show(struct seq_file *m, void *v)
+{
+       return seq_printf(m, "%s\n", obd_jobid_var);
+}
+
+static ssize_t
+obd_proc_jobid_var_seq_write(struct file *file, const char __user *buffer,
+                            size_t count, loff_t *off)
+{
+       if (!count || count > JOBSTATS_JOBID_VAR_MAX_LEN)
+               return -EINVAL;
+
+       memset(obd_jobid_var, 0, JOBSTATS_JOBID_VAR_MAX_LEN + 1);
 
-        rc += snprintf(page + rc, count - rc, "NOT HEALTHY\n");
-        return rc;
+       /* On error this may leave the var invalid, which is probably fine. */
+       if (copy_from_user(obd_jobid_var, buffer, count))
+               return -EFAULT;
+
+       /* Trim the trailing '\n' if any */
+       if (obd_jobid_var[count - 1] == '\n')
+               obd_jobid_var[count - 1] = 0;
+
+       return count;
 }
+LPROC_SEQ_FOPS(obd_proc_jobid_var);
 
 /* Root for /proc/fs/lustre */
 struct proc_dir_entry *proc_lustre_root = NULL;
-
-struct lprocfs_vars lprocfs_base[] = {
-        { "version", obd_proc_read_version, NULL, NULL },
-        { "pinger", obd_proc_read_pinger, NULL, NULL },
-        { "health_check", obd_proc_read_health, NULL, NULL },
-        { 0 }
+EXPORT_SYMBOL(proc_lustre_root);
+
+struct lprocfs_seq_vars lprocfs_base[] = {
+       { .name =       "version",
+         .fops =       &obd_proc_version_fops  },
+       { .name =       "pinger",
+         .fops =       &obd_proc_pinger_fops   },
+       { .name =       "health_check",
+         .fops =       &obd_proc_health_fops   },
+       { .name =       "jobid_var",
+         .fops =       &obd_proc_jobid_var_fops},
+       { 0 }
 };
 #else
 #define lprocfs_base NULL
@@ -376,7 +385,7 @@ static int obd_device_list_seq_show(struct seq_file *p, void *v)
         return seq_printf(p, "%3d %s %s %s %s %d\n",
                           (int)index, status, obd->obd_type->typ_name,
                           obd->obd_name, obd->obd_uuid.uuid,
-                          cfs_atomic_read(&obd->obd_refcount));
+                         atomic_read(&obd->obd_refcount));
 }
 
 struct seq_operations obd_device_list_sops = {
@@ -388,17 +397,15 @@ struct seq_operations obd_device_list_sops = {
 
 static int obd_device_list_open(struct inode *inode, struct file *file)
 {
-        struct proc_dir_entry *dp = PDE(inode);
-        struct seq_file *seq;
-        int rc = seq_open(file, &obd_device_list_sops);
-
-        if (rc)
-                return rc;
+       struct seq_file *seq;
+       int rc = seq_open(file, &obd_device_list_sops);
 
-        seq = file->private_data;
-        seq->private = dp->data;
+       if (rc)
+               return rc;
 
-        return 0;
+       seq = file->private_data;
+       seq->private = PDE_DATA(inode);
+       return 0;
 }
 
 struct file_operations obd_device_list_fops = {
@@ -413,20 +420,20 @@ struct file_operations obd_device_list_fops = {
 int class_procfs_init(void)
 {
 #ifdef __KERNEL__
-        int rc;
-        ENTRY;
-
-        obd_sysctl_init();
-        proc_lustre_root = lprocfs_register("fs/lustre", NULL,
-                                            lprocfs_base, NULL);
-        rc = lprocfs_seq_create(proc_lustre_root, "devices", 0444,
-                                &obd_device_list_fops, NULL);
-        if (rc)
-                CERROR("error adding /proc/fs/lustre/devices file\n");
+       int rc;
+       ENTRY;
+
+       obd_sysctl_init();
+       proc_lustre_root = lprocfs_seq_register("fs/lustre", NULL,
+                                               lprocfs_base, NULL);
+       rc = lprocfs_seq_create(proc_lustre_root, "devices", 0444,
+                               &obd_device_list_fops, NULL);
+       if (rc)
+               CERROR("error adding /proc/fs/lustre/devices file\n");
 #else
-        ENTRY;
+       ENTRY;
 #endif
-        RETURN(0);
+       RETURN(0);
 }
 
 #ifdef __KERNEL__
@@ -438,18 +445,4 @@ int class_procfs_clean(void)
         }
         RETURN(0);
 }
-
-
-/* Check that we're building against the appropriate version of the Lustre
- * kernel patch */
-#include <linux/lustre_version.h>
-#ifdef LUSTRE_KERNEL_VERSION
-#define LUSTRE_MIN_VERSION 45
-#define LUSTRE_MAX_VERSION 47
-#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
-# error Cannot continue: Your Lustre kernel patch is older than the sources
-#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
-# error Cannot continue: Your Lustre sources are older than the kernel patch
-#endif
-#endif
 #endif