Whamcloud - gitweb
LU-5435 libcfs: copy out ioctl inline buffer
[fs/lustre-release.git] / lustre / obdclass / linux / linux-module.c
index df8a15d..8b114d2 100644 (file)
@@ -1,37 +1,47 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
  *
- * Object Devices Class Driver
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * lustre/obdclass/linux/linux-module.c
  *
+ * Object Devices Class Driver
  * These are the only exported functions, they provide some generic
  * infrastructure for managing object devices
  */
+
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 
-#ifdef __KERNEL__
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h> /* for CONFIG_PROC_FS */
-#endif
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/highmem.h>
 #include <asm/io.h>
 #include <asm/ioctls.h>
-#include <asm/system.h>
 #include <asm/poll.h>
 #include <asm/uaccess.h>
 #include <linux/miscdevice.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
-#else
-# include <liblustre.h>
-#endif
 
 #include <libcfs/libcfs.h>
 #include <obd_support.h>
 #include <obd_class.h>
+#include <lnet/lnetctl.h>
 #include <lprocfs_status.h>
+#include <lustre_ioctl.h>
 #include <lustre_ver.h>
 #include <lustre/lustre_build_version.h>
-#ifdef __KERNEL__
-#include <linux/lustre_version.h>
 
 int proc_version;
 
 /* buffer MUST be at least the size of obd_ioctl_hdr */
-int obd_ioctl_getdata(char **buf, int *len, void *arg)
+int obd_ioctl_getdata(char **buf, int *len, void __user *arg)
 {
-        struct obd_ioctl_hdr hdr;
-        struct obd_ioctl_data *data;
-        int err;
-        int offset = 0;
-        ENTRY;
+       struct obd_ioctl_hdr hdr;
+       struct obd_ioctl_data *data;
+       int offset = 0;
+       ENTRY;
 
-        err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
-        if ( err ) 
-                RETURN(err);
+       if (copy_from_user(&hdr, arg, sizeof(hdr)))
+               RETURN(-EFAULT);
 
         if (hdr.ioc_version != OBD_IOCTL_VERSION) {
                 CERROR("Version mismatch kernel (%x) vs application (%x)\n",
@@ -102,9 +105,11 @@ int obd_ioctl_getdata(char **buf, int *len, void *arg)
                 RETURN(-EINVAL);
         }
 
-        /* XXX allocate this more intelligently, using kmalloc when
-         * appropriate */
-        OBD_VMALLOC(*buf, hdr.ioc_len);
+        /* When many processes call vmalloc on a multi-core system, the
+         * resulting lock contention hurts performance badly;
+         * obdfilter-survey, which relies on ioctl, is one example. So we
+         * had better avoid vmalloc on the ioctl path. See LU-66. */
+        OBD_ALLOC_LARGE(*buf, hdr.ioc_len);
         if (*buf == NULL) {
                 CERROR("Cannot allocate control buffer of len %d\n",
                        hdr.ioc_len);
@@ -113,193 +118,216 @@ int obd_ioctl_getdata(char **buf, int *len, void *arg)
         *len = hdr.ioc_len;
         data = (struct obd_ioctl_data *)*buf;
 
-        err = copy_from_user(*buf, (void *)arg, hdr.ioc_len);
-        if ( err ) {
-                OBD_VFREE(*buf, hdr.ioc_len);
-                RETURN(err);
-        }
+       if (copy_from_user(*buf, arg, hdr.ioc_len)) {
+               OBD_FREE_LARGE(*buf, hdr.ioc_len);
+               RETURN(-EFAULT);
+       }
 
         if (obd_ioctl_is_invalid(data)) {
                 CERROR("ioctl not correctly formatted\n");
-                OBD_VFREE(*buf, hdr.ioc_len);
+                OBD_FREE_LARGE(*buf, hdr.ioc_len);
                 RETURN(-EINVAL);
         }
 
         if (data->ioc_inllen1) {
                 data->ioc_inlbuf1 = &data->ioc_bulk[0];
-                offset += size_round(data->ioc_inllen1);
+                offset += cfs_size_round(data->ioc_inllen1);
         }
 
         if (data->ioc_inllen2) {
                 data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
-                offset += size_round(data->ioc_inllen2);
+                offset += cfs_size_round(data->ioc_inllen2);
         }
 
         if (data->ioc_inllen3) {
                 data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
-                offset += size_round(data->ioc_inllen3);
+                offset += cfs_size_round(data->ioc_inllen3);
         }
 
-        if (data->ioc_inllen4) {
-                data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
-        }
+       if (data->ioc_inllen4)
+               data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
 
-        EXIT;
-        return 0;
+       RETURN(0);
 }
+EXPORT_SYMBOL(obd_ioctl_getdata);
 
-int obd_ioctl_popdata(void *arg, void *data, int len)
+int obd_ioctl_popdata(void __user *arg, void *data, int len)
 {
-        int err; 
-        
-        err = copy_to_user(arg, data, len);
-        if (err)
-                err = -EFAULT;
-        return err;
-}
+       int err;
+       ENTRY;
 
-EXPORT_SYMBOL(obd_ioctl_getdata);
+       err = copy_to_user(arg, data, len) ? -EFAULT : 0;
+       RETURN(err);
+}
 EXPORT_SYMBOL(obd_ioctl_popdata);
 
-#define OBD_MINOR 241
-extern struct cfs_psdev_ops          obd_psdev_ops;
-
 /*  opening /dev/obd */
 static int obd_class_open(struct inode * inode, struct file * file)
 {
-        if (obd_psdev_ops.p_open != NULL)
-                return obd_psdev_ops.p_open(0, NULL);
-        return -EPERM;
+       ENTRY;
+
+       try_module_get(THIS_MODULE);
+       RETURN(0);
 }
 
 /*  closing /dev/obd */
 static int obd_class_release(struct inode * inode, struct file * file)
 {
-        if (obd_psdev_ops.p_close != NULL)
-                return obd_psdev_ops.p_close(0, NULL);
-        return -EPERM;
+       ENTRY;
+
+       module_put(THIS_MODULE);
+       RETURN(0);
 }
 
 /* to control /dev/obd */
-static int obd_class_ioctl(struct inode *inode, struct file *filp,
-                           unsigned int cmd, unsigned long arg)
+static long obd_class_ioctl(struct file *filp, unsigned int cmd,
+                           unsigned long arg)
 {
         int err = 0;
         ENTRY;
 
-        if (current->fsuid != 0)
+        /* Allow non-root access for OBD_IOC_PING_TARGET - used by lfs check */
+        if (!cfs_capable(CFS_CAP_SYS_ADMIN) && (cmd != OBD_IOC_PING_TARGET))
                 RETURN(err = -EACCES);
         if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */
                 RETURN(err = -ENOTTY);
 
-        if (obd_psdev_ops.p_ioctl != NULL)
-                err = obd_psdev_ops.p_ioctl(NULL, cmd, (void *)arg);
-        else
-                err = -EPERM;
+        err = class_handle_ioctl(cmd, (unsigned long)arg);
 
         RETURN(err);
 }
 
 /* declare character device */
 static struct file_operations obd_psdev_fops = {
-        .owner   = THIS_MODULE,
-        .ioctl   = obd_class_ioctl,     /* ioctl */
-        .open    = obd_class_open,      /* open */
-        .release = obd_class_release,   /* release */
+       .owner          = THIS_MODULE,
+       .unlocked_ioctl = obd_class_ioctl, /* unlocked_ioctl */
+       .open           = obd_class_open,      /* open */
+       .release        = obd_class_release,   /* release */
 };
 
 /* modules setup */
-cfs_psdev_t obd_psdev = {
-        .minor = OBD_MINOR,
-        .name  = "obd_psdev",
+struct miscdevice obd_psdev = {
+        .minor = OBD_DEV_MINOR,
+        .name  = OBD_DEV_NAME,
         .fops  = &obd_psdev_fops,
 };
 
-#endif
 
 #ifdef LPROCFS
-int obd_proc_read_version(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
+static int obd_proc_version_seq_show(struct seq_file *m, void *v)
 {
-        *eof = 1;
-#ifdef HAVE_VFS_INTENT_PATCHES
-        return snprintf(page, count, "lustre: %s\nkernel: %u\nbuild:  %s\n",
-                        LUSTRE_VERSION_STRING, LUSTRE_KERNEL_VERSION,
-                        BUILD_VERSION);
-#else
-        return snprintf(page, count, "lustre: %s\nkernel: %s\nbuild:  %s\n",
-                        LUSTRE_VERSION_STRING, "patchless", BUILD_VERSION);
-#endif
+       return seq_printf(m, "lustre: %s\nkernel: %s\nbuild:  %s\n",
+                         LUSTRE_VERSION_STRING, "patchless_client",
+                         BUILD_VERSION);
 }
+LPROC_SEQ_FOPS_RO(obd_proc_version);
 
-int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
-                         int *eof, void *data)
+static int obd_proc_pinger_seq_show(struct seq_file *m, void *v)
 {
-        *eof = 1;
-        return snprintf(page, count, "%s\n",
+       return seq_printf(m, "%s\n",
 #ifdef ENABLE_PINGER
-                        "on"
+                            "on"
 #else
-                        "off"
+                            "off"
 #endif
-                       );
+                        );
+}
+LPROC_SEQ_FOPS_RO(obd_proc_pinger);
+
+/**
+ * Check the health of all OBD devices and print a report to a seq_file.
+ *
+ * \param[in] m		seq_file that receives the health report
+ * \param[in] data	unused
+ *
+ * \retval return value of seq_printf() if healthy, 0 otherwise
+ */
+static int obd_proc_health_seq_show(struct seq_file *m, void *data)
+{
+       bool healthy = true;
+       int i;
+
+       if (libcfs_catastrophe)
+               seq_printf(m, "LBUG\n");
+
+       read_lock(&obd_dev_lock);
+       for (i = 0; i < class_devno_max(); i++) {
+               struct obd_device *obd;
+
+               obd = class_num2obd(i);
+               if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
+                       continue;
+
+               LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+               if (obd->obd_stopping)
+                       continue;
+
+               class_incref(obd, __FUNCTION__, current);
+               read_unlock(&obd_dev_lock);
+
+               if (obd_health_check(NULL, obd)) {
+                       seq_printf(m, "device %s reported unhealthy\n",
+                                       obd->obd_name);
+                       healthy = false;
+               }
+               class_decref(obd, __FUNCTION__, current);
+               read_lock(&obd_dev_lock);
+       }
+       read_unlock(&obd_dev_lock);
+
+       if (healthy)
+               return seq_printf(m, "healthy\n");
+
+       seq_printf(m, "NOT HEALTHY\n");
+       return 0;
 }
+LPROC_SEQ_FOPS_RO(obd_proc_health);
 
-static int obd_proc_read_health(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
+static int obd_proc_jobid_var_seq_show(struct seq_file *m, void *v)
 {
-        int rc = 0, i;
-        *eof = 1;
-
-        if (libcfs_catastrophe)
-                rc += snprintf(page + rc, count - rc, "LBUG\n");
-
-        spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd;
-
-                obd = class_num2obd(i);
-                if (obd == NULL)
-                        continue;
-
-                LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-                if (obd->obd_stopping)
-                        continue;
-
-                class_incref(obd);
-                spin_unlock(&obd_dev_lock);
-
-                if (obd_health_check(obd)) {
-                        rc += snprintf(page + rc, count - rc,
-                                       "device %s reported unhealthy\n",
-                                       obd->obd_name);
-                }
-                class_decref(obd);
-                spin_lock(&obd_dev_lock);
-        }
-        spin_unlock(&obd_dev_lock);
+       return seq_printf(m, "%s\n", obd_jobid_var);
+}
+
+static ssize_t
+obd_proc_jobid_var_seq_write(struct file *file, const char __user *buffer,
+                            size_t count, loff_t *off)
+{
+       if (!count || count > JOBSTATS_JOBID_VAR_MAX_LEN)
+               return -EINVAL;
+
+       memset(obd_jobid_var, 0, JOBSTATS_JOBID_VAR_MAX_LEN + 1);
 
-        if (rc == 0)
-                return snprintf(page, count, "healthy\n");
+       /* This might leave the var invalid on error, which is probably fine.*/
+       if (copy_from_user(obd_jobid_var, buffer, count))
+               return -EFAULT;
 
-        rc += snprintf(page + rc, count - rc, "NOT HEALTHY\n");
-        return rc;
+       /* Trim the trailing '\n' if any */
+       if (obd_jobid_var[count - 1] == '\n')
+               obd_jobid_var[count - 1] = 0;
+
+       return count;
 }
+LPROC_SEQ_FOPS(obd_proc_jobid_var);
 
 /* Root for /proc/fs/lustre */
 struct proc_dir_entry *proc_lustre_root = NULL;
-
-struct lprocfs_vars lprocfs_base[] = {
-        { "version", obd_proc_read_version, NULL, NULL },
-        { "pinger", obd_proc_read_pinger, NULL, NULL },
-        { "health_check", obd_proc_read_health, NULL, NULL },
-        { 0 }
+EXPORT_SYMBOL(proc_lustre_root);
+
+struct lprocfs_seq_vars lprocfs_base[] = {
+       { .name =       "version",
+         .fops =       &obd_proc_version_fops  },
+       { .name =       "pinger",
+         .fops =       &obd_proc_pinger_fops   },
+       { .name =       "health_check",
+         .fops =       &obd_proc_health_fops   },
+       { .name =       "jobid_var",
+         .fops =       &obd_proc_jobid_var_fops},
+       { 0 }
 };
 #else
 #define lprocfs_base NULL
 #endif /* LPROCFS */
 
-#ifdef __KERNEL__
 static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos)
 {
         if (*pos >= class_devno_max())
@@ -313,7 +341,7 @@ static void obd_device_list_seq_stop(struct seq_file *p, void *v)
 }
 
 static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{      
+{
         ++*pos;
         if (*pos >= class_devno_max())
                 return NULL;
@@ -345,7 +373,7 @@ static int obd_device_list_seq_show(struct seq_file *p, void *v)
         return seq_printf(p, "%3d %s %s %s %s %d\n",
                           (int)index, status, obd->obd_type->typ_name,
                           obd->obd_name, obd->obd_uuid.uuid,
-                          atomic_read(&obd->obd_refcount));
+                         atomic_read(&obd->obd_refcount));
 }
 
 struct seq_operations obd_device_list_sops = {
@@ -357,17 +385,15 @@ struct seq_operations obd_device_list_sops = {
 
 static int obd_device_list_open(struct inode *inode, struct file *file)
 {
-        struct proc_dir_entry *dp = PDE(inode);
-        struct seq_file *seq;
-        int rc = seq_open(file, &obd_device_list_sops);
-
-        if (rc)
-                return rc;
+       struct seq_file *seq;
+       int rc = seq_open(file, &obd_device_list_sops);
 
-        seq = file->private_data;
-        seq->private = dp->data;
+       if (rc)
+               return rc;
 
-        return 0;
+       seq = file->private_data;
+       seq->private = PDE_DATA(inode);
+       return 0;
 }
 
 struct file_operations obd_device_list_fops = {
@@ -377,28 +403,40 @@ struct file_operations obd_device_list_fops = {
         .llseek  = seq_lseek,
         .release = seq_release,
 };
-#endif
 
 int class_procfs_init(void)
 {
-#ifdef __KERNEL__
-        int rc;
-        ENTRY;
+       struct proc_dir_entry *entry;
+       int rc;
+       ENTRY;
 
-        obd_sysctl_init();
-        proc_lustre_root = lprocfs_register("lustre", proc_root_fs,
-                                            lprocfs_base, NULL);
-        rc = lprocfs_seq_create(proc_lustre_root, "devices", 0444,
-                                &obd_device_list_fops, NULL);
-        if (rc)
-                CERROR("error adding /proc/fs/lustre/devices file\n");
-#else
-        ENTRY;
-#endif
-        RETURN(0);
+       obd_sysctl_init();
+
+       entry = lprocfs_seq_register("fs/lustre", NULL, lprocfs_base, NULL);
+       if (IS_ERR(entry)) {
+               rc = PTR_ERR(entry);
+               CERROR("cannot create '/proc/fs/lustre': rc = %d\n", rc);
+               RETURN(rc);
+       }
+
+       proc_lustre_root = entry;
+
+       rc = lprocfs_seq_create(proc_lustre_root, "devices", 0444,
+                               &obd_device_list_fops, NULL);
+       if (rc < 0) {
+               CERROR("cannot create '/proc/fs/lustre/devices': rc = %d\n",
+                      rc);
+               GOTO(out_proc, rc);
+       }
+
+       RETURN(rc);
+
+out_proc:
+       lprocfs_remove(&proc_lustre_root);
+
+       RETURN(rc);
 }
 
-#ifdef __KERNEL__
 int class_procfs_clean(void)
 {
         ENTRY;
@@ -407,18 +445,3 @@ int class_procfs_clean(void)
         }
         RETURN(0);
 }
-
-
-/* Check that we're building against the appropriate version of the Lustre
- * kernel patch */
-#include <linux/lustre_version.h>
-#ifdef LUSTRE_KERNEL_VERSION
-#define LUSTRE_MIN_VERSION 45
-#define LUSTRE_MAX_VERSION 47
-#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
-# error Cannot continue: Your Lustre kernel patch is older than the sources
-#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
-# error Cannot continue: Your Lustre sources are older than the kernel patch
-#endif
-#endif
-#endif