Whamcloud - gitweb
LU-12506 changelog: support large number of MDT 59/37759/7
authorHongchao Zhang <hongchao@whamcloud.com>
Wed, 4 Mar 2020 14:07:19 +0000 (09:07 -0500)
committerOleg Drokin <green@whamcloud.com>
Tue, 24 Mar 2020 05:16:16 +0000 (05:16 +0000)
On the client, the changelog of each MDT is associated with
one miscdevice, but the number of miscdevices is limited
to 64 in the Linux kernel, so registration fails if there are
more than 64 MDTs.

This patch replaces the miscdevices with dynamically allocated
character devices to support more MDTs.

Change-Id: Ie3ce76cbe1c613bf17d6350ea95546524b6d66b8
Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/37759
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Petros Koutoupis <petros.koutoupis@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdc/mdc_changelog.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_request.c

index fb9fa54..634fbd8 100644 (file)
@@ -33,7 +33,8 @@
 #include <linux/init.h>
 #include <linux/kthread.h>
 #include <linux/poll.h>
-#include <linux/miscdevice.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
 
 #include <lustre_log.h>
 #include <uapi/linux/lustre/lustre_ioctl.h>
@@ -58,15 +59,16 @@ static LIST_HEAD(chlg_registered_devices);
 
 struct chlg_registered_dev {
        /* Target name, e.g. "testfs-MDT0000"; the "changelog-" prefix is
         * added when the device node is created */
-       char                    ced_name[32];
-       /* Misc device descriptor */
-       struct miscdevice       ced_misc;
+       char                     ced_name[32];
+       /* changelog char device and the struct device created for it */
+       struct cdev              ced_cdev;
+       struct device           *ced_device;
        /* OBDs referencing this device (multiple mount point) */
-       struct list_head        ced_obds;
+       struct list_head         ced_obds;
        /* Reference counter for proper deregistration */
-       struct kref             ced_refs;
+       struct kref              ced_refs;
        /* Link within the global chlg_registered_devices */
-       struct list_head        ced_link;
+       struct list_head         ced_link;
 };
 
 struct chlg_reader_state {
@@ -111,19 +113,57 @@ enum {
        CDEV_CHLG_MAX_PREFETCH = 1024,
 };
 
+static DEFINE_IDR(chlg_minor_idr);
+static DEFINE_SPINLOCK(chlg_minor_lock);
+/**
+ * Reserve a minor number for a changelog character device.
+ *
+ * The id is taken from chlg_minor_idr, starting at 0 and bounded by
+ * MDC_CHANGELOG_DEV_COUNT; a placeholder pointer ((void *)-1) is stored in
+ * the slot. The IDR is preloaded with GFP_KERNEL before chlg_minor_lock is
+ * taken, and the allocation made under the lock uses GFP_NOWAIT.
+ *
+ * On success the minor is stored in *pminor and 0 is returned; otherwise
+ * the negative value returned by idr_alloc() is passed back and *pminor is
+ * left untouched.
+ */
+static int chlg_minor_alloc(int *pminor)
+{
+       void *minor_allocated = (void *)-1;
+       int minor;
+
+       idr_preload(GFP_KERNEL);
+       spin_lock(&chlg_minor_lock);
+       minor = idr_alloc(&chlg_minor_idr, minor_allocated, 0,
+                         MDC_CHANGELOG_DEV_COUNT, GFP_NOWAIT);
+       spin_unlock(&chlg_minor_lock);
+       idr_preload_end();
+
+       if (minor < 0)
+               return minor;
+
+       *pminor = minor;
+       return 0;
+}
+/**
+ * Remove \a minor from chlg_minor_idr, under chlg_minor_lock, so that
+ * chlg_minor_alloc() can hand it out again.
+ */
+static void chlg_minor_free(int minor)
+{
+       spin_lock(&chlg_minor_lock);
+       idr_remove(&chlg_minor_idr, minor);
+       spin_unlock(&chlg_minor_lock);
+}
+
+/**
+ * Release callback of a changelog device, installed by
+ * mdc_changelog_cdev_init() in place of the one set by device_create().
+ *
+ * Gives the minor number back, frees the chlg_registered_dev stored as
+ * driver data, and finally frees \a dev itself: the structure was
+ * allocated by device_create(), whose own release callback (which would
+ * have freed it) no longer runs once it has been replaced by this one.
+ */
+static void chlg_device_release(struct device *dev)
+{
+       struct chlg_registered_dev *entry = dev_get_drvdata(dev);
+
+       chlg_minor_free(MINOR(entry->ced_cdev.dev));
+       OBD_FREE_PTR(entry);
+       /* do what the overridden device_create() release would have done */
+       kfree(dev);
+}
+
 /**
  * Deregister a changelog character device whose refcount has reached zero.
  */
 static void chlg_dev_clear(struct kref *kref)
 {
-       struct chlg_registered_dev *entry = container_of(kref,
-                                               struct chlg_registered_dev,
-                                               ced_refs);
+       struct chlg_registered_dev *entry;
+       /* entry is not freed here: chlg_device_release(), installed as the
+        * device's release callback, does the OBD_FREE_PTR() */
        ENTRY;
+       entry = container_of(kref, struct chlg_registered_dev,
+                            ced_refs);
 
        list_del(&entry->ced_link);
-       misc_deregister(&entry->ced_misc);
-       OBD_FREE_PTR(entry);
+       cdev_del(&entry->ced_cdev);
+       device_destroy(mdc_changelog_class, entry->ced_cdev.dev);
        EXIT;
 }
 
@@ -551,13 +591,12 @@ out_kbuf:
 static int chlg_open(struct inode *inode, struct file *file)
 {
        struct chlg_reader_state *crs;
-       struct miscdevice *misc = file->private_data;
        struct chlg_registered_dev *dev;
        struct task_struct *task;
        int rc;
        ENTRY;
 
-       dev = container_of(misc, struct chlg_registered_dev, ced_misc);
+       dev = container_of(inode->i_cdev, struct chlg_registered_dev, ced_cdev);
 
        OBD_ALLOC_PTR(crs);
        if (!crs)
@@ -676,11 +715,11 @@ static const struct file_operations chlg_fops = {
  * This uses obd_name of the form: "testfs-MDT0000-mdc-ffff88006501600"
  * and returns a name of the form: "testfs-MDT0000".
  */
-static void get_chlg_name(char *name, size_t name_len, struct obd_device *obd)
+static void get_target_name(char *name, size_t name_len, struct obd_device *obd)
 {
        int i;
 
-       snprintf(name, name_len, "changelog-%s", obd->obd_name);
+       snprintf(name, name_len, "%s", obd->obd_name);
 
        /* Find the 2nd '-' from the end and truncate on it */
        for (i = 0; i < 2; i++) {
@@ -742,18 +781,16 @@ int mdc_changelog_cdev_init(struct obd_device *obd)
 {
        struct chlg_registered_dev *exist;
        struct chlg_registered_dev *entry;
-       int rc;
+       struct device *device;
+       dev_t dev;
+       int minor, rc;
        ENTRY;
 
        OBD_ALLOC_PTR(entry);
        if (entry == NULL)
                RETURN(-ENOMEM);
 
-       get_chlg_name(entry->ced_name, sizeof(entry->ced_name), obd);
-
-       entry->ced_misc.minor = MISC_DYNAMIC_MINOR;
-       entry->ced_misc.name  = entry->ced_name;
-       entry->ced_misc.fops  = &chlg_fops;
+       get_target_name(entry->ced_name, sizeof(entry->ced_name), obd);
 
        kref_init(&entry->ced_refs);
        INIT_LIST_HEAD(&entry->ced_obds);
@@ -771,14 +808,37 @@ int mdc_changelog_cdev_init(struct obd_device *obd)
        list_add_tail(&entry->ced_link, &chlg_registered_devices);
 
        /* Register new character device */
-       rc = misc_register(&entry->ced_misc);
-       if (rc != 0) {
-               list_del_init(&obd->u.cli.cl_chg_dev_linkage);
-               list_del(&entry->ced_link);
+       cdev_init(&entry->ced_cdev, &chlg_fops);
+       entry->ced_cdev.owner = THIS_MODULE;
+
+       rc = chlg_minor_alloc(&minor);
+       if (rc)
                GOTO(out_unlock, rc);
-       }
+
+       dev = MKDEV(MAJOR(mdc_changelog_dev), minor);
+       rc = cdev_add(&entry->ced_cdev, dev, 1);
+       if (rc)
+               GOTO(out_minor, rc);
+
+       device = device_create(mdc_changelog_class, NULL, dev, entry, "%s-%s",
+                              MDC_CHANGELOG_DEV_NAME, entry->ced_name);
+       if (IS_ERR(device))
+               GOTO(out_cdev, rc = PTR_ERR(device));
+
+       device->release = chlg_device_release;
+       entry->ced_device = device;
 
        entry = NULL;   /* prevent it from being freed below */
+       GOTO(out_unlock, rc = 0);
+
+out_cdev:
+       cdev_del(&entry->ced_cdev);
+
+out_minor:
+       chlg_minor_free(minor);
+
+       list_del_init(&obd->u.cli.cl_chg_dev_linkage);
+       list_del(&entry->ced_link);
 
 out_unlock:
        mutex_unlock(&chlg_registered_dev_lock);
index 0de0ebd..d599034 100644 (file)
@@ -149,6 +149,11 @@ enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
                              enum ldlm_mode mode, struct lustre_handle *lockh);
 
 
+#define MDC_CHANGELOG_DEV_COUNT LMV_MAX_STRIPE_COUNT
+#define MDC_CHANGELOG_DEV_NAME "changelog"
+extern struct class *mdc_changelog_class;
+extern dev_t mdc_changelog_dev;
+
 int mdc_changelog_cdev_init(struct obd_device *obd);
 
 void mdc_changelog_cdev_finish(struct obd_device *obd);
index 6c7163f..750ed3d 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/utsname.h>
 #include <linux/delay.h>
 #include <linux/uidgid.h>
+#include <linux/device.h>
 
 #include <lustre_errno.h>
 
@@ -2957,15 +2958,40 @@ static const struct md_ops mdc_md_ops = {
        .m_rmfid                = mdc_rmfid,
 };
 
+dev_t mdc_changelog_dev;
+struct class *mdc_changelog_class;
 static int __init mdc_init(void)
 {
-       return class_register_type(&mdc_obd_ops, &mdc_md_ops, true, NULL,
-                                  LUSTRE_MDC_NAME, &mdc_device_type);
+       int rc = 0;
+       rc = alloc_chrdev_region(&mdc_changelog_dev, 0,
+                                MDC_CHANGELOG_DEV_COUNT,
+                                MDC_CHANGELOG_DEV_NAME);
+       if (rc)
+               return rc;
+
+       mdc_changelog_class = class_create(THIS_MODULE, MDC_CHANGELOG_DEV_NAME);
+       if (IS_ERR(mdc_changelog_class)) {
+               rc = PTR_ERR(mdc_changelog_class);
+               goto out_dev;
+       }
+
+       rc = class_register_type(&mdc_obd_ops, &mdc_md_ops, true, NULL,
+                                LUSTRE_MDC_NAME, &mdc_device_type);
+       if (rc)
+               goto out_dev;
+
+       return 0;
+
+out_dev:
+       unregister_chrdev_region(mdc_changelog_dev, MDC_CHANGELOG_DEV_COUNT);
+       return rc;
 }
 
 static void __exit mdc_exit(void)
 {
-        class_unregister_type(LUSTRE_MDC_NAME);
+       class_destroy(mdc_changelog_class);
+       unregister_chrdev_region(mdc_changelog_dev, MDC_CHANGELOG_DEV_COUNT);
+       class_unregister_type(LUSTRE_MDC_NAME);
 }
 
 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");