From d0423abc1adc717b08de61be3556688cccd52ddf Mon Sep 17 00:00:00 2001
From: Hongchao Zhang
Date: Wed, 4 Mar 2020 09:07:19 -0500
Subject: [PATCH] LU-12506 changelog: support large number of MDT

At client, the changelog of each MDT is associated with one miscdevice,
but the number of miscdevice is limited to 64 in Linux kernel, then it
will fail if there are more than 64 MDTs.

This patch replaces miscdevice with dynamic devices to support more
MDTs.

Change-Id: Ie3ce76cbe1c613bf17d6350ea95546524b6d66b8
Signed-off-by: Hongchao Zhang
Reviewed-on: https://review.whamcloud.com/37759
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Petros Koutoupis
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
Review notes (text between "---" and the diffstat is not part of the
commit message):

* mdc_changelog_cdev_init(): a chlg_minor_alloc() failure used to jump
  straight to out_unlock, skipping the list_del_init()/list_del() calls
  that the old misc_register() error path performed, although the entry
  had already been added to chlg_registered_devices and is freed
  afterwards.  It now jumps to a new out_listrm label so that every
  error path unlinks the entry before it is freed.
* mdc_init(): a class_register_type() failure used to leave
  mdc_changelog_class allocated.  It now goes through a new out_class
  label that calls class_destroy() before the chrdev region is
  released.
* The comments describing ced_name and get_target_name() now match the
  name that is actually stored (no "changelog-" prefix); short header
  comments were added to the new helpers and to mdc_init().
* NOTE(review): chlg_device_release() overwrites the release callback
  of the device returned by device_create(); please confirm that the
  struct device itself is still freed on teardown.
* NOTE(review): the header names on the #include lines and the e-mail
  addresses were already missing from this copy of the patch, and the
  original whitespace was lost.  The #include lines are left as found;
  indentation and blank lines are reconstructed from the hunk counts.

 lustre/mdc/mdc_changelog.c | 131 +++++++++++++++++++++++++++++++++----------
 lustre/mdc/mdc_internal.h  |   5 ++
 lustre/mdc/mdc_request.c   |  39 ++++++++++++--
 3 files changed, 142 insertions(+), 33 deletions(-)

diff --git a/lustre/mdc/mdc_changelog.c b/lustre/mdc/mdc_changelog.c
index fb9fa54..634fbd8 100644
--- a/lustre/mdc/mdc_changelog.c
+++ b/lustre/mdc/mdc_changelog.c
@@ -33,7 +33,8 @@
 #include
 #include
 #include
-#include
+#include
+#include
 
 #include
 #include
@@ -58,15 +59,16 @@ static LIST_HEAD(chlg_registered_devices);
 
 struct chlg_registered_dev {
-	/* Device name of the form "changelog-{MDTNAME}" */
-	char			 ced_name[32];
-	/* Misc device descriptor */
-	struct miscdevice	 ced_misc;
+	/* MDT target name; the device is named "changelog-{MDTNAME}" */
+	char			ced_name[32];
+	/* changelog char device */
+	struct cdev		ced_cdev;
+	struct device		*ced_device;
 	/* OBDs referencing this device (multiple mount point) */
-	struct list_head	 ced_obds;
+	struct list_head	ced_obds;
 	/* Reference counter for proper deregistration */
-	struct kref		 ced_refs;
+	struct kref		ced_refs;
 	/* Link within the global chlg_registered_devices */
-	struct list_head	 ced_link;
+	struct list_head	ced_link;
 };
 
 struct chlg_reader_state {
@@ -111,19 +113,67 @@ enum {
 	CDEV_CHLG_MAX_PREFETCH = 1024,
 };
 
+static DEFINE_IDR(chlg_minor_idr);
+static DEFINE_SPINLOCK(chlg_minor_lock);
+
+/**
+ * Reserve a free minor number below MDC_CHANGELOG_DEV_COUNT.
+ * Returns 0 and stores the minor in @pminor, or the negative value
+ * returned by idr_alloc().
+ */
+static int chlg_minor_alloc(int *pminor)
+{
+	void *minor_allocated = (void *)-1;
+	int minor;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&chlg_minor_lock);
+	minor = idr_alloc(&chlg_minor_idr, minor_allocated, 0,
+			  MDC_CHANGELOG_DEV_COUNT, GFP_NOWAIT);
+	spin_unlock(&chlg_minor_lock);
+	idr_preload_end();
+
+	if (minor < 0)
+		return minor;
+
+	*pminor = minor;
+	return 0;
+}
+
+/** Return a minor number obtained from chlg_minor_alloc() to the IDR. */
+static void chlg_minor_free(int minor)
+{
+	spin_lock(&chlg_minor_lock);
+	idr_remove(&chlg_minor_idr, minor);
+	spin_unlock(&chlg_minor_lock);
+}
+
+/**
+ * Device release callback: give back the minor number and free the
+ * chlg_registered_dev stored as the device's driver data.
+ */
+static void chlg_device_release(struct device *dev)
+{
+	struct chlg_registered_dev *entry = dev_get_drvdata(dev);
+
+	chlg_minor_free(MINOR(entry->ced_cdev.dev));
+	OBD_FREE_PTR(entry);
+}
+
 /**
  * Deregister a changelog character device whose refcount has reached zero.
  */
 static void chlg_dev_clear(struct kref *kref)
 {
-	struct chlg_registered_dev *entry = container_of(kref,
-						    struct chlg_registered_dev,
-						    ced_refs);
+	struct chlg_registered_dev *entry;
+
 	ENTRY;
+	entry = container_of(kref, struct chlg_registered_dev,
+			     ced_refs);
 
 	list_del(&entry->ced_link);
-	misc_deregister(&entry->ced_misc);
-	OBD_FREE_PTR(entry);
+	cdev_del(&entry->ced_cdev);
+	device_destroy(mdc_changelog_class, entry->ced_cdev.dev);
 	EXIT;
 }
 
@@ -551,13 +601,12 @@ out_kbuf:
 static int chlg_open(struct inode *inode, struct file *file)
 {
 	struct chlg_reader_state *crs;
-	struct miscdevice *misc = file->private_data;
 	struct chlg_registered_dev *dev;
 	struct task_struct *task;
 	int rc;
 	ENTRY;
 
-	dev = container_of(misc, struct chlg_registered_dev, ced_misc);
+	dev = container_of(inode->i_cdev, struct chlg_registered_dev, ced_cdev);
 
 	OBD_ALLOC_PTR(crs);
 	if (!crs)
@@ -676,11 +725,11 @@ static const struct file_operations chlg_fops = {
  * This uses obd_name of the form: "testfs-MDT0000-mdc-ffff88006501600"
- * and returns a name of the form: "changelog-testfs-MDT0000".
+ * and returns a name of the form: "testfs-MDT0000".
  */
-static void get_chlg_name(char *name, size_t name_len, struct obd_device *obd)
+static void get_target_name(char *name, size_t name_len, struct obd_device *obd)
 {
 	int i;
 
-	snprintf(name, name_len, "changelog-%s", obd->obd_name);
+	snprintf(name, name_len, "%s", obd->obd_name);
 
 	/* Find the 2nd '-' from the end and truncate on it */
 	for (i = 0; i < 2; i++) {
@@ -742,18 +791,16 @@ int mdc_changelog_cdev_init(struct obd_device *obd)
 {
 	struct chlg_registered_dev *exist;
 	struct chlg_registered_dev *entry;
-	int rc;
+	struct device *device;
+	dev_t dev;
+	int minor, rc;
 	ENTRY;
 
 	OBD_ALLOC_PTR(entry);
 	if (entry == NULL)
 		RETURN(-ENOMEM);
 
-	get_chlg_name(entry->ced_name, sizeof(entry->ced_name), obd);
-
-	entry->ced_misc.minor = MISC_DYNAMIC_MINOR;
-	entry->ced_misc.name = entry->ced_name;
-	entry->ced_misc.fops = &chlg_fops;
+	get_target_name(entry->ced_name, sizeof(entry->ced_name), obd);
 
 	kref_init(&entry->ced_refs);
 	INIT_LIST_HEAD(&entry->ced_obds);
@@ -771,14 +818,38 @@ int mdc_changelog_cdev_init(struct obd_device *obd)
 	list_add_tail(&entry->ced_link, &chlg_registered_devices);
 
 	/* Register new character device */
-	rc = misc_register(&entry->ced_misc);
-	if (rc != 0) {
-		list_del_init(&obd->u.cli.cl_chg_dev_linkage);
-		list_del(&entry->ced_link);
-		GOTO(out_unlock, rc);
-	}
+	cdev_init(&entry->ced_cdev, &chlg_fops);
+	entry->ced_cdev.owner = THIS_MODULE;
+
+	rc = chlg_minor_alloc(&minor);
+	if (rc)
+		GOTO(out_listrm, rc);
+
+	dev = MKDEV(MAJOR(mdc_changelog_dev), minor);
+	rc = cdev_add(&entry->ced_cdev, dev, 1);
+	if (rc)
+		GOTO(out_minor, rc);
+
+	device = device_create(mdc_changelog_class, NULL, dev, entry, "%s-%s",
+			       MDC_CHANGELOG_DEV_NAME, entry->ced_name);
+	if (IS_ERR(device))
+		GOTO(out_cdev, rc = PTR_ERR(device));
+
+	device->release = chlg_device_release;
+	entry->ced_device = device;
 
 	entry = NULL;	/* prevent it from being freed below */
+	GOTO(out_unlock, rc = 0);
+
+out_cdev:
+	cdev_del(&entry->ced_cdev);
+
+out_minor:
+	chlg_minor_free(minor);
+
+out_listrm:
+	list_del_init(&obd->u.cli.cl_chg_dev_linkage);
+	list_del(&entry->ced_link);
 
 out_unlock:
 	mutex_unlock(&chlg_registered_dev_lock);
diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h
index 0de0ebd..d599034 100644
--- a/lustre/mdc/mdc_internal.h
+++ b/lustre/mdc/mdc_internal.h
@@ -149,6 +149,11 @@ enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
 			      enum ldlm_mode mode,
 			      struct lustre_handle *lockh);
 
+#define MDC_CHANGELOG_DEV_COUNT LMV_MAX_STRIPE_COUNT
+#define MDC_CHANGELOG_DEV_NAME "changelog"
+extern struct class *mdc_changelog_class;
+extern dev_t mdc_changelog_dev;
+
 int mdc_changelog_cdev_init(struct obd_device *obd);
 void mdc_changelog_cdev_finish(struct obd_device *obd);
 
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
index 6c7163f..750ed3d 100644
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -2957,15 +2958,47 @@ static const struct md_ops mdc_md_ops = {
 	.m_rmfid            = mdc_rmfid,
 };
 
+dev_t mdc_changelog_dev;
+struct class *mdc_changelog_class;
+
+/*
+ * Reserve the changelog chrdev region and create the changelog device
+ * class before registering the MDC type; undo both if a later step fails.
+ */
 static int __init mdc_init(void)
 {
-        return class_register_type(&mdc_obd_ops, &mdc_md_ops, true, NULL,
-                                   LUSTRE_MDC_NAME, &mdc_device_type);
+	int rc = 0;
+	rc = alloc_chrdev_region(&mdc_changelog_dev, 0,
+				 MDC_CHANGELOG_DEV_COUNT,
+				 MDC_CHANGELOG_DEV_NAME);
+	if (rc)
+		return rc;
+
+	mdc_changelog_class = class_create(THIS_MODULE, MDC_CHANGELOG_DEV_NAME);
+	if (IS_ERR(mdc_changelog_class)) {
+		rc = PTR_ERR(mdc_changelog_class);
+		goto out_dev;
+	}
+
+	rc = class_register_type(&mdc_obd_ops, &mdc_md_ops, true, NULL,
+				 LUSTRE_MDC_NAME, &mdc_device_type);
+	if (rc)
+		goto out_class;
+
+	return 0;
+
+out_class:
+	class_destroy(mdc_changelog_class);
+out_dev:
+	unregister_chrdev_region(mdc_changelog_dev, MDC_CHANGELOG_DEV_COUNT);
+	return rc;
 }
 
 static void __exit mdc_exit(void)
 {
-        class_unregister_type(LUSTRE_MDC_NAME);
+	class_destroy(mdc_changelog_class);
+	unregister_chrdev_region(mdc_changelog_dev, MDC_CHANGELOG_DEV_COUNT);
+	class_unregister_type(LUSTRE_MDC_NAME);
 }
 
 MODULE_AUTHOR("OpenSFS, Inc. ");
-- 
1.8.3.1