Whamcloud - gitweb
LU-147 avoid 8k obd device amount limit
authorAlexey Lyashkov <Alexey_Lyashkov@xyratex.com>
Fri, 18 Mar 2011 20:13:45 +0000 (23:13 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 20 Apr 2011 20:30:51 +0000 (13:30 -0700)
Increase the number of OBD devices allowed per host and
improve lookup speed in the device list via hashes.

Change-Id: I0357d0da42ce6fdd6e0846bb5d389df45f4db046
Signed-off-by: Alexey Lyashkov <Alexey_Lyashkov@xyratex.com>
Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-on: http://review.whamcloud.com/347
Tested-by: Hudson
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/libcfs/hash.c
lustre/include/obd.h
lustre/include/obd_class.h
lustre/liblustre/super.c
lustre/llite/llite_lib.c
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdclass/linux/linux-module.c
lustre/obdclass/obd_config.c

index db30f8c..1ea6bf4 100644 (file)
@@ -1157,7 +1157,7 @@ CFS_EXPORT_SYMBOL(cfs_hash_getref);
 
+/**
+ * Drop one reference on \a hs and destroy the hash once the reference
+ * count reaches zero.  A NULL \a hs is accepted and ignored.
+ */
 void cfs_hash_putref(cfs_hash_t *hs)
 {
-        if (cfs_atomic_dec_and_test(&hs->hs_refcount))
+        if (hs && cfs_atomic_dec_and_test(&hs->hs_refcount))
                 cfs_hash_destroy(hs);
 }
 CFS_EXPORT_SYMBOL(cfs_hash_putref);
index 3b73a4b..1a8d508 100644 (file)
@@ -983,13 +983,19 @@ struct obd_device {
         struct obd_type        *obd_type;
         __u32                   obd_magic;
 
+        int                     obd_minor;
+        cfs_hlist_node_t        obd_minor_node; /** < find by minor */
+
+        cfs_list_t              obd_list;
         /* common and UUID name of this device */
         char                    obd_name[MAX_OBD_NAME];
+        cfs_hlist_node_t        obd_name_node; /** < find by name */
+
         struct obd_uuid         obd_uuid;
+        cfs_hlist_node_t        obd_uuid_node; /** < find by uuid */
 
         struct lu_device       *obd_lu_dev;
 
-        int                     obd_minor;
         /* bitfield modification is protected by obd_dev_lock */
         unsigned long obd_attached:1,      /* finished attach */
                       obd_set_up:1,        /* finished setup */
index fb6f702..690f5c7 100644 (file)
@@ -76,20 +76,48 @@ int class_register_type(struct obd_ops *, struct md_ops *,
                         struct lu_device_type *ldt);
 int class_unregister_type(const char *nm);
 
-struct obd_device *class_newdev(const char *type_name, const char *name);
+
+
+
+/**
+ allocate one minor number
+ */
+int obd_minor_alloc(void);
+/**
+ release allocated minor
+ */
+void obd_minor_release(long minor);
+/**
+ return 1 if minor refers to an allocated device,
+ return 0 otherwise
+ */
+int obd_minor_valid(long minor);
+
+int obd_hashes_init(void);
+void obd_hashes_fini(void);
+
+struct obd_device *class_newdev(const char *type_name, const char *name,
+                                const char *uuid);
 void class_release_dev(struct obd_device *obd);
 
 int class_name2dev(const char *name);
 struct obd_device *class_name2obd(const char *name);
 int class_uuid2dev(struct obd_uuid *uuid);
 struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
+
 void class_obd_list(void);
+void obd_devlist_first(struct obd_device **pos);
+void obd_devlist_next(struct obd_device **pos);
+void obd_devlist_last(struct obd_device *pos);
+const char *obd_dev_status(struct obd_device *obd);
+
 struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
                                           const char * typ_name,
                                           struct obd_uuid *grp_uuid);
 struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
-                                           int *next);
-struct obd_device * class_num2obd(int num);
+                                           struct obd_device **prev);
+struct obd_device * class_num2obd(__u32 num);
+
 
 int class_notify_sptlrpc_conf(const char *fsname, int namelen);
 
@@ -468,11 +496,6 @@ do {                                                                 \
         }                                                            \
 } while (0)
 
-static inline int class_devno_max(void)
-{
-        return MAX_OBD_DEVICES;
-}
-
 static inline int obd_get_info(struct obd_export *exp, __u32 keylen,
                                void *key, __u32 *vallen, void *val,
                                struct lov_stripe_md *lsm)
index 297083b..31bfdf5 100644 (file)
@@ -95,7 +95,7 @@ static void llu_fsop_gone(struct filesys *fs)
 {
         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
         struct obd_device *obd = class_exp2obd(sbi->ll_md_exp);
-        int next = 0;
+        struct obd_device *prev = NULL;
         ENTRY;
 
         cfs_list_del(&sbi->ll_conn_chain);
@@ -103,7 +103,7 @@ static void llu_fsop_gone(struct filesys *fs)
         obd_disconnect(sbi->ll_dt_exp);
         obd_disconnect(sbi->ll_md_exp);
 
-        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
+        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &prev)) != NULL)
                 class_manual_cleanup(obd);
 
         OBD_FREE(sbi, sizeof(*sbi));
index b9850ab..3e4c4eb 100644 (file)
@@ -935,7 +935,8 @@ void ll_put_super(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         char *profilenm = get_profile_name(sb);
-        int force = 1, next;
+        int force = 1;
+        struct obd_device *prev;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
@@ -955,9 +956,9 @@ void ll_put_super(struct super_block *sb)
         /* We need to set force before the lov_disconnect in
            lustre_common_put_super, since l_d cleans up osc's as well. */
         if (force) {
-                next = 0;
+                prev = NULL;
                 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
-                                                     &next)) != NULL) {
+                                                     &prev)) != NULL) {
                         obd->obd_force = force;
                 }
         }
@@ -967,8 +968,8 @@ void ll_put_super(struct super_block *sb)
                 client_common_put_super(sb);
         }
 
-        next = 0;
-        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
+        prev = NULL;
+        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &prev)) !=NULL) {
                 class_manual_cleanup(obd);
         }
 
index 8b10a64..6c1dd8a 100644 (file)
@@ -264,7 +264,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 
         case OBD_IOC_GETDEVICE: {
                 int     index = data->ioc_count;
-                char    *status, *str;
+                const char *status;
+                char    *str;
 
                 if (!data->ioc_inlbuf1) {
                         CERROR("No buffer passed in ioctl\n");
@@ -279,14 +280,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (!obd)
                         GOTO(out, err = -ENOENT);
 
-                if (obd->obd_stopping)
-                        status = "ST";
-                else if (obd->obd_set_up)
-                        status = "UP";
-                else if (obd->obd_attached)
-                        status = "AT";
-                else
-                        status = "--";
+                status = obd_dev_status(obd);
                 str = (char *)data->ioc_bulk;
                 snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
                          (int)index, status, obd->obd_type->typ_name,
@@ -305,7 +299,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME)
                         GOTO(out, err = -EINVAL);
                 obd = class_name2obd(data->ioc_inlbuf4);
-        } else if (data->ioc_dev < class_devno_max()) {
+        } else if (obd_minor_valid(data->ioc_dev)) {
+                /* XXX = max allocated minor */
                 obd = class_num2obd(data->ioc_dev);
         } else {
                 CERROR("OBD ioctl: No device\n");
@@ -386,9 +381,7 @@ EXPORT_SYMBOL(class_register_type);
 EXPORT_SYMBOL(class_unregister_type);
 EXPORT_SYMBOL(class_get_type);
 EXPORT_SYMBOL(class_put_type);
-EXPORT_SYMBOL(class_name2dev);
 EXPORT_SYMBOL(class_name2obd);
-EXPORT_SYMBOL(class_uuid2dev);
 EXPORT_SYMBOL(class_uuid2obd);
 EXPORT_SYMBOL(class_find_client_obd);
 EXPORT_SYMBOL(class_devices_in_group);
@@ -513,8 +506,9 @@ static int __init init_obdclass(void)
 int init_obdclass(void)
 #endif
 {
-        int i, err;
+        int err;
 #ifdef __KERNEL__
+        int i;
         int lustre_register_fs(void);
 
         for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++)
@@ -561,9 +555,6 @@ int init_obdclass(void)
                 return err;
         }
 
-        /* This struct is already zeroed for us (static global) */
-        for (i = 0; i < class_devno_max(); i++)
-                obd_devs[i] = NULL;
 
         /* Default the dirty page cache cap to 1/2 of system memory.
          * For clients with less memory, a larger fraction is needed
@@ -588,7 +579,12 @@ int init_obdclass(void)
 
 #ifdef __KERNEL__
         err = lustre_register_fs();
+        if (err)
+                return err;
 #endif
+        err = obd_hashes_init();
+        if (err)
+                return err;
 
         return err;
 }
@@ -598,24 +594,27 @@ int init_obdclass(void)
 #ifdef __KERNEL__
 static void cleanup_obdclass(void)
 {
-        int i;
         int lustre_unregister_fs(void);
         __u64 memory_leaked, pages_leaked;
         __u64 memory_max, pages_max;
+        struct obd_device *obd;
         ENTRY;
 
         lustre_unregister_fs();
 
         cfs_psdev_deregister(&obd_psdev);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd && obd->obd_set_up &&
+        for (obd_devlist_first(&obd);
+             obd != NULL;
+             obd_devlist_next(&obd)) {
+                if (obd->obd_set_up &&
                     OBT(obd) && OBP(obd, detach)) {
                         /* XXX should this call generic detach otherwise? */
                         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
                         OBP(obd, detach)(obd);
                 }
         }
+        obd_hashes_fini();
+
         lu_global_fini();
 
         obd_cleanup_caches();
index 9a9adfa..8b00ad9 100644 (file)
@@ -46,6 +46,7 @@
 #include <obd_ost.h>
 #include <obd_class.h>
 #include <lprocfs_status.h>
+#include <libcfs/bitmap.h>
 
 extern cfs_list_t obd_types;
 cfs_spinlock_t obd_types_lock;
@@ -66,6 +67,240 @@ static void print_export_data(struct obd_export *exp,
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
+
+/** taken around every set/clear/test of a bit in obd_minor_map */
+cfs_spinlock_t obd_minor_lock;
+/**
+ * Maximum number of OBD devices on a single node (includes devices
+ * from all filesystems mounted on a client). This limit is itself arbitrary,
+ * though the lov_user_md_{v1,v3} structures (used for specifying the
+ * striping layout from llapi_setstripe() and on directory default EAs)
+ * have a 16-bit limit on the starting OST index.
+ **/
+const int obd_minor_map_size = 65536;
+/** one bit per minor number; allocated in obd_hashes_init() */
+cfs_bitmap_t *obd_minor_map;
+
+/**
+ * Reserve the first minor number whose bit is clear in obd_minor_map.
+ *
+ * \retval >= 0  the minor number that was reserved
+ * \retval -1    all obd_minor_map_size minors are already taken
+ */
+int obd_minor_alloc(void)
+{
+        int minor;
+
+        cfs_spin_lock(&obd_minor_lock);
+        minor = cfs_find_first_zero_bit(obd_minor_map->data,
+                                        obd_minor_map_size);
+        if (minor == obd_minor_map_size) {
+                /* no clear bit inside the map: nothing left to hand out */
+                minor = -1;
+        } else {
+                cfs_bitmap_set(obd_minor_map, minor);
+        }
+        cfs_spin_unlock(&obd_minor_lock);
+
+        return minor;
+}
+/**
+ * Mark \a minor as free again so that obd_minor_alloc() can reuse it.
+ *
+ * \param minor  minor number to release; a value outside
+ *               [0, obd_minor_map_size) cannot name a bit of
+ *               obd_minor_map and is ignored
+ */
+void obd_minor_release(long minor)
+{
+        if (minor < 0 || minor >= obd_minor_map_size)
+                return;
+
+        cfs_spin_lock(&obd_minor_lock);
+        cfs_bitmap_clear(obd_minor_map, minor);
+        cfs_spin_unlock(&obd_minor_lock);
+}
+
+/**
+ * Tell whether \a minor is currently allocated.
+ *
+ * The ioctl path hands in a device number taken straight from the request,
+ * so the value is range-checked before it is used as a bit index into
+ * obd_minor_map.
+ *
+ * \retval 0        \a minor is outside [0, obd_minor_map_size) or its bit
+ *                  is clear
+ * \retval nonzero  the bit for \a minor is set
+ */
+int obd_minor_valid(long minor)
+{
+        int ret;
+
+        if (minor < 0 || minor >= obd_minor_map_size)
+                return 0;
+
+        cfs_spin_lock(&obd_minor_lock);
+        ret = cfs_bitmap_check(obd_minor_map, minor);
+        cfs_spin_unlock(&obd_minor_lock);
+
+        return ret;
+}
+
+/** list of obd devices, linked through obd_device::obd_list */
+static CFS_LIST_HEAD(obd_dev_list);
+/* sizing arguments passed to cfs_hash_create() for all three obd hashes */
+static const int obd_hash_init_bits = 10;
+static const int obd_hash_max_bits = 30;
+static const int obd_hash_bkt_bits = 10;
+
+/** obd devices keyed by obd_device::obd_name */
+static cfs_hash_t *obd_name_hash = NULL;
+
+/** hash callback: djb2 over the NUL-terminated name passed as \a key */
+static unsigned obd_name_hops_hash(cfs_hash_t *lh, const void *key,
+                                   unsigned mask)
+{
+        const char *name = key;
+
+        return cfs_hash_djb2_hash(name, strlen(name), mask);
+}
+
+/** object callback: device that embeds \a hn as its obd_name_node */
+static void *obd_name_hops_obj(cfs_hlist_node_t *hn)
+{
+        struct obd_device *dev;
+
+        dev = cfs_hlist_entry(hn, struct obd_device, obd_name_node);
+        LASSERTF(dev->obd_magic == OBD_DEVICE_MAGIC,
+                 "%p obd_magic %08x != %08x\n",
+                 dev, dev->obd_magic, OBD_DEVICE_MAGIC);
+
+        return dev;
+}
+
+/** key callback: address of the device's obd_name */
+static void *obd_name_hops_key(cfs_hlist_node_t *hn)
+{
+        struct obd_device *dev = obd_name_hops_obj(hn);
+
+        return &dev->obd_name;
+}
+
+/** compare callback: 1 when \a key equals the name stored behind \a hn */
+static int obd_name_hops_compare(const void *key, cfs_hlist_node_t *hn)
+{
+        const char *stored = obd_name_hops_key(hn);
+
+        return !strcmp(key, stored);
+}
+
+/** get/put callback: takes no reference, only runs the magic check */
+static void obd_name_hops_noop(cfs_hash_t *hs, cfs_hlist_node_t *hn)
+{
+        (void)obd_name_hops_obj(hn);
+}
+
+/** callback table handed to cfs_hash_create() for obd_name_hash */
+static cfs_hash_ops_t obd_name_hops = {
+        .hs_hash        = obd_name_hops_hash,
+        .hs_keycmp      = obd_name_hops_compare,
+        .hs_key         = obd_name_hops_key,
+        .hs_object      = obd_name_hops_obj,
+        .hs_get         = obd_name_hops_noop,
+        .hs_put_locked  = obd_name_hops_noop,
+};
+
+/** obd devices keyed by obd_device::obd_uuid */
+static cfs_hash_t *obd_uuid_hash = NULL;
+
+/** hash callback: djb2 over \a key read as a NUL-terminated string */
+static unsigned obd_uuid_hops_hash(cfs_hash_t *lh, const void *key,
+                                   unsigned mask)
+{
+        const char *uuid = key;
+
+        return cfs_hash_djb2_hash(uuid, strlen(uuid), mask);
+}
+
+/** object callback: device that embeds \a hn as its obd_uuid_node */
+static void *obd_uuid_hops_obj(cfs_hlist_node_t *hn)
+{
+        struct obd_device *dev;
+
+        dev = cfs_hlist_entry(hn, struct obd_device, obd_uuid_node);
+        LASSERTF(dev->obd_magic == OBD_DEVICE_MAGIC,
+                 "%p obd_magic %08x != %08x\n",
+                 dev, dev->obd_magic, OBD_DEVICE_MAGIC);
+
+        return dev;
+}
+
+/** key callback: address of the device's obd_uuid */
+static void *obd_uuid_hops_key(cfs_hlist_node_t *hn)
+{
+        struct obd_device *dev = obd_uuid_hops_obj(hn);
+
+        return &dev->obd_uuid;
+}
+
+/** compare callback: obd_uuid_equals() of \a key and the stored uuid */
+static int obd_uuid_hops_compare(const void *key, cfs_hlist_node_t *hn)
+{
+        void *stored = obd_uuid_hops_key(hn);
+
+        return obd_uuid_equals(key, stored);
+}
+
+/** get/put callback: takes no reference, only runs the magic check */
+static void obd_uuid_hops_noop(cfs_hash_t *hs, cfs_hlist_node_t *hn)
+{
+        (void)obd_uuid_hops_obj(hn);
+}
+
+/** callback table handed to cfs_hash_create() for obd_uuid_hash */
+static cfs_hash_ops_t obd_uuid_hops = {
+        .hs_hash        = obd_uuid_hops_hash,
+        .hs_keycmp      = obd_uuid_hops_compare,
+        .hs_key         = obd_uuid_hops_key,
+        .hs_object      = obd_uuid_hops_obj,
+        .hs_get         = obd_uuid_hops_noop,
+        .hs_put_locked  = obd_uuid_hops_noop,
+};
+
+/** obd devices keyed by obd_device::obd_minor */
+static cfs_hash_t *obd_minor_hash = NULL;
+
+/** hash callback: \a key points at the minor number, read as a __u32 */
+static unsigned obd_minor_hops_hash(cfs_hash_t *lh, const void *key,
+                                    unsigned mask)
+{
+        const __u32 *minor = key;
+
+        return cfs_hash_u32_hash(*minor, mask);
+}
+
+/** object callback: device that embeds \a hn as its obd_minor_node */
+static void *obd_minor_hops_obj(cfs_hlist_node_t *hn)
+{
+        struct obd_device *dev;
+
+        dev = cfs_hlist_entry(hn, struct obd_device, obd_minor_node);
+        LASSERTF(dev->obd_magic == OBD_DEVICE_MAGIC,
+                 "%p obd_magic %08x != %08x\n",
+                 dev, dev->obd_magic, OBD_DEVICE_MAGIC);
+
+        return dev;
+}
+
+/** key callback: address of the device's obd_minor */
+static void *obd_minor_hops_key(cfs_hlist_node_t *hn)
+{
+        struct obd_device *dev = obd_minor_hops_obj(hn);
+
+        return &dev->obd_minor;
+}
+
+/** compare callback: 1 when the minor behind \a key equals the stored one */
+static int obd_minor_hops_compare(const void *key, cfs_hlist_node_t *hn)
+{
+        const __u32 *wanted = key;
+        const __u32 *stored = obd_minor_hops_key(hn);
+
+        return *wanted == *stored;
+}
+
+/** get/put callback: takes no reference, only runs the magic check */
+static void obd_minor_hops_noop(cfs_hash_t *hs, cfs_hlist_node_t *hn)
+{
+        (void)obd_minor_hops_obj(hn);
+}
+
+/** callback table handed to cfs_hash_create() for obd_minor_hash */
+static cfs_hash_ops_t obd_minor_hops = {
+        .hs_hash        = obd_minor_hops_hash,
+        .hs_keycmp      = obd_minor_hops_compare,
+        .hs_key         = obd_minor_hops_key,
+        .hs_object      = obd_minor_hops_obj,
+        .hs_get         = obd_minor_hops_noop,
+        .hs_put_locked  = obd_minor_hops_noop,
+};
+
+/**
+ * Create the name, uuid and minor lookup hashes and the minor bitmap,
+ * then initialise obd_minor_lock.
+ *
+ * When any step fails, everything created by the earlier steps is
+ * released again and the matching global is reset to NULL.
+ *
+ * \retval 0        everything was set up
+ * \retval -ENOMEM  a hash or the bitmap could not be allocated
+ */
+int obd_hashes_init(void)
+{
+        obd_name_hash = cfs_hash_create("obd_name",
+                                        obd_hash_init_bits, obd_hash_max_bits,
+                                        obd_hash_bkt_bits, 0,
+                                        CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
+                                        &obd_name_hops,
+                                        CFS_HASH_DEFAULT | CFS_HASH_NO_ITEMREF);
+        if (obd_name_hash == NULL)
+                goto out_nomem;
+
+        obd_uuid_hash = cfs_hash_create("obd_uuid",
+                                        obd_hash_init_bits, obd_hash_max_bits,
+                                        obd_hash_bkt_bits, 0,
+                                        CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
+                                        &obd_uuid_hops,
+                                        CFS_HASH_DEFAULT | CFS_HASH_NO_ITEMREF);
+        /* test the hash that was just created, not obd_name_hash again */
+        if (obd_uuid_hash == NULL)
+                goto out_name;
+
+        obd_minor_hash = cfs_hash_create("obd_minor",
+                                         obd_hash_init_bits, obd_hash_max_bits,
+                                         obd_hash_bkt_bits, 0,
+                                         CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
+                                         &obd_minor_hops,
+                                         CFS_HASH_DEFAULT | CFS_HASH_NO_ITEMREF);
+        if (obd_minor_hash == NULL)
+                goto out_uuid;
+
+        obd_minor_map = CFS_ALLOCATE_BITMAP(obd_minor_map_size);
+        if (obd_minor_map == NULL)
+                goto out_minor;
+
+        cfs_spin_lock_init(&obd_minor_lock);
+
+        return 0;
+
+out_minor:
+        cfs_hash_putref(obd_minor_hash);
+        obd_minor_hash = NULL;
+out_uuid:
+        cfs_hash_putref(obd_uuid_hash);
+        obd_uuid_hash = NULL;
+out_name:
+        cfs_hash_putref(obd_name_hash);
+        obd_name_hash = NULL;
+out_nomem:
+        return -ENOMEM;
+}
+
+/**
+ * Free the minor bitmap, if one was allocated, and drop the reference
+ * held on each of the three obd lookup hashes.
+ */
+void obd_hashes_fini(void)
+{
+        if (obd_minor_map != NULL) {
+                CFS_FREE_BITMAP(obd_minor_map);
+        }
+
+        cfs_hash_putref(obd_name_hash);
+        cfs_hash_putref(obd_uuid_hash);
+        cfs_hash_putref(obd_minor_hash);
+}
+
 /*
  * support functions: we could use inter-module communication, but this
  * is more portable to other OS's
@@ -256,6 +491,63 @@ int class_unregister_type(const char *name)
         RETURN(0);
 } /* class_unregister_type */
 
+/**
+ * Two-letter status code for \a obd.
+ *
+ * The flags are tested in a fixed order and the first one that is set
+ * wins: obd_stopping "ST", obd_inactive "IN", obd_set_up "UP",
+ * obd_attached "AT"; "--" when none of them is set.
+ */
+const char *obd_dev_status(struct obd_device *obd)
+{
+        if (obd->obd_stopping)
+                return "ST";
+        if (obd->obd_inactive)
+                return "IN";
+        if (obd->obd_set_up)
+                return "UP";
+        if (obd->obd_attached)
+                return "AT";
+
+        return "--";
+}
+
+/* entry embedding \a pos, or NULL when \a pos is the list head itself;
+ * \a pos is evaluated twice, so pass an expression without side effects */
+#define cfs_list_entry_safe(pos, head, type, member)  \
+        ((pos) == (head) ? NULL : cfs_list_entry((pos), type, member))
+
+/**
+ * Start a walk over obd_dev_list.
+ *
+ * \param[out] pos  set to the first device on the list, on which a
+ *                  "devlist" reference has been taken, or to NULL when
+ *                  the list is empty
+ */
+void obd_devlist_first(struct obd_device **pos)
+{
+        struct obd_device *first = NULL;
+
+        cfs_spin_lock(&obd_dev_lock);
+        if (obd_dev_list.next != &obd_dev_list) {
+                first = cfs_list_entry(obd_dev_list.next,
+                                       struct obd_device, obd_list);
+                class_incref(first, "devlist", first);
+        }
+        cfs_spin_unlock(&obd_dev_lock);
+
+        *pos = first;
+}
+
+/**
+ * Advance a walk over obd_dev_list by one device.
+ *
+ * A "devlist" reference is taken on the successor (if there is one) while
+ * obd_dev_lock is held; the reference on the current device is dropped
+ * after the lock has been released.
+ *
+ * \param[in,out] pos  in: current device, must not be NULL;
+ *                     out: next device, or NULL at the end of the list
+ */
+void obd_devlist_next(struct obd_device **pos)
+{
+        struct obd_device *cur = *pos;
+        struct obd_device *succ = NULL;
+
+        cfs_spin_lock(&obd_dev_lock);
+        if (cur->obd_list.next != &obd_dev_list) {
+                succ = cfs_list_entry(cur->obd_list.next,
+                                      struct obd_device, obd_list);
+                class_incref(succ, "devlist", succ);
+        }
+        cfs_spin_unlock(&obd_dev_lock);
+
+        class_decref(cur, "devlist", cur);
+        *pos = succ;
+}
+
+/**
+ * End a walk over obd_dev_list early: drop the "devlist" reference held
+ * on \a pos.  A NULL \a pos is ignored.
+ */
+void obd_devlist_last(struct obd_device *pos)
+{
+        if (pos == NULL)
+                return;
+
+        class_decref(pos, "devlist", pos);
+}
+
 /**
  * Create a new obd device.
  *
@@ -267,13 +559,11 @@ int class_unregister_type(const char *name)
  * \retval NULL if create fails, otherwise return the obd device
  *         pointer created.
  */
-struct obd_device *class_newdev(const char *type_name, const char *name)
+struct obd_device *class_newdev(const char *type_name, const char *name, const char *uuid)
 {
-        struct obd_device *result = NULL;
         struct obd_device *newdev;
-        struct obd_type *type = NULL;
-        int i;
-        int new_obd_minor = 0;
+        struct obd_type *type;
+        long ret;
 
         if (strlen(name) >= MAX_OBD_NAME) {
                 CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
@@ -283,62 +573,52 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
         type = class_get_type(type_name);
         if (type == NULL){
                 CERROR("OBD: unknown type: %s\n", type_name);
-                RETURN(ERR_PTR(-ENODEV));
+                ret = -ENODEV;
+                goto error_type;
         }
 
         newdev = obd_device_alloc();
         if (newdev == NULL) {
-                class_put_type(type);
-                RETURN(ERR_PTR(-ENOMEM));
+                ret = -ENOMEM;
+                goto error_device;
         }
-        LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC);
 
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd && obd->obd_name &&
-                    (strcmp(name, obd->obd_name) == 0)) {
-                        CERROR("Device %s already exists, won't add\n", name);
-                        if (result) {
-                                LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
-                                         "%p obd_magic %08x != %08x\n", result,
-                                         result->obd_magic, OBD_DEVICE_MAGIC);
-                                LASSERTF(result->obd_minor == new_obd_minor,
-                                         "%p obd_minor %d != %d\n", result,
-                                         result->obd_minor, new_obd_minor);
-
-                                obd_devs[result->obd_minor] = NULL;
-                                result->obd_name[0]='\0';
-                         }
-                        result = ERR_PTR(-EEXIST);
-                        break;
-                }
-                if (!result && !obd) {
-                        result = newdev;
-                        result->obd_minor = i;
-                        new_obd_minor = i;
-                        result->obd_type = type;
-                        strncpy(result->obd_name, name,
-                                sizeof(result->obd_name) - 1);
-                        obd_devs[i] = result;
-                }
+        newdev->obd_minor = obd_minor_alloc();
+        if (newdev->obd_minor < 0) {
+                CERROR("don't have free minors\n");
+                ret = -ENODATA;
+                goto error_minor;
         }
-        cfs_spin_unlock(&obd_dev_lock);
 
-        if (result == NULL && i >= class_devno_max()) {
-                CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
-                       class_devno_max());
-                result = ERR_PTR(-EOVERFLOW);
+        /* find add unique by name */
+        strncpy(newdev->obd_name, name, sizeof(newdev->obd_name) - 1);
+        if (cfs_hash_add_unique(obd_name_hash, name, &newdev->obd_name_node)) {
+                CERROR("fails to add an unique obddev (%s) to the hash\n",
+                       name);
+                ret = -EEXIST;
+                goto error_dup;
         }
+        newdev->obd_type = type;
 
-        if (IS_ERR(result)) {
-                obd_device_free(newdev);
-                class_put_type(type);
-        } else {
-                CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
-                       result->obd_name, result);
-        }
-        return result;
+        cfs_hash_add(obd_minor_hash, &newdev->obd_minor, &newdev->obd_minor_node);
+        memcpy(newdev->obd_uuid.uuid, uuid, strlen(uuid));
+        cfs_hash_add(obd_uuid_hash, uuid, &newdev->obd_uuid_node);
+
+        cfs_spin_lock(&obd_dev_lock);
+        cfs_list_add_tail(&newdev->obd_list, &obd_dev_list);
+        cfs_spin_unlock(&obd_dev_lock);
+
+        CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
+               newdev->obd_name, newdev);
+        RETURN(newdev);
+error_dup:
+       obd_minor_release(newdev->obd_minor);
+error_minor:
+        obd_device_free(newdev);
+error_device:
+        class_put_type(type);
+error_type:
+        RETURN(ERR_PTR(ret));
 }
 
 void class_release_dev(struct obd_device *obd)
@@ -347,16 +627,20 @@ void class_release_dev(struct obd_device *obd)
 
         LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
                  obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-        LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
-                 obd, obd->obd_minor, obd_devs[obd->obd_minor]);
         LASSERT(obd_type != NULL);
 
         CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n",
                obd->obd_name,obd->obd_type->typ_name);
 
+        cfs_hash_del(obd_name_hash, obd->obd_name, &obd->obd_name_node);
+        cfs_hash_del(obd_uuid_hash, &obd->obd_uuid, &obd->obd_uuid_node);
+        cfs_hash_del(obd_minor_hash, &obd->obd_minor, &obd->obd_minor_node);
+
         cfs_spin_lock(&obd_dev_lock);
-        obd_devs[obd->obd_minor] = NULL;
+        cfs_list_del(&obd->obd_list);
         cfs_spin_unlock(&obd_dev_lock);
+
+        obd_minor_release(obd->obd_minor);
         obd_device_free(obd);
 
         class_put_type(obd_type);
@@ -364,63 +648,28 @@ void class_release_dev(struct obd_device *obd)
 
+/**
+ * Find a device by name in obd_name_hash.
+ *
+ * \retval minor  minor number of the device registered under \a name
+ * \retval -1     \a name is NULL or no device has that name
+ */
 int class_name2dev(const char *name)
 {
-        int i;
-
-        if (!name)
-                return -1;
-
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) {
-                        /* Make sure we finished attaching before we give
-                           out any references */
-                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-                        if (obd->obd_attached) {
-                                cfs_spin_unlock(&obd_dev_lock);
-                                return i;
-                        }
-                        break;
-                }
-        }
-        cfs_spin_unlock(&obd_dev_lock);
+        struct obd_device *obd;
+
+        /* the name hash runs strlen() on its key, so keep rejecting a
+         * NULL name up front as the array scan did */
+        if (name == NULL)
+                return -1;
 
-        return -1;
+        obd = cfs_hash_lookup(obd_name_hash, name);
+        return obd != NULL ? obd->obd_minor : -1;
 }
 
+/**
+ * Find a device by name in obd_name_hash.
+ *
+ * \retval NULL   \a name is NULL or no device has that name
+ * \retval other  the device registered under \a name
+ */
 struct obd_device *class_name2obd(const char *name)
 {
-        int dev = class_name2dev(name);
-
-        if (dev < 0 || dev > class_devno_max())
-                return NULL;
-        return class_num2obd(dev);
+        /* the name hash runs strlen() on its key; a NULL name used to be
+         * filtered out by class_name2dev() and must not reach the lookup */
+        if (name == NULL)
+                return NULL;
+
+        return cfs_hash_lookup(obd_name_hash, name);
 }
 
+/**
+ * Look \a uuid up in obd_uuid_hash.
+ *
+ * \retval minor  obd_minor of the device the lookup returned
+ * \retval -1     the lookup returned NULL
+ */
 int class_uuid2dev(struct obd_uuid *uuid)
 {
-        int i;
-
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) {
-                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-                        cfs_spin_unlock(&obd_dev_lock);
-                        return i;
-                }
-        }
-        cfs_spin_unlock(&obd_dev_lock);
+        struct obd_device *obd;
 
-        return -1;
+        obd = cfs_hash_lookup(obd_uuid_hash, uuid);
+        return obd != NULL ? obd->obd_minor : -1;
 }
 
+/** Return whatever cfs_hash_lookup() finds for \a uuid in obd_uuid_hash. */
 struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
 {
-        int dev = class_uuid2dev(uuid);
-        if (dev < 0)
-                return NULL;
-        return class_num2obd(dev);
+        return  cfs_hash_lookup(obd_uuid_hash, uuid);
 }
 
 /**
@@ -431,50 +680,30 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
  * \retval NULL if ::obd_devs[\a num] does not contains an obd device
  *         otherwise return the obd device there.
  */
-struct obd_device *class_num2obd(int num)
+struct obd_device *class_num2obd(__u32 minor)
 {
-        struct obd_device *obd = NULL;
+        struct obd_device *obd;
 
-        if (num < class_devno_max()) {
-                obd = obd_devs[num];
-                if (obd == NULL)
-                        return NULL;
-
-                LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
-                         "%p obd_magic %08x != %08x\n",
-                         obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-                LASSERTF(obd->obd_minor == num,
-                         "%p obd_minor %0d != %0d\n",
-                         obd, obd->obd_minor, num);
-        }
+        /* the minor hash callbacks read the key through a __u32 pointer,
+         * so the by-value argument is passed by address */
+        obd = cfs_hash_lookup(obd_minor_hash, &minor);
 
         return obd;
 }
 
+/**
+ * Print one console line per device on the device list: minor, status
+ * code, type name, device name, uuid and reference count.
+ */
 void class_obd_list(void)
 {
-        char *status;
-        int i;
+        const char *status;
+        struct obd_device *obd;
 
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd == NULL)
-                        continue;
-                if (obd->obd_stopping)
-                        status = "ST";
-                else if (obd->obd_set_up)
-                        status = "UP";
-                else if (obd->obd_attached)
-                        status = "AT";
-                else
-                        status = "--";
+        /* the iterator holds a "devlist" reference on obd for the duration
+         * of each pass; obd_dev_lock is no longer held while printing */
+        for (obd_devlist_first(&obd);
+             obd != NULL;
+             obd_devlist_next(&obd)) {
+
+                status = obd_dev_status(obd);
                 LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n",
-                         i, status, obd->obd_type->typ_name,
+                         obd->obd_minor, status, obd->obd_type->typ_name,
                          obd->obd_name, obd->obd_uuid.uuid,
                          cfs_atomic_read(&obd->obd_refcount));
         }
-        cfs_spin_unlock(&obd_dev_lock);
         return;
 }
 
@@ -485,25 +714,23 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
                                           const char * typ_name,
                                           struct obd_uuid *grp_uuid)
 {
-        int i;
+        struct obd_device *obd;
 
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd == NULL)
-                        continue;
+        for (obd_devlist_first(&obd);
+             obd != NULL;
+             obd_devlist_next(&obd)) {
+                /* XXX per type list ? */
                 if ((strncmp(obd->obd_type->typ_name, typ_name,
                              strlen(typ_name)) == 0)) {
                         if (obd_uuid_equals(tgt_uuid,
                                             &obd->u.cli.cl_target_uuid) &&
                             ((grp_uuid)? obd_uuid_equals(grp_uuid,
                                                          &obd->obd_uuid) : 1)) {
-                                cfs_spin_unlock(&obd_dev_lock);
+                                obd_devlist_last(obd);
                                 return obd;
                         }
                 }
         }
-        cfs_spin_unlock(&obd_dev_lock);
 
         return NULL;
 }
@@ -512,30 +739,24 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
    searching at *next, and if a device is found, the next index to look
    at is saved in *next. If next is NULL, then the first matching device
    will always be returned. */
-struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
+                                           struct obd_device **prev)
 {
-        int i;
+        struct obd_device *obd = (prev != NULL) ? *prev : NULL;
 
-        if (next == NULL)
-                i = 0;
-        else if (*next >= 0 && *next < class_devno_max())
-                i = *next;
+        if (obd == NULL)
+                obd_devlist_first(&obd);
         else
-                return NULL;
+                obd_devlist_next(&obd);
 
-        cfs_spin_lock(&obd_dev_lock);
-        for (; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd == NULL)
-                        continue;
+        for (; obd != NULL; obd_devlist_next(&obd)) {
                 if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
-                        if (next != NULL)
-                                *next = i+1;
-                        cfs_spin_unlock(&obd_dev_lock);
+                        /* XXX return with reference */
+                        if (prev != NULL)
+                                *prev = obd;
                         return obd;
                 }
         }
-        cfs_spin_unlock(&obd_dev_lock);
 
         return NULL;
 }
@@ -548,15 +769,14 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen)
 {
         struct obd_device  *obd;
         const char         *type;
-        int                 i, rc = 0, rc2;
+        int                 rc = 0, rc2;
 
         LASSERT(namelen > 0);
 
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                obd = class_num2obd(i);
-
-                if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
+        for (obd_devlist_first(&obd);
+             obd != NULL;
+             obd_devlist_next(&obd)) {
+                if (obd->obd_set_up == 0 || obd->obd_stopping)
                         continue;
 
                 /* only notify mdc, osc, mdt, ost */
@@ -570,16 +790,13 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen)
                 if (strncmp(obd->obd_name, fsname, namelen))
                         continue;
 
-                class_incref(obd, __FUNCTION__, obd);
-                cfs_spin_unlock(&obd_dev_lock);
+                /** XXX - some new obd can be added at that point */
                 rc2 = obd_set_info_async(obd->obd_self_export,
                                          sizeof(KEY_SPTLRPC_CONF),
                                          KEY_SPTLRPC_CONF, 0, NULL, NULL);
                 rc = rc ? rc : rc2;
-                class_decref(obd, __FUNCTION__, obd);
-                cfs_spin_lock(&obd_dev_lock);
         }
-        cfs_spin_unlock(&obd_dev_lock);
+
         return rc;
 }
 EXPORT_SYMBOL(class_notify_sptlrpc_conf);
index d893f16..fee81d0 100644 (file)
@@ -284,36 +284,28 @@ int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
 static int obd_proc_read_health(char *page, char **start, off_t off,
                                 int count, int *eof, void *data)
 {
-        int rc = 0, i;
-        *eof = 1;
+        int rc = 0;
+        struct obd_device *obd;
 
+        *eof = 1;
         if (libcfs_catastrophe)
                 rc += snprintf(page + rc, count - rc, "LBUG\n");
 
-        cfs_spin_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd;
-
-                obd = class_num2obd(i);
-                if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
+        for (obd_devlist_first(&obd);
+             obd != NULL;
+             obd_devlist_next(&obd)) {
+                if (!obd->obd_attached || !obd->obd_set_up)
                         continue;
 
-                LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
                 if (obd->obd_stopping)
                         continue;
 
-                class_incref(obd, __FUNCTION__, cfs_current());
-                cfs_spin_unlock(&obd_dev_lock);
-
                 if (obd_health_check(obd)) {
                         rc += snprintf(page + rc, count - rc,
                                        "device %s reported unhealthy\n",
                                        obd->obd_name);
                 }
-                class_decref(obd, __FUNCTION__, cfs_current());
-                cfs_spin_lock(&obd_dev_lock);
         }
-        cfs_spin_unlock(&obd_dev_lock);
 
         if (rc == 0)
                 return snprintf(page, count, "healthy\n");
@@ -336,50 +328,43 @@ struct lprocfs_vars lprocfs_base[] = {
 #endif /* LPROCFS */
 
 #ifdef __KERNEL__
+
 static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos)
 {
-        if (*pos >= class_devno_max())
-                return NULL;
+        struct obd_device *obd;
+        loff_t i = *pos;
+
+        for(obd_devlist_first(&obd);
+            i != 0 && obd != NULL;
+            obd_devlist_next(&obd)) {
+                i --;
+        }
 
-        return pos;
+        return obd;
 }
 
 static void obd_device_list_seq_stop(struct seq_file *p, void *v)
 {
+        obd_devlist_last(v);
 }
 
 static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
-        ++*pos;
-        if (*pos >= class_devno_max())
-                return NULL;
+        struct obd_device *obd = v;
 
-        return pos;
+        ++ *pos;
+        obd_devlist_next(&obd);
+        return obd;
 }
 
 static int obd_device_list_seq_show(struct seq_file *p, void *v)
 {
-        loff_t index = *(loff_t *)v;
-        struct obd_device *obd = class_num2obd((int)index);
-        char *status;
-
-        if (obd == NULL)
-                return 0;
-
-        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-        if (obd->obd_stopping)
-                status = "ST";
-        else if (obd->obd_inactive)
-                status = "IN";
-        else if (obd->obd_set_up)
-                status = "UP";
-        else if (obd->obd_attached)
-                status = "AT";
-        else
-                status = "--";
+        struct obd_device *obd = v;
+        const char *status;
 
+        status = obd_dev_status(obd);
         return seq_printf(p, "%3d %s %s %s %s %d\n",
-                          (int)index, status, obd->obd_type->typ_name,
+                          obd->obd_minor, status, obd->obd_type->typ_name,
                           obd->obd_name, obd->obd_uuid.uuid,
                           cfs_atomic_read(&obd->obd_refcount));
 }
index 2799869..6af0a5c 100644 (file)
@@ -267,7 +267,7 @@ int class_attach(struct lustre_cfg *lcfg)
 {
         struct obd_device *obd = NULL;
         char *typename, *name, *uuid;
-        int rc, len;
+        int rc;
         ENTRY;
 
         if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) {
@@ -288,10 +288,16 @@ int class_attach(struct lustre_cfg *lcfg)
         }
         uuid = lustre_cfg_string(lcfg, 2);
 
+        if (strlen(uuid) >= sizeof(obd->obd_uuid)) {
+                CERROR("uuid must be < %d bytes long\n",
+                       (int)sizeof(obd->obd_uuid));
+                RETURN(-EINVAL);
+        }
+
         CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
                MKSTR(typename), MKSTR(name), MKSTR(uuid));
 
-        obd = class_newdev(typename, name);
+        obd = class_newdev(typename, name, uuid);
         if (IS_ERR(obd)) {
                 /* Already exists or out of obds */
                 rc = PTR_ERR(obd);
@@ -300,6 +306,7 @@ int class_attach(struct lustre_cfg *lcfg)
                        name, typename, rc);
                 GOTO(out, rc);
         }
+
         LASSERTF(obd != NULL, "Cannot get obd device %s of type %s\n",
                  name, typename);
         LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
@@ -339,14 +346,6 @@ int class_attach(struct lustre_cfg *lcfg)
 
         llog_group_init(&obd->obd_olg, FID_SEQ_LLOG);
 
-        len = strlen(uuid);
-        if (len >= sizeof(obd->obd_uuid)) {
-                CERROR("uuid must be < %d bytes long\n",
-                       (int)sizeof(obd->obd_uuid));
-                GOTO(out, rc = -EINVAL);
-        }
-        memcpy(obd->obd_uuid.uuid, uuid, len);
-
         /* do the attach */
         if (OBP(obd, attach)) {
                 rc = OBP(obd,attach)(obd, sizeof *lcfg, lcfg);
@@ -636,10 +635,7 @@ void class_decref(struct obd_device *obd, const char *scope, const void *source)
         int err;
         int refs;
 
-        cfs_spin_lock(&obd->obd_dev_lock);
-        cfs_atomic_dec(&obd->obd_refcount);
-        refs = cfs_atomic_read(&obd->obd_refcount);
-        cfs_spin_unlock(&obd->obd_dev_lock);
+        refs = cfs_atomic_dec_return(&obd->obd_refcount);
         lu_ref_del(&obd->obd_reference, scope, source);
 
         CDEBUG(D_INFO, "Decref %s (%p) now %d\n", obd->obd_name, obd, refs);