-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * This file is part of Lustre, http://www.lustre.org.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
*
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
*/
#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <linux/delay.h>
+#include <libcfs/linux/linux-hash.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
-static spinlock_t conn_lock;
-static struct list_head conn_list;
-static struct list_head conn_unused_list;
+#include "ptlrpc_internal.h"
-/* If UUID is NULL, c->c_remote_uuid must be all zeroes
- * If UUID is non-NULL, c->c_remote_uuid must match. */
-static int match_connection_uuid(struct ptlrpc_connection *c, struct obd_uuid *uuid)
+/* Hash table of struct ptlrpc_connection, configured by conn_hash_params. */
+static struct rhashtable conn_hash;
+
+/*
+ * struct lnet_process_id may contain unassigned bytes which might not
+ * be zero, so we cannot just hash and compare bytes.
+ */
+
+/*
+ * Hash callback installed as conn_hash_params.hashfn.
+ *
+ * @data is read as a struct lnet_process_id.  Only the pid and nid
+ * members feed the hash: pid is mixed into @seed with cfs_hash_32(),
+ * and the result is XORed with cfs_hash_64() of nid.  @len is ignored.
+ *
+ * Returns the resulting 32-bit hash value.
+ */
+static u32 lnet_process_id_hash(const void *data, u32 len, u32 seed)
{
- struct obd_uuid zero_uuid;
- memset(&zero_uuid, 0, sizeof(zero_uuid));
+ const struct lnet_process_id *lpi = data;
- if (uuid)
- return memcmp(c->c_remote_uuid.uuid, uuid->uuid,
- sizeof(uuid->uuid));
+ seed = cfs_hash_32(seed ^ lpi->pid, 32);
+ seed ^= cfs_hash_64(lpi->nid, 32);
+ return seed;
+}
- return memcmp(c->c_remote_uuid.uuid, &zero_uuid, sizeof(zero_uuid));
+/*
+ * Comparison callback installed as conn_hash_params.obj_cmpfn.
+ *
+ * Matches the lookup key (a struct lnet_process_id in arg->key) against
+ * the c_peer of the candidate connection @obj, member by member.
+ *
+ * Returns 0 when both nid and pid are equal, -ESRCH otherwise.
+ */
+static int lnet_process_id_cmp(struct rhashtable_compare_arg *arg,
+			       const void *obj)
+{
+	const struct ptlrpc_connection *candidate = obj;
+	const struct lnet_process_id *key = arg->key;
+
+	if (key->nid != candidate->c_peer.nid ||
+	    key->pid != candidate->c_peer.pid)
+		return -ESRCH;
+
+	return 0;
+}
+
+/*
+ * Parameters for conn_hash.  Entries are struct ptlrpc_connection, keyed
+ * on the c_peer member and linked into the table through c_hash.  Hashing
+ * and key comparison use lnet_process_id_hash() and lnet_process_id_cmp()
+ * instead of byte-wise defaults.
+ *
+ * NOTE(review): lnet_process_id_hash() ignores its len argument, so the
+ * key_len value below looks like a placeholder - confirm against how
+ * rhashtable uses key_len when custom callbacks are supplied.
+ */
+static const struct rhashtable_params conn_hash_params = {
+ .key_len = 1, /* actually variable-length */
+ .key_offset = offsetof(struct ptlrpc_connection, c_peer),
+ .head_offset = offsetof(struct ptlrpc_connection, c_hash),
+ .hashfn = lnet_process_id_hash,
+ .obj_cmpfn = lnet_process_id_cmp,
+};
+
+/*
+ * Find or create the connection object for @peer.
+ *
+ * @peer: remote process id; its nid is first replaced by
+ *        LNetPrimaryNID(peer.nid) before it is used as the hash key.
+ * @self: local NID stored in c_self of a newly created connection.
+ * @uuid: optional remote UUID copied into c_remote_uuid of a newly
+ *        created connection; may be NULL.
+ *
+ * Returns the connection with one reference taken for the caller, or
+ * NULL if allocation fails or the hash insertion fails with an error
+ * other than -ENOMEM/-EBUSY.
+ */
+struct ptlrpc_connection *
+ptlrpc_connection_get(struct lnet_process_id peer, lnet_nid_t self,
+		      struct obd_uuid *uuid)
+{
+	struct ptlrpc_connection *conn, *conn2;
+	ENTRY;
+
+	peer.nid = LNetPrimaryNID(peer.nid);
+	conn = rhashtable_lookup_fast(&conn_hash, &peer, conn_hash_params);
+	if (conn) {
+		ptlrpc_connection_addref(conn);
+		GOTO(out, conn);
+	}
+
+	OBD_ALLOC_PTR(conn);
+	if (!conn)
+		RETURN(NULL);
+
+	conn->c_peer = peer;
+	conn->c_self = self;
+	atomic_set(&conn->c_refcount, 1);
+	if (uuid)
+		obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
+
+	/*
+	 * Add the newly created conn to the hash.  The insert returns
+	 * NULL on success.  On key collision we lost a racing addition:
+	 * the object already in the hash is returned and our newly
+	 * allocated connection must be destroyed.  An ERR_PTR is
+	 * returned on failure.
+	 */
+try_again:
+	conn2 = rhashtable_lookup_get_insert_fast(&conn_hash, &conn->c_hash,
+						  conn_hash_params);
+	if (conn2) {
+		/* insertion failed */
+		if (IS_ERR(conn2)) {
+			/* hash table could be resizing. */
+			if (PTR_ERR(conn2) == -ENOMEM ||
+			    PTR_ERR(conn2) == -EBUSY) {
+				/*
+				 * conn must stay allocated here: the retry
+				 * inserts &conn->c_hash again.
+				 */
+				msleep(5);
+				goto try_again;
+			}
+			/* unrecoverable error: drop our unused object */
+			OBD_FREE_PTR(conn);
+			RETURN(NULL);
+		}
+		/* lost the race: use the existing object instead of ours */
+		OBD_FREE_PTR(conn);
+		conn = conn2;
+		ptlrpc_connection_addref(conn);
+	}
+	EXIT;
+out:
+	CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+	       conn, atomic_read(&conn->c_refcount),
+	       libcfs_nid2str(conn->c_peer.nid));
+	return conn;
}
-struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
- struct obd_uuid *uuid)
+/*
+ * Drop one reference on @conn.
+ *
+ * Returns 1 if this call brought c_refcount down to zero, 0 otherwise
+ * (including when @conn is NULL).  The connection is neither removed
+ * from conn_hash nor freed here.
+ */
+int ptlrpc_connection_put(struct ptlrpc_connection *conn)
{
- struct list_head *tmp, *pos;
- struct ptlrpc_connection *c;
- ENTRY;
-
- CDEBUG(D_INFO, "peer is %08x %08lx %08lx\n",
- peer->peer_nid, peer->peer_ni.nal_idx, peer->peer_ni.handle_idx);
-
- spin_lock(&conn_lock);
- list_for_each(tmp, &conn_list) {
- c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 &&
- !match_connection_uuid(c, uuid)) {
- ptlrpc_connection_addref(c);
- GOTO(out, c);
- }
- }
-
- list_for_each_safe(tmp, pos, &conn_unused_list) {
- c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 &&
- !match_connection_uuid(c, uuid)) {
- ptlrpc_connection_addref(c);
- list_del(&c->c_link);
- list_add(&c->c_link, &conn_list);
- GOTO(out, c);
- }
- }
-
- /* FIXME: this should be a slab once we can validate slab addresses
- * without OOPSing */
- OBD_ALLOC(c, sizeof(*c));
- if (c == NULL)
- GOTO(out, c);
-
- c->c_generation = 1;
- c->c_epoch = 1;
- c->c_bootcount = 0;
- c->c_flags = 0;
- if (uuid->uuid)
- obd_str2uuid(&c->c_remote_uuid, uuid->uuid);
- INIT_LIST_HEAD(&c->c_imports);
- INIT_LIST_HEAD(&c->c_exports);
- INIT_LIST_HEAD(&c->c_sb_chain);
- INIT_LIST_HEAD(&c->c_recovd_data.rd_managed_chain);
- INIT_LIST_HEAD(&c->c_delayed_head);
- atomic_set(&c->c_refcount, 0);
- ptlrpc_connection_addref(c);
- spin_lock_init(&c->c_lock);
-
- memcpy(&c->c_peer, peer, sizeof(c->c_peer));
- list_add(&c->c_link, &conn_list);
-
- EXIT;
- out:
- spin_unlock(&conn_lock);
- return c;
+ int rc = 0;
+ ENTRY;
+
+ if (!conn)
+ RETURN(rc);
+
+ LASSERT(atomic_read(&conn->c_refcount) > 0);
+
+ /*
+ * We do not remove the connection from the hashtable and
+ * do not free it even if the last caller released its ref,
+ * as we want to have it cached for the case it is
+ * needed again.
+ *
+ * Deallocating it and later creating a new connection
+ * again would be wasteful. This way we also avoid
+ * expensive locking to protect things from a get/put
+ * race where a cached connection that was just found is
+ * freed by ptlrpc_connection_put().
+ *
+ * It will be freed later, at module unload time, when
+ * ptlrpc_connection_fini() passes conn_exit() to
+ * rhashtable_free_and_destroy().
+ */
+ if (atomic_dec_return(&conn->c_refcount) == 0)
+ rc = 1;
+
+ CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+
+ RETURN(rc);
}
-int ptlrpc_put_connection(struct ptlrpc_connection *c)
+/*
+ * Take an additional reference on @conn and return @conn.
+ *
+ * @conn is dereferenced unconditionally, so it must not be NULL.
+ */
+struct ptlrpc_connection *
+ptlrpc_connection_addref(struct ptlrpc_connection *conn)
{
- int rc = 0;
- ENTRY;
-
- if (c == NULL) {
- CERROR("NULL connection\n");
- RETURN(0);
- }
-
- CDEBUG(D_INFO, "connection=%p refcount %d\n",
- c, atomic_read(&c->c_refcount) - 1);
- if (atomic_dec_and_test(&c->c_refcount)) {
- recovd_conn_unmanage(c);
- spin_lock(&conn_lock);
- list_del(&c->c_link);
- list_add(&c->c_link, &conn_unused_list);
- spin_unlock(&conn_lock);
- rc = 1;
- }
- if (atomic_read(&c->c_refcount) < 0)
- CERROR("connection %p refcount %d!\n",
- c, atomic_read(&c->c_refcount));
-
- RETURN(rc);
+ ENTRY;
+
+ atomic_inc(&conn->c_refcount);
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+
+ RETURN(conn);
}
-struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c)
+/*
+ * Per-object callback that ptlrpc_connection_fini() passes to
+ * rhashtable_free_and_destroy().
+ *
+ * @vconn is the struct ptlrpc_connection to release; @data is unused.
+ * Asserts that no references remain, then frees the connection.
+ */
+static void
+conn_exit(void *vconn, void *data)
{
- ENTRY;
- CDEBUG(D_INFO, "connection=%p refcount %d\n",
- c, atomic_read(&c->c_refcount) + 1);
- atomic_inc(&c->c_refcount);
- RETURN(c);
+ struct ptlrpc_connection *conn = vconn;
+
+ /*
+ * Nothing should be left. Every connection user has put its
+ * reference and the connection has been deleted from the table
+ * by this time, so we should have 0 refs.
+ */
+ LASSERTF(atomic_read(&conn->c_refcount) == 0,
+ "Busy connection with %d refs\n",
+ atomic_read(&conn->c_refcount));
+ OBD_FREE_PTR(conn);
}
-void ptlrpc_init_connection(void)
+/*
+ * Initialise the connection hash table conn_hash with conn_hash_params.
+ *
+ * Returns the result of rhashtable_init() unchanged.
+ */
+int ptlrpc_connection_init(void)
{
- INIT_LIST_HEAD(&conn_list);
- INIT_LIST_HEAD(&conn_unused_list);
- conn_lock = SPIN_LOCK_UNLOCKED;
+ return rhashtable_init(&conn_hash, &conn_hash_params);
}
-void ptlrpc_cleanup_connection(void)
+/*
+ * Tear down conn_hash via rhashtable_free_and_destroy(), with conn_exit()
+ * as the per-object free callback and no callback argument.
+ */
+void ptlrpc_connection_fini(void)
{
- struct list_head *tmp, *pos;
- struct ptlrpc_connection *c;
-
- spin_lock(&conn_lock);
- list_for_each_safe(tmp, pos, &conn_unused_list) {
- c = list_entry(tmp, struct ptlrpc_connection, c_link);
- list_del(&c->c_link);
- OBD_FREE(c, sizeof(*c));
- }
- list_for_each_safe(tmp, pos, &conn_list) {
- c = list_entry(tmp, struct ptlrpc_connection, c_link);
- CERROR("Connection %p/%s has refcount %d (nid=%lu)\n",
- c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount),
- (unsigned long)c->c_peer.peer_nid);
- list_del(&c->c_link);
- OBD_FREE(c, sizeof(*c));
- }
- spin_unlock(&conn_lock);
+ rhashtable_free_and_destroy(&conn_hash, conn_exit, NULL);
}