Whamcloud - gitweb
b=14132
[fs/lustre-release.git] / lnet / ulnds / socklnd / poll.c
index 9c346a7..2f15f1b 100644 (file)
@@ -1,28 +1,57 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Author: Maxim Patlasov <maxim@clusterfs.com>
+ * GPL HEADER START
  *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/ulnds/socklnd/poll.c
+ *
+ * Author: Maxim Patlasov <maxim@clusterfs.com>
  */
 
 #include "usocklnd.h"
 #include <unistd.h>
-#include <syscall.h>
+#include <sys/syscall.h>
 
 void
 usocklnd_process_stale_list(usock_pollthread_t *pt_data)
 {
         while (!list_empty(&pt_data->upt_stale_list)) {
-                usock_conn_t *conn;                        
+                usock_conn_t *conn;
                 conn = list_entry(pt_data->upt_stale_list.next,
                                   usock_conn_t, uc_stale_list);
-                
+
                 list_del(&conn->uc_stale_list);
-                
+
                 usocklnd_tear_peer_conn(conn);
                 usocklnd_conn_decref(conn); /* -1 for idx2conn[idx] or pr */
         }
@@ -47,14 +76,14 @@ usocklnd_poll_thread(void *arg)
         sigset_t  sigs;
         sigfillset (&sigs);
         pthread_sigmask (SIG_SETMASK, &sigs, 0);
-        
+
         LASSERT(pt_data != NULL);
-        
+
         planned_time = cfs_time_shift(usock_tuns.ut_poll_timeout);
         chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds);
         saved_nfds = pt_data->upt_nfds;
         idx_start = 1;
-        
+
         /* Main loop */
         while (usock_data.ud_shutdown == 0) {
                 rc = 0;
@@ -65,11 +94,11 @@ usocklnd_poll_thread(void *arg)
                         usock_pollrequest_t *pr;
                         pr = list_entry(pt_data->upt_pollrequests.next,
                                         usock_pollrequest_t, upr_list);
-                        
+
                         list_del(&pr->upr_list);
                         rc = usocklnd_process_pollrequest(pr, pt_data);
                         if (rc)
-                                break;                        
+                                break;
                 }
                 pthread_mutex_unlock(&pt_data->upt_pollrequests_lock);
 
@@ -78,7 +107,7 @@ usocklnd_poll_thread(void *arg)
 
                 /* Delete conns orphaned due to POLL_DEL_REQUESTs */
                 usocklnd_process_stale_list(pt_data);
-                
+
                 /* Actual polling for events */
                 rc = poll(pt_data->upt_pollfd,
                           pt_data->upt_nfds,
@@ -106,7 +135,7 @@ usocklnd_poll_thread(void *arg)
                         extra = 0;
                 }
 
-                times = cfs_duration_sec(cfs_time_sub(current_time, planned_time)) + 1;                
+                times = cfs_duration_sec(cfs_time_sub(current_time, planned_time)) + 1;
                 idx_finish = MIN(idx_start + chunk*times + extra, pt_data->upt_nfds);
 
                 for (idx = idx_start; idx < idx_finish; idx++) {
@@ -120,7 +149,7 @@ usocklnd_poll_thread(void *arg)
                         pthread_mutex_unlock(&conn->uc_lock);
                 }
 
-                if (idx_finish == pt_data->upt_nfds) {                        
+                if (idx_finish == pt_data->upt_nfds) {
                         chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds);
                         saved_nfds = pt_data->upt_nfds;
                         idx_start = 1;
@@ -128,11 +157,11 @@ usocklnd_poll_thread(void *arg)
                 else {
                         idx_start = idx_finish;
                 }
-                
+
                 planned_time = cfs_time_add(current_time,
                                             cfs_time_seconds(usock_tuns.ut_poll_timeout));
         }
-        
+
         /* All conns should be deleted by POLL_DEL_REQUESTs during shutdown */
         LASSERT (rc != 0 || pt_data->upt_nfds == 1);
 
@@ -141,37 +170,37 @@ usocklnd_poll_thread(void *arg)
 
                 /* Block new poll requests from being enqueued */
                 pt_data->upt_errno = rc;
-                
+
                 while (!list_empty(&pt_data->upt_pollrequests)) {
                         usock_pollrequest_t *pr;
                         pr = list_entry(pt_data->upt_pollrequests.next,
                                         usock_pollrequest_t, upr_list);
-                        
+
                         list_del(&pr->upr_list);
 
                         if (pr->upr_type == POLL_ADD_REQUEST) {
-                                close(pr->upr_conn->uc_fd);
+                                libcfs_sock_release(pr->upr_conn->uc_sock);
                                 list_add_tail(&pr->upr_conn->uc_stale_list,
                                               &pt_data->upt_stale_list);
                         } else {
                                 usocklnd_conn_decref(pr->upr_conn);
                         }
-                        
+
                         LIBCFS_FREE (pr, sizeof(*pr));
                 }
                 pthread_mutex_unlock(&pt_data->upt_pollrequests_lock);
 
                 usocklnd_process_stale_list(pt_data);
-                
+
                 for (idx = 1; idx < pt_data->upt_nfds; idx++) {
                         usock_conn_t *conn = pt_data->upt_idx2conn[idx];
                         LASSERT(conn != NULL);
-                        close(conn->uc_fd);
+                        libcfs_sock_release(conn->uc_sock);
                         usocklnd_tear_peer_conn(conn);
                         usocklnd_conn_decref(conn);
                 }
         }
-        
+
         /* unblock usocklnd_shutdown() */
         cfs_complete(&pt_data->upt_completion);
 
@@ -197,7 +226,7 @@ usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value)
         pr->upr_value = value;
 
         usocklnd_conn_addref(conn); /* +1 for poll request */
-        
+
         pthread_mutex_lock(&pt->upt_pollrequests_lock);
 
         if (pt->upt_errno) { /* very rare case: errored poll thread */
@@ -207,7 +236,7 @@ usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value)
                 LIBCFS_FREE(pr, sizeof(*pr));
                 return rc;
         }
-        
+
         list_add_tail(&pr->upr_list, &pt->upt_pollrequests);
         pthread_mutex_unlock(&pt->upt_pollrequests_lock);
         return 0;
@@ -219,7 +248,7 @@ usocklnd_add_killrequest(usock_conn_t *conn)
         int                  pt_idx = conn->uc_pt_idx;
         usock_pollthread_t  *pt     = &usock_data.ud_pollthreads[pt_idx];
         usock_pollrequest_t *pr     = conn->uc_preq;
-        
+
         /* Use preallocated poll request because there is no good
          * workaround for ENOMEM error while killing connection */
         if (pr) {
@@ -228,9 +257,9 @@ usocklnd_add_killrequest(usock_conn_t *conn)
                 pr->upr_value = 0;
 
                 usocklnd_conn_addref(conn); /* +1 for poll request */
-                
+
                 pthread_mutex_lock(&pt->upt_pollrequests_lock);
-                
+
                 if (pt->upt_errno) { /* very rare case: errored poll thread */
                         pthread_mutex_unlock(&pt->upt_pollrequests_lock);
                         usocklnd_conn_decref(conn);
@@ -258,16 +287,17 @@ usocklnd_process_pollrequest(usock_pollrequest_t *pr,
         int           *fd2idx   = pt_data->upt_fd2idx;
         usock_conn_t **idx2conn = pt_data->upt_idx2conn;
         int           *skip     = pt_data->upt_skip;
-        
+
         LASSERT(conn != NULL);
-        LASSERT(conn->uc_fd >=0);
+        LASSERT(conn->uc_sock != NULL);
         LASSERT(type == POLL_ADD_REQUEST ||
-                conn->uc_fd < pt_data->upt_nfd2idx);
+                LIBCFS_SOCK2FD(conn->uc_sock) < pt_data->upt_nfd2idx);
 
         if (type != POLL_ADD_REQUEST) {
-                idx = fd2idx[conn->uc_fd];
+                idx = fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)];
                 if (idx > 0 && idx < pt_data->upt_nfds) { /* hot path */
-                        LASSERT(pollfd[idx].fd == conn->uc_fd);
+                        LASSERT(pollfd[idx].fd ==
+                                LIBCFS_SOCK2FD(conn->uc_sock));
                 } else { /* unlikely */
                         CWARN("Very unlikely event happend: trying to"
                               " handle poll request of type %d but idx=%d"
@@ -283,7 +313,7 @@ usocklnd_process_pollrequest(usock_pollrequest_t *pr,
         }
 
         LIBCFS_FREE (pr, sizeof(*pr));
-        
+
         switch (type) {
         case POLL_ADD_REQUEST:
                 if (pt_data->upt_nfds >= pt_data->upt_npollfd) {
@@ -298,7 +328,7 @@ usocklnd_process_pollrequest(usock_pollrequest_t *pr,
                         if (new_pollfd == NULL)
                                 goto process_pollrequest_enomem;
                         pt_data->upt_pollfd = pollfd = new_pollfd;
-                        
+
                         new_idx2conn = LIBCFS_REALLOC(idx2conn, new_npollfd *
                                                       sizeof(usock_conn_t *));
                         if (new_idx2conn == NULL)
@@ -310,16 +340,16 @@ usocklnd_process_pollrequest(usock_pollrequest_t *pr,
                         if (new_skip == NULL)
                                 goto process_pollrequest_enomem;
                         pt_data->upt_skip = new_skip;
-                        
+
                         pt_data->upt_npollfd = new_npollfd;
                 }
 
-                if (conn->uc_fd >= pt_data->upt_nfd2idx) {
+                if (LIBCFS_SOCK2FD(conn->uc_sock) >= pt_data->upt_nfd2idx) {
                         /* resize fd2idx[] */
                         int *new_fd2idx;
                         int  new_nfd2idx = pt_data->upt_nfd2idx * 2;
 
-                        while (new_nfd2idx <= conn->uc_fd)
+                        while (new_nfd2idx <= LIBCFS_SOCK2FD(conn->uc_sock))
                                 new_nfd2idx *= 2;
 
                         new_fd2idx = LIBCFS_REALLOC(fd2idx, new_nfd2idx *
@@ -334,29 +364,29 @@ usocklnd_process_pollrequest(usock_pollrequest_t *pr,
                         pt_data->upt_nfd2idx = new_nfd2idx;
                 }
 
-                LASSERT(fd2idx[conn->uc_fd] == 0);
+                LASSERT(fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)] == 0);
 
                 idx = pt_data->upt_nfds++;
                 idx2conn[idx] = conn;
-                fd2idx[conn->uc_fd] = idx;
+                fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)] = idx;
 
-                pollfd[idx].fd = conn->uc_fd;
+                pollfd[idx].fd = LIBCFS_SOCK2FD(conn->uc_sock);
                 pollfd[idx].events = value;
                 pollfd[idx].revents = 0;
                 break;
         case POLL_DEL_REQUEST:
-                fd2idx[conn->uc_fd] = 0; /* invalidate this entry */
-                
+                fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)] = 0; /* invalidate this
+                                                            * entry */
                 --pt_data->upt_nfds;
                 if (idx != pt_data->upt_nfds) {
                         /* shift last entry into released position */
                         memcpy(&pollfd[idx], &pollfd[pt_data->upt_nfds],
                                sizeof(struct pollfd));
                         idx2conn[idx] = idx2conn[pt_data->upt_nfds];
-                        fd2idx[pollfd[idx].fd] = idx;                        
+                        fd2idx[pollfd[idx].fd] = idx;
                 }
 
-                close(conn->uc_fd);
+                libcfs_sock_release(conn->uc_sock);
                 list_add_tail(&conn->uc_stale_list, &pt_data->upt_stale_list);
                 break;
         case POLL_RX_SET_REQUEST:
@@ -369,14 +399,14 @@ usocklnd_process_pollrequest(usock_pollrequest_t *pr,
                 pollfd[idx].events = value;
                 break;
         default:
-                LBUG(); /* unknown type */                
+                LBUG(); /* unknown type */
         }
 
         /* In the case of POLL_ADD_REQUEST, idx2conn[idx] takes the
          * reference that poll request possesses */
         if (type != POLL_ADD_REQUEST)
                 usocklnd_conn_decref(conn);
-        
+
         return 0;
 
   process_pollrequest_enomem:
@@ -404,14 +434,14 @@ usocklnd_execute_handlers(usock_pollthread_t *pt_data)
         for (j = 0; j < usock_tuns.ut_fair_limit; j++) {
                 int prev = 0;
                 int i = skip[0];
-                
+
                 if (i >= nfds) /* nothing ready */
                         break;
-                
+
                 do {
                         usock_conn_t *conn = idx2conn[i];
                         int next;
-                        
+
                         if (j == 0) /* first pass... */
                                 next = skip[i] = i+1; /* set skip chain */
                         else /* later passes... */
@@ -427,20 +457,20 @@ usocklnd_execute_handlers(usock_pollthread_t *pt_data)
                                 else
                                         usocklnd_exception_handler(conn);
                         }
-                        
+
                         if ((pollfd[i].revents & POLLIN) != 0 &&
                             usocklnd_read_handler(conn) <= 0)
                                 pollfd[i].revents &= ~POLLIN;
-                        
+
                         if ((pollfd[i].revents & POLLOUT) != 0 &&
                             usocklnd_write_handler(conn) <= 0)
                                 pollfd[i].revents &= ~POLLOUT;
-                        
+
                         if ((pollfd[i].revents & (POLLIN | POLLOUT)) == 0)
                                 skip[prev] = next; /* skip this entry next pass */
                         else
                                 prev = i;
-                        
+
                         i = next;
                 } while (i < nfds);
         }
@@ -452,14 +482,14 @@ usocklnd_calculate_chunk_size(int num)
         const int n     = 4;
         const int p     = usock_tuns.ut_poll_timeout;
         int       chunk = num;
-        
+
         /* chunk should be big enough to detect a timeout on any
          * connection within (n+1)/n times the timeout interval
          * if we check 'chunk' conns every 'p' seconds */
-                 
+
         if (usock_tuns.ut_timeout > n * p)
                 chunk = (chunk * n * p) / usock_tuns.ut_timeout;
-        
+
         if (chunk == 0)
                 chunk = 1;
 
@@ -473,8 +503,8 @@ usocklnd_wakeup_pollthread(int i)
         int                 notification = 0;
         int                 rc;
 
-        rc = syscall(SYS_write, pt->upt_notifier_fd, &notification,
-                     sizeof(notification));
+        rc = syscall(SYS_write, LIBCFS_SOCK2FD(pt->upt_notifier[0]),
+                     &notification, sizeof(notification));
 
         if (rc != sizeof(notification))
                 CERROR("Very unlikely event happend: "