Whamcloud - gitweb
Land from b_hd_pid to HEAD
authorzhaoqiang <zhaoqiang>
Thu, 26 Aug 2004 07:34:53 +0000 (07:34 +0000)
committerzhaoqiang <zhaoqiang>
Thu, 26 Aug 2004 07:34:53 +0000 (07:34 +0000)
b=bug4165(pid)
r=Peter

- Assign a constant to the kernel portals pid:
  LUSTRE_SRV_PTL_PID (999999), defined in
  lustre_idl.h.
  In user mode (liblustre), use getpid() instead.

- Change struct ptlrpc_peer's field peer_nid (ptl_nid_t)
  to peer_id (ptl_process_id_t), which includes the pid.
  Update the code that used peer_nid accordingly.

- Change the snprintf of remote_uuid.uuid in
  target_handle_connect to include pid info.

- In ksocknal_api_startup, pass LUSTRE_SRV_PTL_PID to PtlNIInit.

- In tcpnal_init, the line "b->lib_nal->libnal_ni.ni_pid.pid=0;"
  should be deleted (it is commented out by this change).

- In ptlrpc_get_connection, also compare the pid
  when iterating over conn_list and conn_unused_list.

- Change request_in_callback to assign req->rq_peer.peer_id from ev->initiator.

- In ptlrpc_uuid_to_peer, set peer->peer_id.pid to the
  default value (LUSTRE_SRV_PTL_PID).

- Update every call to PtlNIInit/PtlPut to pass the pid.

- Update the calls to PtlMEAttach to use the c_peer value
  in place of PTL_PID_ANY; this is the most important change!

35 files changed:
lnet/include/linux/kp30.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/iblnd/ibnal.c
lnet/klnds/qswlnd/qswlnd.c
lnet/klnds/scimaclnd/scimacnal.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/libcfs/debug.c
lnet/lnet/lib-move.c
lnet/lnet/module.c
lnet/ulnds/socklnd/tcplnd.c
lnet/ulnds/tcplnd.c
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_net.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/liblustre/llite_lib.c
lustre/liblustre/tests/echo_test.c
lustre/obdfilter/filter.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/portals/include/linux/kp30.h
lustre/portals/knals/gmnal/gmnal_api.c
lustre/portals/knals/ibnal/ibnal.c
lustre/portals/knals/qswnal/qswnal.c
lustre/portals/knals/scimacnal/scimacnal.c
lustre/portals/knals/socknal/socknal.c
lustre/portals/knals/socknal/socknal.h
lustre/portals/libcfs/debug.c
lustre/portals/portals/lib-move.c
lustre/portals/portals/module.c
lustre/portals/unals/tcpnal.c
lustre/ptlrpc/connection.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c

index 6ef28a8..b13f161 100644 (file)
@@ -326,6 +326,7 @@ void portals_debug_dumplog(void);
 
 /* support decl needed both by kernel and liblustre */
 char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
+char *portals_id2str(int nal, ptl_process_id_t nid, char *str);
 
 #ifndef CURRENT_TIME
 # define CURRENT_TIME time(0)
@@ -644,7 +645,7 @@ enum {
         NAL_ENUM_END_MARKER
 };
 
-#define PTL_NALFMT_SIZE               26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */
+#define PTL_NALFMT_SIZE              30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */
 
 #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1)
 
index 002587d..bdf5cfe 100644 (file)
@@ -318,7 +318,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 /*
        pid = gm_getpid();
 */
-        process_id.pid = 0;
+        process_id.pid = requested_pid;
         process_id.nid = global_nid;
         
        CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid);
index 86c2a63..c6bd7ed 100644 (file)
@@ -246,7 +246,7 @@ kibnal_init(int             interface, // no use here
 
   rc = lib_init(&kibnal_lib, 
                 kibnal_data.kib_nid, 
-                0, // process id is set as 0  
+                requested_pid , // process id is set as requested_pid  instead of 0  
                 ptl_size, 
                 ac_size);
 
index c595450..38d1636 100644 (file)
@@ -696,7 +696,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        /* Network interface ready to initialise */
 
        my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
-       my_process_id.pid = 0;
+       my_process_id.pid = requested_pid;
 
        rc = lib_init(&kqswnal_lib, nal, my_process_id,
                      requested_limits, actual_limits);
@@ -802,7 +802,7 @@ kqswnal_initialise (void)
        /* Pure gateways, and the workaround for 'EKC blocks forever until
         * the service is active' want the NAL started up at module load
         * time... */
-       rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni);
+       rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni);
        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                ptl_unregister_nal(QSWNAL);
                return (-ENODEV);
index e77bd8e..75188e9 100644 (file)
@@ -205,7 +205,7 @@ static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid,
         }
         kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr));
 
-        process_id.pid = 0;
+        process_id.pid = requested_pid;
         process_id.nid = kscimacnal_data.ksci_nid;
 
         CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
index 9d39cb1..3a3629b 100644 (file)
@@ -1589,7 +1589,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         }
 
         /* NB we have to wait to be told our true NID... */
-        process_id.pid = 0;
+        process_id.pid = requested_pid; //LUSTRE_SRV_PTL_PID; 
         process_id.nid = 0;
         
         rc = lib_init(&ksocknal_lib, nal, process_id,
@@ -1739,7 +1739,7 @@ ksocknal_module_init (void)
         }
 
         /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni);
+        rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                 ptl_unregister_nal(SOCKNAL);
                 return (-ENODEV);
index ff73f71..e142059 100644 (file)
@@ -66,6 +66,7 @@
 #include <portals/lib-p30.h>
 #include <portals/nal.h>
 #include <portals/socknal.h>
+#include <linux/lustre_idl.h>
 
 #if CONFIG_SMP
 # define SOCKNAL_N_SCHED       num_online_cpus() /* # socknal schedulers */
index 2b2ce3b..eb75e60 100644 (file)
@@ -985,6 +985,30 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
         }
         return str;
 }
+/*      bug #4615       */
+char *portals_id2str(int nal, ptl_process_id_t id, char *str)
+{
+        switch(nal){
+        case TCPNAL:
+                /* userspace NAL */
+        case SOCKNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
+                         (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
+                break;
+        case QSWNAL:
+        case GMNAL:
+        case IBNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
+                         (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
+                break;
+        default:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
+                         nal, (long long)id.nid, (long)id.pid );
+                break;
+        }
+        return str;
+}
+
 
 #ifdef __KERNEL__
 char stack_backtrace[LUSTRE_TRACE_SIZE];
@@ -1077,3 +1101,4 @@ EXPORT_SYMBOL(portals_debug_set_level);
 EXPORT_SYMBOL(portals_run_upcall);
 EXPORT_SYMBOL(portals_run_lbug_upcall);
 EXPORT_SYMBOL(portals_nid2str);
+EXPORT_SYMBOL(portals_id2str);
index 9dcc06e..854a452 100644 (file)
@@ -82,6 +82,8 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask,
                 if (me->match_id.nid != PTL_NID_ANY &&
                     me->match_id.nid != src_nid)
                         continue;
+                
+                CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid);
 
                 if (me->match_id.pid != PTL_PID_ANY &&
                     me->match_id.pid != src_pid)
index 5615a72..d292a50 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/kp30.h>
 #include <linux/kpr.h>
 #include <linux/portals_compat25.h>
+#include <linux/lustre_idl.h>
 
 extern void (kping_client)(struct portal_ioctl_data *);
 
@@ -83,7 +84,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data,
 
                 CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
 
-                err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih);
+                err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih);
                 if (!(err == PTL_OK || err == PTL_IFACE_DUP))
                         RETURN (-EINVAL);
 
@@ -104,7 +105,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data,
                 CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
                         data->ioc_nal, data->ioc_nid, data->ioc_count);
 
-                err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih);
+                err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih);
                 if (!(err == PTL_OK || err == PTL_IFACE_DUP))
                         return (-EINVAL);
 
index 34a9c9d..6e9cca9 100644 (file)
@@ -252,7 +252,7 @@ int tcpnal_init(bridge b)
         return(PTL_NAL_FAILED);
     }
     /* XXX cfs hack */
-    b->lib_nal->libnal_ni.ni_pid.pid=0;
+//    b->lib_nal->libnal_ni.ni_pid.pid=0;
     b->lower=m;
     return(PTL_OK);
 }
index 34a9c9d..6e9cca9 100644 (file)
@@ -252,7 +252,7 @@ int tcpnal_init(bridge b)
         return(PTL_NAL_FAILED);
     }
     /* XXX cfs hack */
-    b->lib_nal->libnal_ni.ni_pid.pid=0;
+//    b->lib_nal->libnal_ni.ni_pid.pid=0;
     b->lower=m;
     return(PTL_OK);
 }
index 8f5bd37..b69e1e1 100644 (file)
 #define LUSTRE_OST_VERSION  (0x00040000|PTLRPC_MSG_VERSION)
 #define LUSTRE_DLM_VERSION  (0x00040000|PTLRPC_MSG_VERSION)
 
+/* initial pid  */
+#define LUSTRE_PTL_PID          999999
+#define LUSTRE_SRV_PTL_PID      LUSTRE_PTL_PID    
+
 struct lustre_handle {
         __u64 cookie;
 };
index 03d9ab3..aa8309d 100644 (file)
 #define PTLBD_MAXREQSIZE 1024
 
 struct ptlrpc_peer {
-        ptl_nid_t         peer_nid;
+/*      bugfix #4615 
+ */
+        ptl_process_id_t  peer_id;      
         struct ptlrpc_ni *peer_ni;
 };
 
@@ -515,9 +517,17 @@ struct ptlrpc_service {
 static inline char *ptlrpc_peernid2str(struct ptlrpc_peer *p, char *str)
 {
         LASSERT(p->peer_ni != NULL);
-        return (portals_nid2str(p->peer_ni->pni_number, p->peer_nid, str));
+        return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str));
+}
+
+/*      For bug #4615   */
+static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str)
+{
+        LASSERT(p->peer_ni != NULL);
+        return (portals_id2str(p->peer_ni->pni_number, p->peer_id, str));
 }
 
+
 /* ptlrpc/events.c */
 extern struct ptlrpc_ni ptlrpc_interfaces[];
 extern int              ptlrpc_ninterfaces;
@@ -539,6 +549,7 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c);
 struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
 void ptlrpc_init_connection(void);
 void ptlrpc_cleanup_connection(void);
+extern ptl_pid_t ptl_get_pid(void);
 
 /* ptlrpc/niobuf.c */
 int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc);
index fb70b65..8d41587 100644 (file)
@@ -416,11 +416,11 @@ int target_handle_connect(struct ptlrpc_request *req)
                 /* NB the casts only avoid compiler warnings */
         case 8:
                 snprintf(remote_uuid.uuid, sizeof remote_uuid,
-                         "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_nid);
+                         "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_id.nid);
                 break;
         case 4:
                 snprintf(remote_uuid.uuid, sizeof remote_uuid,
-                         "NET_%x_UUID", (__u32)req->rq_peer.peer_nid);
+                         "NET_%x_UUID", (__u32)req->rq_peer.peer_id.nid);
                 break;
         default:
                 LBUG();
index b4acd61..d1a60fe 100644 (file)
@@ -311,7 +311,7 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,const char *ast_type)
 
         LDLM_ERROR(lock, "%s AST failed (%d): evicting client %s@%s NID "LPX64
                    " (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid,
-                   conn->c_remote_uuid.uuid, conn->c_peer.peer_nid,
+                   conn->c_remote_uuid.uuid, conn->c_peer.peer_id.nid,
                    ptlrpc_peernid2str(&conn->c_peer, str));
         ptlrpc_fail_export(lock->l_export);
 }
index 16437bc..73f97da 100644 (file)
@@ -101,7 +101,29 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
         }
         return str;
 }
-
+/*      bug #4615       */
+char *portals_id2str(int nal, ptl_process_id_t id, char *str)
+{
+        switch(nal){
+        case TCPNAL:
+                /* userspace NAL */
+        case SOCKNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
+                         (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
+                break;
+        case QSWNAL:
+        case GMNAL:
+        case IBNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
+                         (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
+                break;
+        default:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
+                         nal, (long long)id.nid, (long)id.pid );
+                break;
+        }
+        return str;
+}
 /*
  * random number generator stuff
  */
index 19fd83a..13157cd 100644 (file)
@@ -82,6 +82,29 @@ struct pingcli_args {
         int count;
         int size;
 };
+/*      bug #4615       */
+char *portals_id2str(int nal, ptl_process_id_t id, char *str)
+{
+        switch(nal){
+        case TCPNAL:
+                /* userspace NAL */
+        case SOCKNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
+                         (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
+                break;
+        case QSWNAL:
+        case GMNAL:
+        case IBNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
+                         (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
+                break;
+        default:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
+                         nal, (long long)id.nid, (long)id.pid );
+                break;
+        }
+        return str;
+}
 
 struct task_struct *current;
 
index 649e6fe..4faa1ef 100644 (file)
@@ -1626,9 +1626,9 @@ static int filter_connect_post(struct obd_export *exp)
 
         rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
         portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number,
-                        exp->exp_connection->c_peer.peer_nid, str);
+                        exp->exp_connection->c_peer.peer_id.nid, str);
         CDEBUG(D_OTHER, "%s: init llog ctxt for export "LPX64"/%s, group %d\n",
-               obd->obd_name, exp->exp_connection->c_peer.peer_nid,
+               obd->obd_name, exp->exp_connection->c_peer.peer_id.nid,
                str, fed->fed_group);
 
         RETURN(rc);
@@ -1685,11 +1685,11 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
         if (fed->fed_group != 0 && fed->fed_group != group) {
                 char str[PTL_NALFMT_SIZE];
                 portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number,
-                                exp->exp_connection->c_peer.peer_nid, str);
+                                exp->exp_connection->c_peer.peer_id.nid, str);
                 CERROR("!!! This export (nid "LPX64"/%s) used object group %d "
                        "earlier; now it's trying to use group %d!  This could "
                        "be a bug in the MDS.  Tell CFS.\n",
-                       exp->exp_connection->c_peer.peer_nid, str,
+                       exp->exp_connection->c_peer.peer_id.nid, str,
                        fed->fed_group, group);
                 GOTO(cleanup, rc = -EPROTO);
         }
@@ -2392,9 +2392,9 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
 
         if (!(oa->o_valid & OBD_MD_FLGROUP) || group == 0) {
                 portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number,
-                                exp->exp_connection->c_peer.peer_nid, str);
+                                exp->exp_connection->c_peer.peer_id.nid, str);
                 CERROR("!!! nid "LPX64"/%s sent invalid object group %d\n",
-                       exp->exp_connection->c_peer.peer_nid, str, group);
+                       exp->exp_connection->c_peer.peer_id.nid, str, group);
                 RETURN(-EINVAL);
         }
 
@@ -2410,11 +2410,11 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
         if (fed->fed_group != group && !recreate_objs &&
             !(oa->o_valid & OBD_MD_REINT)) {
                 portals_nid2str(exp->exp_connection->c_peer.peer_ni->pni_number,
-                                exp->exp_connection->c_peer.peer_nid, str);
+                                exp->exp_connection->c_peer.peer_id.nid, str);
                 CERROR("!!! This export (nid "LPX64"/%s) used object group %d "
                        "earlier; now it's trying to use group %d!  This could "
                        "be a bug in the MDS.  Tell CFS.\n",
-                       exp->exp_connection->c_peer.peer_nid, str,
+                       exp->exp_connection->c_peer.peer_id.nid, str,
                        fed->fed_group, group);
                 RETURN(-ENOTUNIQ);
         }
index ed75cfd..1f84b4b 100644 (file)
@@ -870,12 +870,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
                 if (server_cksum != cksum) {
                         CERROR("Bad checksum: server %x, client %x, server NID "
                                LPX64" (%s)\n", server_cksum, cksum,
-                               peer->peer_nid, str);
+                               peer->peer_id.nid, str);
                         cksum_counter = 0;
                         oa->o_cksum = cksum;
                 } else if ((cksum_counter & (-cksum_counter)) == cksum_counter){
                         CWARN("Checksum %u from "LPX64" (%s) OK: %x\n",
-                              cksum_counter, peer->peer_nid, str, cksum);
+                              cksum_counter, peer->peer_id.nid, str, cksum);
                 }
         } else {
                 static int cksum_missed;
@@ -884,7 +884,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
                 if ((cksum_missed & (-cksum_missed)) == cksum_missed)
                         CERROR("Request checksum %u from "LPX64", no reply\n",
                                cksum_missed,
-                               req->rq_import->imp_connection->c_peer.peer_nid);
+                               req->rq_import->imp_connection->c_peer.peer_id.nid);
         }
 #endif
         RETURN(0);
index 2271c6c..d96936e 100644 (file)
@@ -669,7 +669,7 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 if (client_cksum != cksum) {
                         CERROR("Bad checksum: client %x, server %x, client NID "
                                LPX64" (%s)\n", client_cksum, cksum,
-                               req->rq_connection->c_peer.peer_nid, str);
+                               req->rq_connection->c_peer.peer_id.nid, str);
                         cksum_counter = 1;
                         repbody->oa.o_cksum = cksum;
                 } else {
@@ -677,7 +677,7 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         if ((cksum_counter & (-cksum_counter)) == cksum_counter)
                                 CWARN("Checksum %u from "LPX64": %x OK\n",
                                       cksum_counter,
-                                      req->rq_connection->c_peer.peer_nid,
+                                      req->rq_connection->c_peer.peer_id.nid,
                                       cksum);
                 }
         }
index 6ef28a8..b13f161 100644 (file)
@@ -326,6 +326,7 @@ void portals_debug_dumplog(void);
 
 /* support decl needed both by kernel and liblustre */
 char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
+char *portals_id2str(int nal, ptl_process_id_t nid, char *str);
 
 #ifndef CURRENT_TIME
 # define CURRENT_TIME time(0)
@@ -644,7 +645,7 @@ enum {
         NAL_ENUM_END_MARKER
 };
 
-#define PTL_NALFMT_SIZE               26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */
+#define PTL_NALFMT_SIZE              30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */
 
 #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1)
 
index 002587d..bdf5cfe 100644 (file)
@@ -318,7 +318,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 /*
        pid = gm_getpid();
 */
-        process_id.pid = 0;
+        process_id.pid = requested_pid;
         process_id.nid = global_nid;
         
        CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid);
index 86c2a63..c6bd7ed 100644 (file)
@@ -246,7 +246,7 @@ kibnal_init(int             interface, // no use here
 
   rc = lib_init(&kibnal_lib, 
                 kibnal_data.kib_nid, 
-                0, // process id is set as 0  
+                requested_pid , // process id is set as requested_pid  instead of 0  
                 ptl_size, 
                 ac_size);
 
index c595450..38d1636 100644 (file)
@@ -696,7 +696,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        /* Network interface ready to initialise */
 
        my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
-       my_process_id.pid = 0;
+       my_process_id.pid = requested_pid;
 
        rc = lib_init(&kqswnal_lib, nal, my_process_id,
                      requested_limits, actual_limits);
@@ -802,7 +802,7 @@ kqswnal_initialise (void)
        /* Pure gateways, and the workaround for 'EKC blocks forever until
         * the service is active' want the NAL started up at module load
         * time... */
-       rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni);
+       rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni);
        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                ptl_unregister_nal(QSWNAL);
                return (-ENODEV);
index e77bd8e..75188e9 100644 (file)
@@ -205,7 +205,7 @@ static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid,
         }
         kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr));
 
-        process_id.pid = 0;
+        process_id.pid = requested_pid;
         process_id.nid = kscimacnal_data.ksci_nid;
 
         CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
index 9d39cb1..3a3629b 100644 (file)
@@ -1589,7 +1589,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         }
 
         /* NB we have to wait to be told our true NID... */
-        process_id.pid = 0;
+        process_id.pid = requested_pid; //LUSTRE_SRV_PTL_PID; 
         process_id.nid = 0;
         
         rc = lib_init(&ksocknal_lib, nal, process_id,
@@ -1739,7 +1739,7 @@ ksocknal_module_init (void)
         }
 
         /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni);
+        rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                 ptl_unregister_nal(SOCKNAL);
                 return (-ENODEV);
index ff73f71..e142059 100644 (file)
@@ -66,6 +66,7 @@
 #include <portals/lib-p30.h>
 #include <portals/nal.h>
 #include <portals/socknal.h>
+#include <linux/lustre_idl.h>
 
 #if CONFIG_SMP
 # define SOCKNAL_N_SCHED       num_online_cpus() /* # socknal schedulers */
index 2b2ce3b..eb75e60 100644 (file)
@@ -985,6 +985,30 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
         }
         return str;
 }
+/*      bug #4615       */
+char *portals_id2str(int nal, ptl_process_id_t id, char *str)
+{
+        switch(nal){
+        case TCPNAL:
+                /* userspace NAL */
+        case SOCKNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
+                         (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
+                break;
+        case QSWNAL:
+        case GMNAL:
+        case IBNAL:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
+                         (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
+                break;
+        default:
+                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
+                         nal, (long long)id.nid, (long)id.pid );
+                break;
+        }
+        return str;
+}
+
 
 #ifdef __KERNEL__
 char stack_backtrace[LUSTRE_TRACE_SIZE];
@@ -1077,3 +1101,4 @@ EXPORT_SYMBOL(portals_debug_set_level);
 EXPORT_SYMBOL(portals_run_upcall);
 EXPORT_SYMBOL(portals_run_lbug_upcall);
 EXPORT_SYMBOL(portals_nid2str);
+EXPORT_SYMBOL(portals_id2str);
index 9dcc06e..854a452 100644 (file)
@@ -82,6 +82,8 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask,
                 if (me->match_id.nid != PTL_NID_ANY &&
                     me->match_id.nid != src_nid)
                         continue;
+                
+                CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid);
 
                 if (me->match_id.pid != PTL_PID_ANY &&
                     me->match_id.pid != src_pid)
index 5615a72..d292a50 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/kp30.h>
 #include <linux/kpr.h>
 #include <linux/portals_compat25.h>
+#include <linux/lustre_idl.h>
 
 extern void (kping_client)(struct portal_ioctl_data *);
 
@@ -83,7 +84,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data,
 
                 CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
 
-                err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih);
+                err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih);
                 if (!(err == PTL_OK || err == PTL_IFACE_DUP))
                         RETURN (-EINVAL);
 
@@ -104,7 +105,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data,
                 CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
                         data->ioc_nal, data->ioc_nid, data->ioc_count);
 
-                err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih);
+                err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih);
                 if (!(err == PTL_OK || err == PTL_IFACE_DUP))
                         return (-EINVAL);
 
index 34a9c9d..6e9cca9 100644 (file)
@@ -252,7 +252,7 @@ int tcpnal_init(bridge b)
         return(PTL_NAL_FAILED);
     }
     /* XXX cfs hack */
-    b->lib_nal->libnal_ni.ni_pid.pid=0;
+//    b->lib_nal->libnal_ni.ni_pid.pid=0;
     b->lower=m;
     return(PTL_OK);
 }
index 2e0d889..646cb07 100644 (file)
@@ -62,12 +62,12 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer,
 
 
         CDEBUG(D_INFO, "peer is %s on %s\n",
-               ptlrpc_peernid2str(peer, str), peer->peer_ni->pni_name);
+               ptlrpc_id2str(peer, str), peer->peer_ni->pni_name);
 
         spin_lock(&conn_lock);
         list_for_each(tmp, &conn_list) {
                 c = list_entry(tmp, struct ptlrpc_connection, c_link);
-                if (peer->peer_nid == c->c_peer.peer_nid &&
+                if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) &&
                     peer->peer_ni == c->c_peer.peer_ni) {
                         ptlrpc_connection_addref(c);
                         GOTO(out, c);
@@ -76,7 +76,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer,
 
         list_for_each_safe(tmp, pos, &conn_unused_list) {
                 c = list_entry(tmp, struct ptlrpc_connection, c_link);
-                if (peer->peer_nid == c->c_peer.peer_nid &&
+                if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) &&
                     peer->peer_ni == c->c_peer.peer_ni) {
                         ptlrpc_connection_addref(c);
                         list_del(&c->c_link);
index 254ae30..8ea1813 100644 (file)
@@ -197,8 +197,8 @@ void request_in_callback(ptl_event_t *ev)
                         CERROR("Can't allocate incoming request descriptor: "
                                "Dropping %s RPC from %s\n",
                                service->srv_name, 
-                               portals_nid2str(srv_ni->sni_ni->pni_number,
-                                               ev->initiator.nid, str));
+                               portals_id2str(srv_ni->sni_ni->pni_number,
+                                               ev->initiator, str));
                         return;
                 }
         }
@@ -212,7 +212,7 @@ void request_in_callback(ptl_event_t *ev)
             ev->ni_fail_type == PTL_NI_OK)
                 req->rq_reqlen = ev->mlength;
         do_gettimeofday(&req->rq_arrival_time);
-        req->rq_peer.peer_nid = ev->initiator.nid;
+        req->rq_peer.peer_id = ev->initiator;
         req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni;
         req->rq_rqbd = rqbd;
 
@@ -361,7 +361,8 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer)
                 pni = &ptlrpc_interfaces[i];
 
                 if (pni->pni_number == peer_nal) {
-                        peer->peer_nid = peer_nid;
+                        peer->peer_id.nid = peer_nid;
+                        peer->peer_id.pid = LUSTRE_SRV_PTL_PID; //#4165:only client will call this func.
                         peer->peer_ni = pni;
                         return (0);
                 }
@@ -409,20 +410,37 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni)
         /* notreached */
 }
 
+ptl_pid_t ptl_get_pid(void)
+{
+        ptl_pid_t        pid;
+
+#ifndef  __KERNEL__
+        pid = getpid();
+#else
+        pid = LUSTRE_SRV_PTL_PID;
+#endif
+        return pid;
+}
+        
 int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni)
 {
         int              rc;
         char             str[20];
         ptl_handle_ni_t  nih;
-
+        ptl_pid_t        pid;
+        
+        pid = ptl_get_pid();
+        
         /* We're not passing any limits yet... */
-        rc = PtlNIInit(number, 0, NULL, NULL, &nih);
+        rc = PtlNIInit(number, pid, NULL, NULL, &nih);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                 CDEBUG (D_NET, "Can't init network interface %s: %d\n", 
                         name, rc);
                 return (-ENOENT);
         }
 
+        CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid());
+        
         PtlSnprintHandle(str, sizeof(str), nih);
         CDEBUG (D_NET, "init %d %s: %s\n", number, name, str);
 
index b628216..e8e4660 100644 (file)
@@ -34,7 +34,6 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
                          ptl_ack_req_t ack, struct ptlrpc_cb_id *cbid,
                          struct ptlrpc_connection *conn, int portal, __u64 xid)
 {
-        ptl_process_id_t remote_id;
         int              rc;
         int              rc2;
         ptl_md_t         md;
@@ -43,14 +42,10 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
 
         LASSERT (portal != 0);
         LASSERT (conn != NULL);
-        CDEBUG (D_INFO, "conn=%p ni %s nid %s on %s\n",
+        CDEBUG (D_INFO, "conn=%p ni %s id %s on %s\n",
                 conn, conn->c_peer.peer_ni->pni_name,
-                ptlrpc_peernid2str(&conn->c_peer, str),
+                ptlrpc_id2str(&conn->c_peer, str),
                 conn->c_peer.peer_ni->pni_name);
-
-        remote_id.nid = conn->c_peer.peer_nid,
-        remote_id.pid = 0;
-
         md.start     = base;
         md.length    = len;
         md.threshold = (ack == PTL_ACK_REQ) ? 2 : 1;
@@ -76,13 +71,13 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
         CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n",
                len, portal, xid);
 
-        rc2 = PtlPut (*mdh, ack, remote_id, portal, 0, xid, 0, 0);
+        rc = PtlPut (*mdh, ack, conn->c_peer.peer_id, portal, 0, xid, 0, 0);
         if (rc != PTL_OK) {
                 /* We're going to get an UNLINK event when I unlink below,
                  * which will complete just like any other failed send, so
                  * I fall through and return success here! */
                 CERROR("PtlPut(%s, %d, "LPD64") failed: %d\n",
-                       ptlrpc_peernid2str(&conn->c_peer, str),
+                       ptlrpc_id2str(&conn->c_peer, str),
                        portal, xid, rc);
                 rc2 = PtlMDUnlink(*mdh);
                 LASSERT (rc2 == PTL_OK);
@@ -96,7 +91,6 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
         int                 rc;
         int                 rc2;
         struct ptlrpc_peer *peer;
-        ptl_process_id_t    remote_id;
         ptl_md_t            md;
         __u64               xid;
         char                str[PTL_NALFMT_SIZE];
@@ -134,22 +128,19 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
 
         /* Client's bulk and reply matchbits are the same */
         xid = desc->bd_req->rq_xid;
-        remote_id.nid = peer->peer_nid;
-        remote_id.pid = 0;
-
         CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d on %s "
                "nid %s pid %d xid "LPX64"\n", desc->bd_iov_count,
                desc->bd_nob, desc->bd_portal, peer->peer_ni->pni_name,
-               ptlrpc_peernid2str(peer, str), remote_id.pid, xid);
+               ptlrpc_id2str(peer, str), peer->peer_id.pid, xid);
 
         /* Network is about to get at the memory */
         desc->bd_network_rw = 1;
 
         if (desc->bd_type == BULK_PUT_SOURCE)
-                rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, remote_id,
+                rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, peer->peer_id,
                              desc->bd_portal, 0, xid, 0, 0);
         else
-                rc = PtlGet (desc->bd_md_h, remote_id,
+                rc = PtlGet (desc->bd_md_h, peer->peer_id,
                              desc->bd_portal, 0, xid, 0);
         
         if (rc != PTL_OK) {
@@ -157,7 +148,7 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
                  * event this creates will signal completion with failure,
                  * so we return SUCCESS here! */
                 CERROR("Transfer(%s, %d, "LPX64") failed: %d\n",
-                       ptlrpc_peernid2str(peer, str),
+                       ptlrpc_id2str(peer, str),
                        desc->bd_portal, xid, rc);
                 rc2 = PtlMDUnlink(desc->bd_md_h);
                 LASSERT (rc2 == PTL_OK);
@@ -205,7 +196,6 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
         struct ptlrpc_peer *peer;
         int rc;
         int rc2;
-        ptl_process_id_t source_id;
         ptl_handle_me_t  me_h;
         ptl_md_t         md;
         ENTRY;
@@ -242,13 +232,10 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
         LASSERT (!desc->bd_registered || req->rq_xid != desc->bd_last_xid);
         desc->bd_registered = 1;
         desc->bd_last_xid = req->rq_xid;
-
-        source_id.nid = desc->bd_import->imp_connection->c_peer.peer_nid;
-        source_id.pid = PTL_PID_ANY;
-
+        
         rc = PtlMEAttach(peer->peer_ni->pni_ni_h,
-                         desc->bd_portal, source_id, req->rq_xid, 0,
-                         PTL_UNLINK, PTL_INS_AFTER, &me_h);
+                         desc->bd_portal, desc->bd_import->imp_connection->c_peer.peer_id, 
+                         req->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &me_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMEAttach failed: %d\n", rc);
                 LASSERT (rc == PTL_NO_SPACE);
@@ -397,7 +384,6 @@ int ptl_send_rpc(struct ptlrpc_request *request)
         int rc2;
         struct ptlrpc_connection *connection;
         unsigned long flags;
-        ptl_process_id_t source_id;
         ptl_handle_me_t  reply_me_h;
         ptl_md_t         reply_md;
         ENTRY;
@@ -419,10 +405,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
         request->rq_reqmsg->handle = request->rq_import->imp_remote_handle;
         request->rq_reqmsg->type = PTL_RPC_MSG_REQUEST;
         request->rq_reqmsg->conn_cnt = request->rq_import->imp_conn_cnt;
-
-        source_id.nid = connection->c_peer.peer_nid;
-        source_id.pid = PTL_PID_ANY;
-
+                
         LASSERT (request->rq_replen != 0);
         if (request->rq_repmsg == NULL)
                 OBD_ALLOC(request->rq_repmsg, request->rq_replen);
@@ -431,7 +414,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
 
         rc = PtlMEAttach(connection->c_peer.peer_ni->pni_ni_h,
                          request->rq_reply_portal, /* XXX FIXME bug 249 */
-                         source_id, request->rq_xid, 0, PTL_UNLINK,
+                         connection->c_peer.peer_id, request->rq_xid, 0, PTL_UNLINK,
                          PTL_INS_AFTER, &reply_me_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMEAttach failed: %d\n", rc);