*/
int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
/**
+ * print distance information of cpt-table
+ */
+int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
* return total number of CPU partitions in \a cptab
*/
int
*/
int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
/**
+ * shadow HW node ID \a NODE to CPU-partition ID by \a cptab
+ */
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node);
+/**
+ * NUMA distance between \a cpt1 and \a cpt2 in \a cptab
+ */
+unsigned cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2);
+/**
* bind current thread on a CPU-partition \a cpt of \a cptab
*/
int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
* tools which might be accessing the IOCTL numbers, a new group of IOCTL
* number have been allocated.
*/
-#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data
-#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR 91
+#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data
+#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_ADD_PEER_NI _IOWR(IOC_LIBCFS_TYPE, 92, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_DEL_PEER_NI _IOWR(IOC_LIBCFS_TYPE, 93, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_PEER_NI _IOWR(IOC_LIBCFS_TYPE, 94, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_ADD_LOCAL_NI _IOWR(IOC_LIBCFS_TYPE, 95, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_DEL_LOCAL_NI _IOWR(IOC_LIBCFS_TYPE, 96, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_LOCAL_NI _IOWR(IOC_LIBCFS_TYPE, 97, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_SET_NUMA_RANGE _IOWR(IOC_LIBCFS_TYPE, 98, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_NUMA_RANGE _IOWR(IOC_LIBCFS_TYPE, 99, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_DBG _IOWR(IOC_LIBCFS_TYPE, 100, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_MAX_NR 100
+
+extern int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
#endif /* __LIBCFS_IOCTL_H__ */
struct cfs_expr_list *expr_list);
int cfs_expr_list_values(struct cfs_expr_list *expr_list,
int max, __u32 **values);
-static inline void
-cfs_expr_list_values_free(__u32 *values, int num)
-{
- /* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed
- * by OBD_FREE() if it's called by module other than libcfs & LNet,
- * otherwise we will see fake memory leak */
- LIBCFS_FREE(values, num * sizeof(values[0]));
-}
-
+void cfs_expr_list_values_free(__u32 *values, int num);
void cfs_expr_list_free(struct cfs_expr_list *expr_list);
int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
struct cfs_expr_list **elpp);
cpumask_t *cpt_cpumask;
/* nodes mask for this partition */
nodemask_t *cpt_nodemask;
+ /* NUMA distance between CPTs */
+ unsigned *cpt_distance;
/* spread rotor for NUMA allocator */
unsigned cpt_spread_rotor;
};
struct cfs_cpt_table {
/* spread rotor for NUMA allocator */
unsigned ctb_spread_rotor;
+ /* maximum NUMA distance between all nodes in table */
+ unsigned ctb_distance;
/* # of CPU partitions */
unsigned ctb_nparts;
/* partitions tables */
int *ctb_cpu2cpt;
/* all cpus in this partition table */
cpumask_t *ctb_cpumask;
+ /* shadow HW node to CPU partition ID */
+ int *ctb_node2cpt;
/* all nodes in this partition table */
nodemask_t *ctb_nodemask;
};
struct list_head el_exprs;
};
+int cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp);
int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
int cfs_str2num_check(char *str, int nob, unsigned *num,
unsigned min, unsigned max);
struct cfs_expr_list *expr_list);
int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
struct cfs_expr_list **elpp);
+void cfs_expr_list_free(struct cfs_expr_list *expr_list);
void cfs_expr_list_free_list(struct list_head *list);
int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
int cfs_ip_addr_match(__u32 addr, struct list_head *list);
#define CFS_CPU_VERSION_MAGIC 0xbabecafe
+#define CFS_CPT_DISTANCE 1 /* Arbitrary positive value */
+
struct cfs_cpt_table *
cfs_cpt_table_alloc(unsigned int ncpt)
{
LIBCFS_ALLOC(cptab, sizeof(*cptab));
if (cptab != NULL) {
cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
+ cpu_set(0, cptab->ctb_cpumask);
node_set(0, cptab->ctb_nodemask);
cptab->ctb_nparts = ncpt;
}
EXPORT_SYMBOL(cfs_cpt_table_print);
int
+cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+	int rc;
+
+	/* UP build: one partition (0) at the fixed distance from itself.
+	 * Format matches the SMP version: "<cpt>\t: <cpt2>:<distance>\n".
+	 * Note: the format has three conversions, so three arguments are
+	 * required (the original passed only two -- undefined behavior). */
+	rc = snprintf(buf, len, "%d\t: %d:%d\n", 0, 0, CFS_CPT_DISTANCE);
+	len -= rc;
+	if (len <= 0)
+		return -EFBIG;
+
+	return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_distance_print);
+
+int
cfs_cpt_number(struct cfs_cpt_table *cptab)
{
return 1;
}
EXPORT_SYMBOL(cfs_cpt_online);
+/* Return the CPU mask of partition \a cpt (UP: the whole-table mask). */
+cpumask_t *
+cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+{
+	/* NOTE(review): this returns &cptab->ctb_mask, but the UP
+	 * cfs_cpt_table_alloc() above sets cptab->ctb_cpumask -- confirm
+	 * which field name the UP struct actually declares; one of the
+	 * two references looks wrong. */
+	return &cptab->ctb_mask;
+}
+EXPORT_SYMBOL(cfs_cpt_cpumask);
+
nodemask_t *
cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
{
return &cptab->ctb_nodemask;
}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+/* NUMA distance between \a cpt1 and \a cpt2.
+ * UP build: only one partition exists, so the distance is constant. */
+unsigned
+cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
+{
+	return CFS_CPT_DISTANCE;
+}
+EXPORT_SYMBOL(cfs_cpt_distance);
int
cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
EXPORT_SYMBOL(cfs_cpt_of_cpu);
int
+cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node)
+{
+	/* UP build: every HW node maps to the single partition 0. */
+	return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_of_node);
+
+int
cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
{
return 0;
}
EXPORT_SYMBOL(cfs_expr_list_values);
+void
+cfs_expr_list_values_free(__u32 *values, int num)
+{
+ /* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed
+ * by OBD_FREE() if it's called by module other than libcfs & LNet,
+ * otherwise we will see fake memory leak */
+ LIBCFS_FREE(values, num * sizeof(values[0]));
+}
+EXPORT_SYMBOL(cfs_expr_list_values_free);
+
/**
* Frees cfs_range_expr structures of \a expr_list.
*
if (cptab->ctb_cpu2cpt != NULL) {
LIBCFS_FREE(cptab->ctb_cpu2cpt,
- num_possible_cpus() *
- sizeof(cptab->ctb_cpu2cpt[0]));
+ nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+ }
+
+ if (cptab->ctb_node2cpt != NULL) {
+ LIBCFS_FREE(cptab->ctb_node2cpt,
+ nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
}
for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) {
if (part->cpt_cpumask != NULL)
LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+
+ if (part->cpt_distance) {
+ LIBCFS_FREE(part->cpt_distance,
+ cptab->ctb_nparts *
+ sizeof(part->cpt_distance[0]));
+ }
}
if (cptab->ctb_parts != NULL) {
goto failed;
LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
- num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
+ nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
if (cptab->ctb_cpu2cpt == NULL)
goto failed;
memset(cptab->ctb_cpu2cpt, -1,
- num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
+ nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+
+ LIBCFS_ALLOC(cptab->ctb_node2cpt,
+ nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
+ if (cptab->ctb_node2cpt == NULL)
+ goto failed;
+
+ memset(cptab->ctb_node2cpt, -1,
+ nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
if (cptab->ctb_parts == NULL)
struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
+ if (!part->cpt_cpumask)
+ goto failed;
+
LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
- if (part->cpt_cpumask == NULL || part->cpt_nodemask == NULL)
+ if (!part->cpt_nodemask)
+ goto failed;
+
+ LIBCFS_ALLOC(part->cpt_distance,
+ cptab->ctb_nparts * sizeof(part->cpt_distance[0]));
+ if (!part->cpt_distance)
goto failed;
}
cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
{
char *tmp = buf;
- int rc = 0;
+ int rc = -EFBIG;
int i;
int j;
for (i = 0; i < cptab->ctb_nparts; i++) {
- if (len > 0) {
- rc = snprintf(tmp, len, "%d\t: ", i);
- len -= rc;
- }
+ if (len <= 0)
+ goto out;
- if (len <= 0) {
- rc = -EFBIG;
+ rc = snprintf(tmp, len, "%d\t:", i);
+ len -= rc;
+
+ if (len <= 0)
goto out;
- }
tmp += rc;
for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
- rc = snprintf(tmp, len, "%d ", j);
+ rc = snprintf(tmp, len, " %d", j);
len -= rc;
- if (len <= 0) {
- rc = -EFBIG;
+ if (len <= 0)
goto out;
- }
tmp += rc;
}
tmp++;
len--;
}
-
-out:
+ rc = 0;
+ out:
if (rc < 0)
return rc;
EXPORT_SYMBOL(cfs_cpt_table_print);
int
+cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+	char *tmp = buf;
+	int rc = -EFBIG;
+	int i;
+	int j;
+
+	/* Emit one line per partition: "<cpt>\t: <cpt2>:<distance> ...".
+	 * rc starts at -EFBIG so a zero-length buffer fails cleanly. */
+	for (i = 0; i < cptab->ctb_nparts; i++) {
+		if (len <= 0)
+			goto out;
+
+		rc = snprintf(tmp, len, "%d\t:", i);
+		len -= rc;
+
+		if (len <= 0)
+			goto out;
+
+		tmp += rc;
+		for (j = 0; j < cptab->ctb_nparts; j++) {
+			rc = snprintf(tmp, len, " %d:%d",
+				      j, cptab->ctb_parts[i].cpt_distance[j]);
+			len -= rc;
+			if (len <= 0)
+				goto out;
+			tmp += rc;
+		}
+
+		/* terminate the row; len was checked > 0 above */
+		*tmp = '\n';
+		tmp++;
+		len--;
+	}
+	rc = 0;
+ out:
+	if (rc < 0)
+		return rc;
+
+	return tmp - buf;
+}
+EXPORT_SYMBOL(cfs_cpt_distance_print);
+
+int
cfs_cpt_number(struct cfs_cpt_table *cptab)
{
return cptab->ctb_nparts;
}
EXPORT_SYMBOL(cfs_cpt_nodemask);
+unsigned
+cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2)
+{
+	LASSERT(cpt1 == CFS_CPT_ANY || (cpt1 >= 0 && cpt1 < cptab->ctb_nparts));
+	LASSERT(cpt2 == CFS_CPT_ANY || (cpt2 >= 0 && cpt2 < cptab->ctb_nparts));
+
+	/* CFS_CPT_ANY on either side: fall back to the table-wide maximum */
+	if (cpt1 == CFS_CPT_ANY || cpt2 == CFS_CPT_ANY)
+		return cptab->ctb_distance;
+
+	return cptab->ctb_parts[cpt1].cpt_distance[cpt2];
+}
+EXPORT_SYMBOL(cfs_cpt_distance);
+
+/*
+ * Calculate the maximum NUMA distance between all nodes in the
+ * from_mask and all nodes in the to_mask.
+ */
+static unsigned
+cfs_cpt_distance_calculate(nodemask_t *from_mask, nodemask_t *to_mask)
+{
+	unsigned maximum;
+	unsigned distance;
+	int to;
+	int from;
+
+	/* O(|from| * |to|) scan over both node masks; masks here are
+	 * small (bounded by nr_node_ids) so this is acceptable on the
+	 * configuration paths that call it. */
+	maximum = 0;
+	for_each_node_mask(from, *from_mask) {
+		for_each_node_mask(to, *to_mask) {
+			distance = node_distance(from, to);
+			if (maximum < distance)
+				maximum = distance;
+		}
+	}
+	return maximum;
+}
+
+/* Record CPU \a cpu as a member of partition \a cpt: update the
+ * cpu->cpt reverse map and both the table-wide and per-CPT cpumasks. */
+static void cfs_cpt_add_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	cptab->ctb_cpu2cpt[cpu] = cpt;
+
+	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+}
+
+/* Remove CPU \a cpu from partition \a cpt; -1 in ctb_cpu2cpt marks
+ * "not assigned to any partition". */
+static void cfs_cpt_del_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+
+	cptab->ctb_cpu2cpt[cpu] = -1;
+}
+
+/* Add HW node \a node to partition \a cpt and refresh the cached NUMA
+ * distances.  Distances are recomputed in both directions for every
+ * partition pair touched, since cpt_distance is not assumed symmetric. */
+static void cfs_cpt_add_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	int cpt2;
+	struct cfs_cpu_partition *part;
+	struct cfs_cpu_partition *part2;
+
+	if (!node_isset(node, *cptab->ctb_nodemask)) {
+		/* first time node is added to the CPT table */
+		node_set(node, *cptab->ctb_nodemask);
+		cptab->ctb_node2cpt[node] = cpt;
+		cptab->ctb_distance = cfs_cpt_distance_calculate(
+			cptab->ctb_nodemask,
+			cptab->ctb_nodemask);
+	}
+
+	part = &cptab->ctb_parts[cpt];
+	if (!node_isset(node, *part->cpt_nodemask)) {
+		/* first time node is added to this CPT */
+		node_set(node, *part->cpt_nodemask);
+		for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+			part2 = &cptab->ctb_parts[cpt2];
+			part->cpt_distance[cpt2] = cfs_cpt_distance_calculate(
+				part->cpt_nodemask,
+				part2->cpt_nodemask);
+			part2->cpt_distance[cpt] = cfs_cpt_distance_calculate(
+				part2->cpt_nodemask,
+				part->cpt_nodemask);
+		}
+	}
+}
+
+/* Drop HW node \a node from partition \a cpt when no CPU of that node
+ * remains in the CPT, then from the whole table when no CPU of that node
+ * remains anywhere; cached distances are recomputed at each step.
+ * Relies on for_each_cpu leaving the iterator >= nr_cpu_ids when the
+ * mask is exhausted without a match. */
+static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	int cpu;
+	int cpt2;
+	struct cfs_cpu_partition *part;
+	struct cfs_cpu_partition *part2;
+
+	part = &cptab->ctb_parts[cpt];
+
+	for_each_cpu(cpu, part->cpt_cpumask) {
+		/* this CPT has other CPU belonging to this node? */
+		if (cpu_to_node(cpu) == node)
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids && node_isset(node, *part->cpt_nodemask)) {
+		/* No more CPUs in the node for this CPT. */
+		node_clear(node, *part->cpt_nodemask);
+		for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) {
+			part2 = &cptab->ctb_parts[cpt2];
+			/* re-point node2cpt at any CPT still holding the
+			 * node (last match wins) */
+			if (node_isset(node, *part2->cpt_nodemask))
+				cptab->ctb_node2cpt[node] = cpt2;
+			part->cpt_distance[cpt2] = cfs_cpt_distance_calculate(
+				part->cpt_nodemask,
+				part2->cpt_nodemask);
+			part2->cpt_distance[cpt] = cfs_cpt_distance_calculate(
+				part2->cpt_nodemask,
+				part->cpt_nodemask);
+		}
+	}
+
+	for_each_cpu(cpu, cptab->ctb_cpumask) {
+		/* this CPT-table has other CPUs belonging to this node? */
+		if (cpu_to_node(cpu) == node)
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids && node_isset(node, *cptab->ctb_nodemask)) {
+		/* No more CPUs in the table for this node. */
+		node_clear(node, *cptab->ctb_nodemask);
+		cptab->ctb_node2cpt[node] = -1;
+		cptab->ctb_distance =
+			cfs_cpt_distance_calculate(cptab->ctb_nodemask,
+						   cptab->ctb_nodemask);
+	}
+}
+
int
cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
{
- int node;
-
LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
return 0;
}
- cptab->ctb_cpu2cpt[cpu] = cpt;
-
LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
- cpumask_set_cpu(cpu, cptab->ctb_cpumask);
- cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
- node = cpu_to_node(cpu);
-
- /* first CPU of @node in this CPT table */
- if (!node_isset(node, *cptab->ctb_nodemask))
- node_set(node, *cptab->ctb_nodemask);
-
- /* first CPU of @node in this partition */
- if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
- node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+ cfs_cpt_add_cpu(cptab, cpt, cpu);
+ cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu));
return 1;
}
void
cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
{
- int node;
- int i;
-
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
if (cpu < 0 || cpu >= nr_cpu_ids) {
LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
- cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
- cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
- cptab->ctb_cpu2cpt[cpu] = -1;
-
- node = cpu_to_node(cpu);
-
- LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
- LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
- for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
- /* this CPT has other CPU belonging to this node? */
- if (cpu_to_node(i) == node)
- break;
- }
-
- if (i >= nr_cpu_ids)
- node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
- for_each_cpu(i, cptab->ctb_cpumask) {
- /* this CPT-table has other CPU belonging to this node? */
- if (cpu_to_node(i) == node)
- break;
- }
-
- if (i >= nr_cpu_ids)
- node_clear(node, *cptab->ctb_nodemask);
+ cfs_cpt_del_cpu(cptab, cpt, cpu);
+ cfs_cpt_del_node(cptab, cpt, cpu_to_node(cpu));
}
EXPORT_SYMBOL(cfs_cpt_unset_cpu);
}
for_each_cpu(cpu, mask) {
- if (!cfs_cpt_set_cpu(cptab, cpt, cpu))
- return 0;
+ cfs_cpt_add_cpu(cptab, cpt, cpu);
+ cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu));
}
return 1;
cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
{
const cpumask_t *mask;
- int rc;
+ int cpu;
- if (node < 0 || node >= MAX_NUMNODES) {
+ if (node < 0 || node >= nr_node_ids) {
CDEBUG(D_INFO,
"Invalid NUMA id %d for CPU partition %d\n", node, cpt);
return 0;
}
mask = cpumask_of_node(node);
- rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
- return rc;
+ for_each_cpu(cpu, mask)
+ cfs_cpt_add_cpu(cptab, cpt, cpu);
+
+ cfs_cpt_add_node(cptab, cpt, node);
+
+ return 1;
}
EXPORT_SYMBOL(cfs_cpt_set_node);
cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
{
const cpumask_t *mask;
+ int cpu;
- if (node < 0 || node >= MAX_NUMNODES) {
+ if (node < 0 || node >= nr_node_ids) {
CDEBUG(D_INFO,
"Invalid NUMA id %d for CPU partition %d\n", node, cpt);
return;
}
mask = cpumask_of_node(node);
- cfs_cpt_unset_cpumask(cptab, cpt, mask);
+ for_each_cpu(cpu, mask)
+ cfs_cpt_del_cpu(cptab, cpt, cpu);
+
+ cfs_cpt_del_node(cptab, cpt, node);
}
EXPORT_SYMBOL(cfs_cpt_unset_node);
EXPORT_SYMBOL(cfs_cpt_of_cpu);
int
+cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node)
+{
+	/* ctb_node2cpt[] holds nr_node_ids entries, so nr_node_ids itself
+	 * is already out of bounds: reject with >=, not > (the original
+	 * '>' allowed a one-past-the-end read). */
+	if (node < 0 || node >= nr_node_ids)
+		return CFS_CPT_ANY;
+
+	return cptab->ctb_node2cpt[node];
+}
+EXPORT_SYMBOL(cfs_cpt_of_node);
+
+int
cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
{
cpumask_t *cpumask;
return cptab;
}
- high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
+ high = node ? nr_node_ids - 1 : nr_cpu_ids - 1;
for (str = cfs_trimwhite(pattern), c = 0;; c++) {
struct cfs_range_expr *range;
__proc_cpt_table);
}
+/* Read-side handler for the "cpu_partition_distance" sysctl: render the
+ * distance table into a kernel buffer, doubling the buffer on -EFBIG
+ * until it fits, then copy the requested window out to userspace. */
+static int __proc_cpt_distance(void *data, int write,
+			       loff_t pos, void __user *buffer, int nob)
+{
+	char *buf = NULL;
+	int len = 4096;
+	int rc = 0;
+
+	if (write)
+		return -EPERM;
+
+	LASSERT(cfs_cpt_table != NULL);
+
+	while (1) {
+		LIBCFS_ALLOC(buf, len);
+		if (buf == NULL)
+			return -ENOMEM;
+
+		rc = cfs_cpt_distance_print(cfs_cpt_table, buf, len);
+		if (rc >= 0)
+			break;
+
+		if (rc == -EFBIG) {
+			/* output did not fit: retry with a bigger buffer */
+			LIBCFS_FREE(buf, len);
+			len <<= 1;
+			continue;
+		}
+		goto out;
+	}
+
+	if (pos >= rc) {
+		/* read offset past end of rendered text: EOF */
+		rc = 0;
+		goto out;
+	}
+
+	rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
+ out:
+	if (buf != NULL)
+		LIBCFS_FREE(buf, len);
+	return rc;
+}
+
+/* ctl_table glue: route the sysctl through lprocfs_call_handler to the
+ * real worker above. */
+static int
+proc_cpt_distance(struct ctl_table *table, int write, void __user *buffer,
+		  size_t *lenp, loff_t *ppos)
+{
+	return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+				    __proc_cpt_distance);
+}
+
static struct ctl_table lnet_table[] = {
/*
* NB No .strategy entries have been provided since sysctl(8) prefers
},
{
INIT_CTL_NAME
+ .procname = "cpu_partition_distance",
+ .maxlen = 128,
+ .mode = 0444,
+ .proc_handler = &proc_cpt_distance,
+ },
+ {
+ INIT_CTL_NAME
.procname = "debug_log_upcall",
.data = lnet_debug_log_upcall,
.maxlen = sizeof(lnet_debug_log_upcall),
}
/**
+ * Convert expression list (\a expr_list) to an array of all matched values
+ *
+ * \retval N N is total number of all matched values
+ * \retval 0 if expression list is empty
+ * \retval < 0 for failure
+ */
+int
+cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
+{
+	struct cfs_range_expr *expr;
+	__u32 *val;
+	int count = 0;
+	int i;
+
+	/* first pass: count matched values so the array can be sized */
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		for (i = expr->re_lo; i <= expr->re_hi; i++) {
+			if (((i - expr->re_lo) % expr->re_stride) == 0)
+				count++;
+		}
+	}
+
+	if (count == 0) /* empty expression list */
+		return 0;
+
+	if (count > max)
+		return -EINVAL;
+
+	/* calloc takes (nmemb, size) -- the original had them swapped;
+	 * same total allocation, but this is the documented order */
+	val = calloc(count, sizeof(val[0]));
+	if (val == NULL)
+		return -ENOMEM;
+
+	/* second pass: emit the matched values in order */
+	count = 0;
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		for (i = expr->re_lo; i <= expr->re_hi; i++) {
+			if (((i - expr->re_lo) % expr->re_stride) == 0)
+				val[count++] = i;
+		}
+	}
+
+	*valpp = val;
+	return count;
+}
+
+void
+cfs_expr_list_values_free(__u32 *values, int num)
+{
+	/* Userspace build: the array comes from calloc() in
+	 * cfs_expr_list_values(), so plain free() releases it; \a num is
+	 * unused here and kept only for symmetry with the kernel API
+	 * (which needs the size for LIBCFS_FREE). */
+	free(values);
+}
+
+/**
* Frees cfs_range_expr structures of \a expr_list.
*
* \retval none
*/
-static void
+void
cfs_expr_list_free(struct cfs_expr_list *expr_list)
{
while (!list_empty(&expr_list->el_exprs)) {
int LNetGetId(unsigned int index, lnet_process_id_t *id);
int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
void LNetSnprintHandle(char *str, int str_len, lnet_handle_any_t handle);
+lnet_nid_t LNetPrimaryNID(lnet_nid_t nid);
/** @} lnet_addr */
#define LNET_MAX_SHOW_NUM_CPT 128
#define LNET_UNDEFINED_HOPS ((__u32) -1)
+/*
+ * To allow for future enhancements to extend the tunables
+ * add a hdr to this structure, so that the version can be set
+ * and checked for backwards compatibility. Newer versions of LNet
+ * can still work with older versions of lnetctl. The restriction is
+ * that the structure can be added to and not removed from in order
+ * to not invalidate older lnetctl utilities. Moreover, the order of
+ * fields must remain the same, and new fields appended to the structure
+ *
+ * That said all existing LND tunables will be added in this structure
+ * to avoid future changes.
+ */
struct lnet_ioctl_config_lnd_cmn_tunables {
__u32 lct_version;
- __u32 lct_peer_timeout;
- __u32 lct_peer_tx_credits;
- __u32 lct_peer_rtr_credits;
- __u32 lct_max_tx_credits;
+ __s32 lct_peer_timeout;
+ __s32 lct_peer_tx_credits;
+ __s32 lct_peer_rtr_credits;
+ __s32 lct_max_tx_credits;
};
struct lnet_ioctl_config_o2iblnd_tunables {
__u32 pad;
};
+struct lnet_lnd_tunables {
+ union {
+ struct lnet_ioctl_config_o2iblnd_tunables lnd_o2ib;
+ } lnd_tun_u;
+};
+
struct lnet_ioctl_config_lnd_tunables {
struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
- union {
- struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
- } lt_tun_u;
+ struct lnet_lnd_tunables lt_tun;
};
struct lnet_ioctl_net_config {
/* # different router buffer pools */
#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1)
+enum lnet_dbg_task {
+ LNET_DBG_INCR_DLC_SEQ = 0
+};
+
struct lnet_ioctl_pool_cfg {
struct {
__u32 pl_npages;
char cfg_bulk[0];
};
+struct lnet_ioctl_element_stats {
+ __u32 send_count;
+ __u32 recv_count;
+ __u32 drop_count;
+};
+
+/*
+ * lnet_ioctl_config_ni
+ * This structure describes an NI configuration. There are multiple components
+ * when configuring an NI: Net, Interfaces, CPT list and LND tunables
+ * A network is passed as a string to the DLC and translated using
+ * libcfs_str2net()
+ * An interface is the name of the system configured interface
+ * (ex eth0, ib1)
+ * CPT is the list of CPTS LND tunables are passed in the lic_bulk area
+ */
+struct lnet_ioctl_config_ni {
+ struct libcfs_ioctl_hdr lic_cfg_hdr;
+ lnet_nid_t lic_nid;
+ char lic_ni_intf[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
+ char lic_legacy_ip2nets[LNET_MAX_STR_LEN];
+ __u32 lic_cpts[LNET_MAX_SHOW_NUM_CPT];
+ __u32 lic_ncpts;
+ __u32 lic_status;
+ __u32 lic_tcp_bonding;
+ __u32 lic_idx;
+ __s32 lic_dev_cpt;
+ char pad[4];
+ char lic_bulk[0];
+};
+
+struct lnet_peer_ni_credit_info {
+ char cr_aliveness[LNET_MAX_STR_LEN];
+ __u32 cr_refcount;
+ __s32 cr_ni_peer_tx_credits;
+ __s32 cr_peer_tx_credits;
+ __s32 cr_peer_min_tx_credits;
+ __u32 cr_peer_tx_qnob;
+ __s32 cr_peer_rtr_credits;
+ __s32 cr_peer_min_rtr_credits;
+ __u32 cr_ncpt;
+};
+
struct lnet_ioctl_peer {
struct libcfs_ioctl_hdr pr_hdr;
__u32 pr_count;
__u32 pr_pad;
- __u64 pr_nid;
+ lnet_nid_t pr_nid;
union {
- struct {
- char cr_aliveness[LNET_MAX_STR_LEN];
- __u32 cr_refcount;
- __u32 cr_ni_peer_tx_credits;
- __u32 cr_peer_tx_credits;
- __u32 cr_peer_rtr_credits;
- __u32 cr_peer_min_rtr_credits;
- __u32 cr_peer_tx_qnob;
- __u32 cr_ncpt;
- } pr_peer_credits;
+ struct lnet_peer_ni_credit_info pr_peer_credits;
} pr_lnd_u;
};
+struct lnet_dbg_task_info {
+ /*
+ * TODO: a union can be added if the task requires more
+ * information from user space to be carried out in kernel space.
+ */
+};
+
+/*
+ * This structure is intended to allow execution of debugging tasks. This
+ * is not intended to be backwards compatible. Extra tasks can be added in
+ * the future
+ */
+struct lnet_ioctl_dbg {
+ struct libcfs_ioctl_hdr dbg_hdr;
+ enum lnet_dbg_task dbg_task;
+ char dbg_bulk[0];
+};
+
+struct lnet_ioctl_peer_cfg {
+ struct libcfs_ioctl_hdr prcfg_hdr;
+ lnet_nid_t prcfg_prim_nid;
+ lnet_nid_t prcfg_cfg_nid;
+ __u32 prcfg_idx;
+ bool prcfg_mr;
+ char prcfg_bulk[0];
+};
+
+struct lnet_ioctl_numa_range {
+ struct libcfs_ioctl_hdr nr_hdr;
+ __u32 nr_range;
+};
+
struct lnet_ioctl_lnet_stats {
struct libcfs_ioctl_hdr st_hdr;
struct lnet_counters st_cntrs;
static inline int lnet_is_route_alive(lnet_route_t *route)
{
- if (!route->lr_gateway->lp_alive)
+ if (!route->lr_gateway->lpni_alive)
return 0; /* gateway is down */
- if ((route->lr_gateway->lp_ping_feats &
+ if ((route->lr_gateway->lpni_ping_feats &
LNET_PING_FEAT_NI_STATUS) == 0)
return 1; /* no NI status, assume it's alive */
/* has NI status, check # down NIs */
kmem_cache_free(lnet_mes_cachep, me);
}
-static inline lnet_msg_t *
-lnet_msg_alloc(void)
-{
- lnet_msg_t *msg;
-
- LIBCFS_ALLOC(msg, sizeof(*msg));
-
- /* no need to zero, LIBCFS_ALLOC does for us */
- return (msg);
-}
-
-static inline void
-lnet_msg_free(lnet_msg_t *msg)
-{
- LASSERT(!msg->msg_onactivelist);
- LIBCFS_FREE(msg, sizeof(*msg));
-}
-
lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
__u64 cookie);
void lnet_res_lh_initialize(struct lnet_res_container *rec,
}
static inline void
-lnet_peer_addref_locked(lnet_peer_t *lp)
+lnet_peer_ni_addref_locked(struct lnet_peer_ni *lp)
{
- LASSERT(lp->lp_refcount > 0);
- lp->lp_refcount++;
+ LASSERT (atomic_read(&lp->lpni_refcount) > 0);
+ atomic_inc(&lp->lpni_refcount);
}
-extern void lnet_destroy_peer_locked(lnet_peer_t *lp);
+extern void lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lp);
static inline void
-lnet_peer_decref_locked(lnet_peer_t *lp)
+lnet_peer_ni_decref_locked(struct lnet_peer_ni *lp)
{
- LASSERT(lp->lp_refcount > 0);
- lp->lp_refcount--;
- if (lp->lp_refcount == 0)
- lnet_destroy_peer_locked(lp);
+ LASSERT (atomic_read(&lp->lpni_refcount) > 0);
+ atomic_dec(&lp->lpni_refcount);
+ if (atomic_read(&lp->lpni_refcount) == 0)
+ lnet_destroy_peer_ni_locked(lp);
}
static inline int
-lnet_isrouter(lnet_peer_t *lp)
+lnet_isrouter(struct lnet_peer_ni *lp)
{
- return lp->lp_rtr_refcount != 0;
+ return lp->lpni_rtr_refcount != 0;
}
static inline void
lnet_net_unlock(0);
}
-void lnet_ni_free(lnet_ni_t *ni);
-lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist);
+/* Allocate a zeroed lnet_msg_t; returns NULL on allocation failure. */
+static inline lnet_msg_t *
+lnet_msg_alloc(void)
+{
+	lnet_msg_t *msg;
+
+	LIBCFS_ALLOC(msg, sizeof(*msg));
+
+	/* no need to zero, LIBCFS_ALLOC does for us */
+	return (msg);
+}
+
+/* Free a message; it must already be off the active list. */
+static inline void
+lnet_msg_free(lnet_msg_t *msg)
+{
+	LASSERT(!msg->msg_onactivelist);
+	LIBCFS_FREE(msg, sizeof(*msg));
+}
+
+void lnet_ni_free(struct lnet_ni *ni);
+void lnet_net_free(struct lnet_net *net);
+
+struct lnet_net *
+lnet_net_alloc(__u32 net_type, struct list_head *netlist);
+
+struct lnet_ni *
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el,
+ char *iface);
+struct lnet_ni *
+lnet_ni_alloc_w_cpt_array(struct lnet_net *net, __u32 *cpts, __u32 ncpts,
+ char *iface);
static inline int
lnet_nid2peerhash(lnet_nid_t nid)
extern lnd_t the_lolnd;
extern int avoid_asym_router_failure;
-extern int lnet_cpt_of_nid_locked(lnet_nid_t nid);
-extern int lnet_cpt_of_nid(lnet_nid_t nid);
+extern unsigned int lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number);
+extern int lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni);
+extern int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
+extern lnet_ni_t *lnet_nid2ni_addref(lnet_nid_t nid);
extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
-extern lnet_ni_t *lnet_net2ni(__u32 net);
+extern lnet_ni_t *lnet_net2ni_addref(__u32 net);
+bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
+struct lnet_net *lnet_get_net_locked(__u32 net_id);
int lnet_lib_init(void);
void lnet_lib_exit(void);
+extern unsigned int lnet_numa_range;
extern int portal_rotor;
int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, cfs_time_t when);
-void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when);
+void lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive,
+ cfs_time_t when);
int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
unsigned int priority);
int lnet_check_routes(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
+struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
+ struct lnet_ni *prev);
+struct lnet_ni *lnet_get_ni_idx_locked(int idx);
struct libcfs_ioctl_handler {
struct list_head item;
int lnet_rtrpools_enable(void);
void lnet_rtrpools_disable(void);
void lnet_rtrpools_free(int keep_pools);
-lnet_remotenet_t *lnet_find_net_locked (__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid,
- struct lnet_ioctl_config_data *conf);
-int lnet_dyn_del_ni(__u32 net);
+lnet_remotenet_t *lnet_find_rnet_locked(__u32 net);
+int lnet_dyn_add_net(struct lnet_ioctl_config_data *conf);
+int lnet_dyn_del_net(__u32 net);
+int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf);
+int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf);
int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
+struct lnet_net *lnet_get_net_locked(__u32 net_id);
int lnet_islocalnid(lnet_nid_t nid);
int lnet_islocalnet(__u32 net);
void lnet_md_unlink(lnet_libmd_t *md);
void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd);
+int lnet_cpt_of_md(lnet_libmd_t *md);
void lnet_register_lnd(lnd_t *lnd);
void lnet_unregister_lnd(lnd_t *lnd);
int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
__u32 local_ip, __u32 peer_ip, int peer_port);
void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int port);
-int lnet_count_acceptor_nis(void);
+ __u32 peer_ip, int port);
+int lnet_count_acceptor_nets(void);
int lnet_acceptor_timeout(void);
int lnet_acceptor_port(void);
int lnet_acceptor_start(void);
__u32 peer_ip, int peer_port);
int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(lnet_ni_t *ni);
+int lnet_peer_buffer_credits(struct lnet_net *net);
int lnet_router_checker_start(void);
void lnet_router_checker_stop(void);
-void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net);
+void lnet_router_ni_update_locked(struct lnet_peer_ni *gw, __u32 net);
void lnet_swap_pinginfo(struct lnet_ping_info *info);
int lnet_parse_ip2nets(char **networksp, char *ip2nets);
int lnet_parse_routes(char *route_str, int *im_a_router);
-int lnet_parse_networks(struct list_head *nilist, char *networks);
-int lnet_net_unique(__u32 net, struct list_head *nilist);
-
-int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
-lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
- lnet_nid_t nid);
-void lnet_peer_tables_cleanup(lnet_ni_t *ni);
-void lnet_peer_tables_destroy(void);
+int lnet_parse_networks(struct list_head *nilist, char *networks,
+ bool use_tcp_bonding);
+bool lnet_net_unique(__u32 net_id, struct list_head *nilist,
+ struct lnet_net **net);
+bool lnet_ni_unique_net(struct list_head *nilist, char *iface);
+void lnet_incr_dlc_seq(void);
+__u32 lnet_get_dlc_seq_locked(void);
+
+struct lnet_peer_ni *lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
+ struct lnet_peer_net *peer_net,
+ struct lnet_peer_ni *prev);
+struct lnet_peer *lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt);
+struct lnet_peer_ni *lnet_nid2peerni_locked(lnet_nid_t nid, int cpt);
+struct lnet_peer_ni *lnet_nid2peerni_ex(lnet_nid_t nid, int cpt);
+struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid);
+void lnet_peer_net_added(struct lnet_net *net);
+lnet_nid_t lnet_peer_primary_nid(lnet_nid_t nid);
+void lnet_peer_tables_cleanup(struct lnet_net *net);
+void lnet_peer_uninit(void);
int lnet_peer_tables_create(void);
void lnet_debug_peer(lnet_nid_t nid);
-int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
- char alivness[LNET_MAX_STR_LEN],
- __u32 *cpt_iter, __u32 *refcount,
- __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
- __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis,
- __u32 *peer_tx_qnob);
+struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer,
+ __u32 net_id);
+bool lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni,
+ struct lnet_ni *ni);
+int lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
+int lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid);
+int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
+ bool *mr, struct lnet_peer_ni_credit_info *peer_ni_info,
+ struct lnet_ioctl_element_stats *peer_ni_stats);
+int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
+ char alivness[LNET_MAX_STR_LEN],
+ __u32 *cpt_iter, __u32 *refcount,
+ __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
+ __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis,
+ __u32 *peer_tx_qnob);
+
+
+static inline __u32
+lnet_get_num_peer_nis(struct lnet_peer *peer)
+{
+ struct lnet_peer_net *lpn;
+ struct lnet_peer_ni *lpni;
+ __u32 count = 0;
+
+ list_for_each_entry(lpn, &peer->lp_peer_nets, lpn_on_peer_list)
+ list_for_each_entry(lpni, &lpn->lpn_peer_nis,
+ lpni_on_peer_net_list)
+ count++;
+
+ return count;
+}
+
+static inline bool
+lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni)
+{
+ return lpni->lpni_healthy;
+}
+
+static inline void
+lnet_set_peer_ni_health_locked(struct lnet_peer_ni *lpni, bool health)
+{
+ lpni->lpni_healthy = health;
+}
+
+static inline bool
+lnet_is_peer_net_healthy_locked(struct lnet_peer_net *peer_net)
+{
+ struct lnet_peer_ni *lpni;
+
+ list_for_each_entry(lpni, &peer_net->lpn_peer_nis,
+ lpni_on_peer_net_list) {
+ if (lnet_is_peer_ni_healthy_locked(lpni))
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool
+lnet_is_peer_healthy_locked(struct lnet_peer *peer)
+{
+ struct lnet_peer_net *peer_net;
+
+ list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
+ if (lnet_is_peer_net_healthy_locked(peer_net))
+ return true;
+ }
+
+ return false;
+}
static inline void
-lnet_peer_set_alive(lnet_peer_t *lp)
+lnet_peer_set_alive(struct lnet_peer_ni *lp)
{
- lp->lp_last_alive = lp->lp_last_query = cfs_time_current();
- if (!lp->lp_alive)
- lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+ lp->lpni_last_alive = lp->lpni_last_query = cfs_time_current();
+ if (!lp->lpni_alive)
+ lnet_notify_locked(lp, 0, 1, lp->lpni_last_alive);
}
#endif
struct list_head msg_list; /* Q for credits/MD */
lnet_process_id_t msg_target;
+ /* Primary NID of the source. */
+ lnet_nid_t msg_initiator;
/* where is it from, it's only for building event */
lnet_nid_t msg_from;
__u32 msg_type;
/* ready for pending on RX delay list */
unsigned int msg_rx_ready_delay:1;
- unsigned int msg_vmflush:1; /* VM trying to free memory */
- unsigned int msg_target_is_router:1; /* sending to a router */
- unsigned int msg_routing:1; /* being forwarded */
- unsigned int msg_ack:1; /* ack on finalize (PUT) */
- unsigned int msg_sending:1; /* outgoing message */
- unsigned int msg_receiving:1; /* being received */
- unsigned int msg_txcredit:1; /* taken an NI send credit */
- unsigned int msg_peertxcredit:1; /* taken a peer send credit */
- unsigned int msg_rtrcredit:1; /* taken a globel router credit */
- unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
- unsigned int msg_onactivelist:1; /* on the activelist */
+ unsigned int msg_vmflush:1; /* VM trying to free memory */
+ unsigned int msg_target_is_router:1; /* sending to a router */
+ unsigned int msg_routing:1; /* being forwarded */
+ unsigned int msg_ack:1; /* ack on finalize (PUT) */
+ unsigned int msg_sending:1; /* outgoing message */
+ unsigned int msg_receiving:1; /* being received */
+ unsigned int msg_txcredit:1; /* taken an NI send credit */
+ unsigned int msg_peertxcredit:1; /* taken a peer send credit */
+ unsigned int msg_rtrcredit:1; /* taken a global router credit */
+ unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
+ unsigned int msg_onactivelist:1; /* on the activelist */
unsigned int msg_rdma_get:1;
- struct lnet_peer *msg_txpeer; /* peer I'm sending to */
- struct lnet_peer *msg_rxpeer; /* peer I received from */
+ struct lnet_peer_ni *msg_txpeer; /* peer I'm sending to */
+ struct lnet_peer_ni *msg_rxpeer; /* peer I received from */
- void *msg_private;
+ void *msg_private;
struct lnet_libmd *msg_md;
-
- unsigned int msg_len;
- unsigned int msg_wanted;
- unsigned int msg_offset;
- unsigned int msg_niov;
+ /* the NI the message was sent or received over */
+ struct lnet_ni *msg_txni;
+ struct lnet_ni *msg_rxni;
+
+ unsigned int msg_len;
+ unsigned int msg_wanted;
+ unsigned int msg_offset;
+ unsigned int msg_niov;
struct kvec *msg_iov;
- lnet_kiov_t *msg_kiov;
+ lnet_kiov_t *msg_kiov;
- lnet_event_t msg_ev;
- lnet_hdr_t msg_hdr;
+ lnet_event_t msg_ev;
+ lnet_hdr_t msg_hdr;
} lnet_msg_t;
unsigned int md_niov; /* # frags at end of struct */
void *md_user_ptr;
lnet_eq_t *md_eq;
+ lnet_handle_md_t md_bulk_handle;
union {
struct kvec iov[LNET_MAX_IOV];
lnet_kiov_t kiov[LNET_MAX_IOV];
struct list_head tq_delayed; /* delayed TXs */
};
+enum lnet_net_state {
+ /* set when net block is allocated */
+ LNET_NET_STATE_INIT = 0,
+ /* set when NIs in net are started successfully */
+ LNET_NET_STATE_ACTIVE,
+ /* set if all NIs in net are in FAILED state */
+ LNET_NET_STATE_INACTIVE,
+ /* set when shutting down a NET */
+ LNET_NET_STATE_DELETING
+};
+
+enum lnet_ni_state {
+ /* set when NI block is allocated */
+ LNET_NI_STATE_INIT = 0,
+ /* set when NI is started successfully */
+ LNET_NI_STATE_ACTIVE,
+ /* set when LND notifies NI failed */
+ LNET_NI_STATE_FAILED,
+ /* set when LND notifies NI degraded */
+ LNET_NI_STATE_DEGRADED,
+ /* set when shutting down NI */
+ LNET_NI_STATE_DELETING
+};
+
+struct lnet_element_stats {
+ atomic_t send_count;
+ atomic_t recv_count;
+ atomic_t drop_count;
+};
+
+struct lnet_net {
+ /* chain on the ln_nets */
+ struct list_head net_list;
+
+ /* net ID, which is composed of
+ * (net_type << 16) | net_num.
+ * net_type can be one of the enumerated types defined in
+ * lnet/include/lnet/nidstr.h */
+ __u32 net_id;
+
+ /* priority of the network */
+ __u32 net_prio;
+
+ /* total number of CPTs in the array */
+ __u32 net_ncpts;
+
+ /* cumulative CPTs of all NIs in this net */
+ __u32 *net_cpts;
+
+ /* network tunables */
+ struct lnet_ioctl_config_lnd_cmn_tunables net_tunables;
+
+ /*
+ * boolean to indicate that the tunables have been set and
+ * shouldn't be reset
+ */
+ bool net_tunables_set;
+
+ /* procedural interface */
+ lnd_t *net_lnd;
+
+ /* list of NIs on this net */
+ struct list_head net_ni_list;
+
+ /* list of NIs being added, but not started yet */
+ struct list_head net_ni_added;
+
+ /* dying LND instances */
+ struct list_head net_ni_zombie;
+
+ /* network state */
+ enum lnet_net_state net_state;
+};
+
typedef struct lnet_ni {
+ /* chain on the lnet_net structure */
+ struct list_head ni_netlist;
+
+ /* chain on net_ni_cpt */
+ struct list_head ni_cptlist;
+
spinlock_t ni_lock;
- struct list_head ni_list; /* chain on ln_nis */
- struct list_head ni_cptlist; /* chain on ln_nis_cpt */
- int ni_maxtxcredits; /* # tx credits */
- /* # per-peer send credits */
- int ni_peertxcredits;
- /* # per-peer router buffer credits */
- int ni_peerrtrcredits;
- /* seconds to consider peer dead */
- int ni_peertimeout;
- int ni_ncpts; /* number of CPTs */
- __u32 *ni_cpts; /* bond NI on some CPTs */
- lnet_nid_t ni_nid; /* interface's NID */
- void *ni_data; /* instance-specific data */
- lnd_t *ni_lnd; /* procedural interface */
- struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */
- int **ni_refs; /* percpt reference count */
- time64_t ni_last_alive; /* when I was last alive */
- struct lnet_ni_status *ni_status; /* my health status */
+
+ /* number of CPTs */
+ int ni_ncpts;
+
+ /* bond NI on some CPTs */
+ __u32 *ni_cpts;
+
+ /* interface's NID */
+ lnet_nid_t ni_nid;
+
+ /* instance-specific data */
+ void *ni_data;
+
+ /* per ni credits */
+ atomic_t ni_tx_credits;
+
+ /* percpt TX queues */
+ struct lnet_tx_queue **ni_tx_queues;
+
+ /* percpt reference count */
+ int **ni_refs;
+
+ /* when I was last alive */
+ long ni_last_alive;
+
+ /* pointer to parent network */
+ struct lnet_net *ni_net;
+
+ /* my health status */
+ lnet_ni_status_t *ni_status;
+
+ /* NI FSM */
+ enum lnet_ni_state ni_state;
+
/* per NI LND tunables */
- struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
- /* equivalent interfaces to use */
+ struct lnet_lnd_tunables ni_lnd_tunables;
+
+ /* lnd tunables set explicitly */
+ bool ni_lnd_tunables_set;
+
+ /* NI statistics */
+ struct lnet_element_stats ni_stats;
+
+ /* physical device CPT */
+ int ni_dev_cpt;
+
+ /* sequence number used to round robin over nis within a net */
+ __u32 ni_seq;
+
+ /*
+ * equivalent interfaces to use
+ * This is an array because socklnd bonding can still be configured
+ */
char *ni_interfaces[LNET_MAX_INTERFACES];
struct net *ni_net_ns; /* original net namespace */
} lnet_ni_t;
/* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
struct list_head rcd_list;
lnet_handle_md_t rcd_mdh; /* ping buffer MD */
- struct lnet_peer *rcd_gateway; /* reference to gateway */
+ struct lnet_peer_ni *rcd_gateway; /* reference to gateway */
struct lnet_ping_info *rcd_pinginfo; /* ping buffer */
} lnet_rc_data_t;
-typedef struct lnet_peer {
+struct lnet_peer_ni {
+ /* chain on peer_net */
+ struct list_head lpni_on_peer_net_list;
+ /* chain on remote peer list */
+ struct list_head lpni_on_remote_peer_ni_list;
/* chain on peer hash */
- struct list_head lp_hashlist;
+ struct list_head lpni_hashlist;
/* messages blocking for tx credits */
- struct list_head lp_txq;
+ struct list_head lpni_txq;
/* messages blocking for router credits */
- struct list_head lp_rtrq;
+ struct list_head lpni_rtrq;
/* chain on router list */
- struct list_head lp_rtr_list;
+ struct list_head lpni_rtr_list;
+ /* pointer to peer net I'm part of */
+ struct lnet_peer_net *lpni_peer_net;
+ /* statistics kept on each peer NI */
+ struct lnet_element_stats lpni_stats;
+ /* spin lock protecting credits and lpni_txq / lpni_rtrq */
+ spinlock_t lpni_lock;
/* # tx credits available */
- int lp_txcredits;
+ int lpni_txcredits;
/* low water mark */
- int lp_mintxcredits;
+ int lpni_mintxcredits;
/* # router credits */
- int lp_rtrcredits;
+ int lpni_rtrcredits;
/* low water mark */
- int lp_minrtrcredits;
+ int lpni_minrtrcredits;
+ /* bytes queued for sending */
+ long lpni_txqnob;
/* alive/dead? */
- unsigned int lp_alive:1;
+ bool lpni_alive;
/* notification outstanding? */
- unsigned int lp_notify:1;
+ bool lpni_notify;
/* outstanding notification for LND? */
- unsigned int lp_notifylnd:1;
+ bool lpni_notifylnd;
/* some thread is handling notification */
- unsigned int lp_notifying:1;
+ bool lpni_notifying;
/* SEND event outstanding from ping */
- unsigned int lp_ping_notsent;
- /* # times router went dead<->alive */
- int lp_alive_count;
- /* bytes queued for sending */
- long lp_txqnob;
+ bool lpni_ping_notsent;
+ /* # times router went dead<->alive. Protected with lpni_lock */
+ int lpni_alive_count;
/* time of last aliveness news */
- cfs_time_t lp_timestamp;
+ cfs_time_t lpni_timestamp;
/* time of last ping attempt */
- cfs_time_t lp_ping_timestamp;
+ cfs_time_t lpni_ping_timestamp;
/* != 0 if ping reply expected */
- cfs_time_t lp_ping_deadline;
+ cfs_time_t lpni_ping_deadline;
/* when I was last alive */
- cfs_time_t lp_last_alive;
- /* when lp_ni was queried last time */
- cfs_time_t lp_last_query;
- /* interface peer is on */
- lnet_ni_t *lp_ni;
- lnet_nid_t lp_nid; /* peer's NID */
- int lp_refcount; /* # refs */
- int lp_cpt; /* CPT this peer attached on */
+ cfs_time_t lpni_last_alive;
+ /* when this peer NI was queried last time */
+ cfs_time_t lpni_last_query;
+ /* network peer is on */
+ struct lnet_net *lpni_net;
+ /* peer's NID */
+ lnet_nid_t lpni_nid;
+ /* # refs */
+ atomic_t lpni_refcount;
+ /* CPT this peer attached on */
+ int lpni_cpt;
/* # refs from lnet_route_t::lr_gateway */
- int lp_rtr_refcount;
- /* returned RC ping features */
- unsigned int lp_ping_feats;
- struct list_head lp_routes; /* routers on this peer */
- lnet_rc_data_t *lp_rcd; /* router checker state */
-} lnet_peer_t;
+ int lpni_rtr_refcount;
+ /* sequence number used to round robin over peer nis within a net */
+ __u32 lpni_seq;
+ /* sequence number used to round robin over gateways */
+ __u32 lpni_gw_seq;
+ /* health flag */
+ bool lpni_healthy;
+ /* returned RC ping features. Protected with lpni_lock */
+ unsigned int lpni_ping_feats;
+ /* routes on this peer */
+ struct list_head lpni_routes;
+ /* array of preferred local nids */
+ lnet_nid_t *lpni_pref_nids;
+ /* number of preferred NIDs in lpni_pref_nids */
+ __u32 lpni_pref_nnids;
+ /* router checker state */
+ lnet_rc_data_t *lpni_rcd;
+};
+
+struct lnet_peer {
+ /* chain on global peer list */
+ struct list_head lp_on_lnet_peer_list;
+
+ /* list of peer nets */
+ struct list_head lp_peer_nets;
+
+ /* primary NID of the peer */
+ lnet_nid_t lp_primary_nid;
+
+ /* peer is Multi-Rail enabled peer */
+ bool lp_multi_rail;
+};
+
+struct lnet_peer_net {
+ /* chain on peer block */
+ struct list_head lpn_on_peer_list;
+
+ /* list of peer_nis on this network */
+ struct list_head lpn_peer_nis;
+
+ /* pointer to the peer I'm part of */
+ struct lnet_peer *lpn_peer;
+
+ /* Net ID */
+ __u32 lpn_net_id;
+};
/* peer hash size */
#define LNET_PEER_HASH_BITS 9
/* peer hash table */
struct lnet_peer_table {
int pt_version; /* /proc validity stamp */
- int pt_number; /* # peers extant */
- int pt_zombies; /* # zombies to go to deathrow
- * (and not there yet) */
- struct list_head pt_deathrow; /* zombie peers */
+ atomic_t pt_number; /* # peers extant */
struct list_head *pt_hash; /* NID->peer hash */
+ struct list_head pt_zombie_list; /* zombie peers */
+ int pt_zombies; /* # zombie peers */
+ spinlock_t pt_zombie_lock; /* protect list and count */
};
/* peer aliveness is enabled only on routers for peers in a network where the
* lnet_ni_t::ni_peertimeout has been set to a positive value */
#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
- (lp)->lp_ni->ni_peertimeout > 0)
+ ((lp)->lpni_net) && \
+ (lp)->lpni_net->net_tunables.lct_peer_timeout > 0)
typedef struct {
struct list_head lr_list; /* chain on net */
struct list_head lr_gwlist; /* chain on gateway */
- lnet_peer_t *lr_gateway; /* router node */
+ struct lnet_peer_ni *lr_gateway; /* router node */
__u32 lr_net; /* remote network number */
int lr_seq; /* sequence for round-robin */
unsigned int lr_downis; /* number of down NIs */
struct lnet_match_info {
__u64 mi_mbits;
lnet_process_id_t mi_id;
+ unsigned int mi_cpt;
unsigned int mi_opc;
unsigned int mi_portal;
unsigned int mi_rlength;
struct lnet_msg_container **ln_msg_containers;
lnet_counters_t **ln_counters;
struct lnet_peer_table **ln_peer_tables;
+ /* list of configured or discovered peers */
+ struct list_head ln_peers;
+ /* list of peer nis not on a local network */
+ struct list_head ln_remote_peer_ni_list;
/* failure simulation */
struct list_head ln_test_peers;
struct list_head ln_drop_rules;
struct list_head ln_delay_rules;
-
- struct list_head ln_nis; /* LND instances */
- /* NIs bond on specific CPT(s) */
- struct list_head ln_nis_cpt;
- /* dying LND instances */
- struct list_head ln_nis_zombie;
- lnet_ni_t *ln_loni; /* the loopback NI */
+ /* LND instances */
+ struct list_head ln_nets;
+ /* the loopback NI */
+ struct lnet_ni *ln_loni;
+ /* network zombie list */
+ struct list_head ln_net_zombie;
/* remote networks with routes to them */
struct list_head *ln_remote_nets_hash;
* - LNET_MD_IOVEC: The start and length fields specify an array of
* struct iovec.
* - LNET_MD_MAX_SIZE: The max_size field is valid.
+ * - LNET_MD_BULK_HANDLE: The bulk_handle field is valid.
*
* Note:
* - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
* descriptor are not logged.
*/
lnet_handle_eq_t eq_handle;
+ /**
+ * The bulk MD handle which was registered to describe the buffers
+ * either to be used to transfer data to the peer or receive data
+ * from the peer. This allows LNet to properly determine the NUMA
+ * node on which the memory was allocated and use that to select the
+ * nearest local network interface. This value is only used
+ * if the LNET_MD_BULK_HANDLE option is set.
+ */
+ lnet_handle_md_t bulk_handle;
} lnet_md_t;
/* Max Transfer Unit (minimum supported everywhere).
#define LNET_MD_MAX_SIZE (1 << 7)
/** See lnet_md_t::options. */
#define LNET_MD_KIOV (1 << 8)
+/** See lnet_md_t::options. */
+#define LNET_MD_BULK_HANDLE (1 << 9)
/* For compatibility with Cray Portals */
#define LNET_MD_PHYS 0
lnet_process_id_t target;
/** The identifier (nid, pid) of the initiator. */
lnet_process_id_t initiator;
+ /** The source NID on the initiator. */
+ lnet_process_id_t source;
/**
* The NID of the immediate sender. If the request has been forwarded
* by routers, this is the NID of the last hop; otherwise it's the
- * same as the initiator.
+ * same as the source.
*/
- lnet_nid_t sender;
+ lnet_nid_t sender;
/** Indicates the type of the event. */
lnet_event_kind_t type;
/** The portal table index specified in the request */
- unsigned int pt_index;
+ unsigned int pt_index;
/** A copy of the match bits specified in the request. */
- __u64 match_bits;
+ __u64 match_bits;
/** The length (in bytes) specified in the request. */
- unsigned int rlength;
+ unsigned int rlength;
/**
* The length (in bytes) of the data that was manipulated by the
* operation. For truncated operations, the manipulated length will be
* see lnet_md_t). For all other operations, the manipulated length
* will be the length of the requested operation, i.e. rlength.
*/
- unsigned int mlength;
+ unsigned int mlength;
/**
* The handle to the MD associated with the event. The handle may be
* invalid if the MD has been unlinked.
* been processed. In particular, the threshold field in md will
* reflect the value of the threshold after the operation occurred.
*/
- lnet_md_t md;
+ lnet_md_t md;
/**
* 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
* \see LNetPut
*/
- __u64 hdr_data;
+ __u64 hdr_data;
/**
* Indicates the completion status of the operation. It's 0 for
* successful operations, otherwise it's an error code.
*/
- int status;
+ int status;
/**
* Indicates whether the MD has been unlinked. Note that:
* - An event with unlinked set is the last event on the MD.
* - This field is also set for an explicit LNET_EVENT_UNLINK event.
* \see LNetMDUnlink
*/
- int unlinked;
+ int unlinked;
/**
* The displacement (in bytes) into the memory region that the
* operation used. The offset can be determined by the operation for
* a remote managed MD or by the local MD.
* \see lnet_md_t::options
*/
- unsigned int offset;
+ unsigned int offset;
/**
* The sequence number for this event. Sequence numbers are unique
* to each event.
kgn_net_t *net;
ENTRY;
- LASSERTF(ni->ni_lnd == &the_kgnilnd,
+ LASSERTF(ni->ni_net->net_lnd == &the_kgnilnd,
"bad LND 0x%p != the_kgnilnd @ 0x%p\n",
- ni->ni_lnd, &the_kgnilnd);
+ ni->ni_net->net_lnd, &the_kgnilnd);
if (kgnilnd_data.kgn_init == GNILND_INIT_NOTHING) {
rc = kgnilnd_base_startup();
msg->ibm_cksum = msg_cksum;
if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
+ /* leave magic unflipped as a clue to peer_ni endianness */
msg->ibm_version = version;
CLASSERT (sizeof(msg->ibm_type) == 1);
CLASSERT (sizeof(msg->ibm_credits) == 1);
}
int
-kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
+kiblnd_create_peer(lnet_ni_t *ni, kib_peer_ni_t **peerp, lnet_nid_t nid)
{
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
kib_net_t *net = ni->ni_data;
- int cpt = lnet_cpt_of_nid(nid);
+ int cpt = lnet_cpt_of_nid(nid, ni);
unsigned long flags;
LASSERT(net != NULL);
LASSERT(nid != LNET_NID_ANY);
- LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
+ LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni));
+ if (peer_ni == NULL) {
+ CERROR("Cannot allocate peer_ni\n");
return -ENOMEM;
}
- peer->ibp_ni = ni;
- peer->ibp_nid = nid;
- peer->ibp_error = 0;
- peer->ibp_last_alive = 0;
- peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
- peer->ibp_queue_depth = ni->ni_peertxcredits;
- atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
+ peer_ni->ibp_ni = ni;
+ peer_ni->ibp_nid = nid;
+ peer_ni->ibp_error = 0;
+ peer_ni->ibp_last_alive = 0;
+ peer_ni->ibp_max_frags = kiblnd_cfg_rdma_frags(peer_ni->ibp_ni);
+ peer_ni->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits;
+ atomic_set(&peer_ni->ibp_refcount, 1); /* 1 ref for caller */
- INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD(&peer->ibp_conns);
- INIT_LIST_HEAD(&peer->ibp_tx_queue);
+ INIT_LIST_HEAD(&peer_ni->ibp_list); /* not in the peer_ni table yet */
+ INIT_LIST_HEAD(&peer_ni->ibp_conns);
+ INIT_LIST_HEAD(&peer_ni->ibp_tx_queue);
write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- *peerp = peer;
+ *peerp = peer_ni;
return 0;
}
void
-kiblnd_destroy_peer (kib_peer_t *peer)
+kiblnd_destroy_peer (kib_peer_ni_t *peer_ni)
{
- kib_net_t *net = peer->ibp_ni->ni_data;
+ kib_net_t *net = peer_ni->ibp_ni->ni_data;
LASSERT(net != NULL);
- LASSERT (atomic_read(&peer->ibp_refcount) == 0);
- LASSERT(!kiblnd_peer_active(peer));
- LASSERT(kiblnd_peer_idle(peer));
- LASSERT(list_empty(&peer->ibp_tx_queue));
+ LASSERT (atomic_read(&peer_ni->ibp_refcount) == 0);
+ LASSERT(!kiblnd_peer_active(peer_ni));
+ LASSERT(kiblnd_peer_idle(peer_ni));
+ LASSERT(list_empty(&peer_ni->ibp_tx_queue));
- LIBCFS_FREE(peer, sizeof(*peer));
+ LIBCFS_FREE(peer_ni, sizeof(*peer_ni));
- /* NB a peer's connections keep a reference on their peer until
+ /* NB a peer_ni's connections keep a reference on their peer_ni until
* they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
+ * with this peer_ni has been cleaned up when its refcount drops to
* zero. */
atomic_dec(&net->ibn_npeers);
}
-kib_peer_t *
-kiblnd_find_peer_locked (lnet_nid_t nid)
+kib_peer_ni_t *
+kiblnd_find_peer_locked(struct lnet_ni *ni, lnet_nid_t nid)
{
/* the caller is responsible for accounting the additional reference
* that this creates */
struct list_head *peer_list = kiblnd_nid2peerlist(nid);
struct list_head *tmp;
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
list_for_each(tmp, peer_list) {
- peer = list_entry(tmp, kib_peer_t, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_nid != nid)
+ peer_ni = list_entry(tmp, kib_peer_ni_t, ibp_list);
+ LASSERT(!kiblnd_peer_idle(peer_ni));
+
+ /*
+ * Match a peer if its NID and the NID of the local NI it
+ * communicates over are the same. Otherwise don't match
+ * the peer, which will result in a new lnd peer being
+ * created.
+ */
+ if (peer_ni->ibp_nid != nid ||
+ peer_ni->ibp_ni->ni_nid != ni->ni_nid)
continue;
- CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
- peer, libcfs_nid2str(nid),
- atomic_read(&peer->ibp_refcount),
- peer->ibp_version);
- return peer;
+ CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d) version: %x\n",
+ peer_ni, libcfs_nid2str(nid),
+ atomic_read(&peer_ni->ibp_refcount),
+ peer_ni->ibp_version);
+ return peer_ni;
}
return NULL;
}
void
-kiblnd_unlink_peer_locked (kib_peer_t *peer)
+kiblnd_unlink_peer_locked (kib_peer_ni_t *peer_ni)
{
- LASSERT(list_empty(&peer->ibp_conns));
+ LASSERT(list_empty(&peer_ni->ibp_conns));
- LASSERT (kiblnd_peer_active(peer));
- list_del_init(&peer->ibp_list);
+ LASSERT (kiblnd_peer_active(peer_ni));
+ list_del_init(&peer_ni->ibp_list);
/* lose peerlist's ref */
- kiblnd_peer_decref(peer);
+ kiblnd_peer_decref(peer_ni);
}
static int
kiblnd_get_peer_info(lnet_ni_t *ni, int index,
lnet_nid_t *nidp, int *count)
{
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
struct list_head *ptmp;
int i;
unsigned long flags;
list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
+ peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
+ LASSERT(!kiblnd_peer_idle(peer_ni));
- if (peer->ibp_ni != ni)
+ if (peer_ni->ibp_ni != ni)
continue;
if (index-- > 0)
continue;
- *nidp = peer->ibp_nid;
- *count = atomic_read(&peer->ibp_refcount);
+ *nidp = peer_ni->ibp_nid;
+ *count = atomic_read(&peer_ni->ibp_refcount);
read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
flags);
}
static void
-kiblnd_del_peer_locked (kib_peer_t *peer)
+kiblnd_del_peer_locked (kib_peer_ni_t *peer_ni)
{
struct list_head *ctmp;
struct list_head *cnxt;
kib_conn_t *conn;
- if (list_empty(&peer->ibp_conns)) {
- kiblnd_unlink_peer_locked(peer);
+ if (list_empty(&peer_ni->ibp_conns)) {
+ kiblnd_unlink_peer_locked(peer_ni);
} else {
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
+ list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
conn = list_entry(ctmp, kib_conn_t, ibc_list);
kiblnd_close_conn_locked(conn, 0);
}
- /* NB closing peer's last conn unlinked it. */
+ /* NB closing peer_ni's last conn unlinked it. */
}
- /* NB peer now unlinked; might even be freed if the peer table had the
+ /* NB peer_ni now unlinked; might even be freed if the peer_ni table had the
* last ref on it. */
}
struct list_head zombies = LIST_HEAD_INIT(zombies);
struct list_head *ptmp;
struct list_head *pnxt;
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
int lo;
int hi;
int i;
for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
+ peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
+ LASSERT(!kiblnd_peer_idle(peer_ni));
- if (peer->ibp_ni != ni)
+ if (peer_ni->ibp_ni != ni)
continue;
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
+ if (!(nid == LNET_NID_ANY || peer_ni->ibp_nid == nid))
continue;
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT(list_empty(&peer->ibp_conns));
+ if (!list_empty(&peer_ni->ibp_tx_queue)) {
+ LASSERT(list_empty(&peer_ni->ibp_conns));
- list_splice_init(&peer->ibp_tx_queue,
+ list_splice_init(&peer_ni->ibp_tx_queue,
&zombies);
}
- kiblnd_del_peer_locked(peer);
+ kiblnd_del_peer_locked(peer_ni);
rc = 0; /* matched something */
}
}
static kib_conn_t *
kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index)
{
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
struct list_head *ptmp;
kib_conn_t *conn;
struct list_head *ctmp;
for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
+ peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
+ LASSERT(!kiblnd_peer_idle(peer_ni));
- if (peer->ibp_ni != ni)
+ if (peer_ni->ibp_ni != ni)
continue;
- list_for_each(ctmp, &peer->ibp_conns) {
+ list_for_each(ctmp, &peer_ni->ibp_conns) {
if (index-- > 0)
continue;
}
kib_conn_t *
-kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
+kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid,
int state, int version)
{
/* CAVEAT EMPTOR:
* If the new conn is created successfully it takes over the caller's
- * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself
- * is destroyed. On failure, the caller's ref on 'peer' remains and
+ * ref on 'peer_ni'. It also "owns" 'cmid' and destroys it when it itself
+ * is destroyed. On failure, the caller's ref on 'peer_ni' remains and
* she must dispose of 'cmid'. (Actually I'd block forever if I tried
* to destroy 'cmid' here since I'm called from the CM which still has
* its ref on 'cmid'). */
rwlock_t *glock = &kiblnd_data.kib_global_lock;
- kib_net_t *net = peer->ibp_ni->ni_data;
+ kib_net_t *net = peer_ni->ibp_ni->ni_data;
kib_dev_t *dev;
struct ib_qp_init_attr *init_qp_attr;
struct kib_sched_info *sched;
dev = net->ibn_dev;
- cpt = lnet_cpt_of_nid(peer->ibp_nid);
+ cpt = lnet_cpt_of_nid(peer_ni->ibp_nid, peer_ni->ibp_ni);
sched = kiblnd_data.kib_scheds[cpt];
LASSERT(sched->ibs_nthreads > 0);
sizeof(*init_qp_attr));
if (init_qp_attr == NULL) {
CERROR("Can't allocate qp_attr for %s\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
goto failed_0;
}
LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn));
if (conn == NULL) {
CERROR("Can't allocate connection for %s\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
goto failed_1;
}
conn->ibc_state = IBLND_CONN_INIT;
conn->ibc_version = version;
- conn->ibc_peer = peer; /* I take the caller's ref */
+ conn->ibc_peer = peer_ni; /* I take the caller's ref */
cmid->context = conn; /* for future CM callbacks */
conn->ibc_cmid = cmid;
- conn->ibc_max_frags = peer->ibp_max_frags;
- conn->ibc_queue_depth = peer->ibp_queue_depth;
+ conn->ibc_max_frags = peer_ni->ibp_max_frags;
+ conn->ibc_queue_depth = peer_ni->ibp_queue_depth;
INIT_LIST_HEAD(&conn->ibc_early_rxs);
INIT_LIST_HEAD(&conn->ibc_tx_noops);
kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn)
{
struct rdma_cm_id *cmid = conn->ibc_cmid;
- kib_peer_t *peer = conn->ibc_peer;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
int rc;
LASSERT (!in_interrupt());
/* See CAVEAT EMPTOR above in kiblnd_create_conn */
if (conn->ibc_state != IBLND_CONN_INIT) {
- kib_net_t *net = peer->ibp_ni->ni_data;
+ kib_net_t *net = peer_ni->ibp_ni->ni_data;
- kiblnd_peer_decref(peer);
+ kiblnd_peer_decref(peer_ni);
rdma_destroy_id(cmid);
atomic_dec(&net->ibn_nconns);
}
}
int
-kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why)
+kiblnd_close_peer_conns_locked(kib_peer_ni_t *peer_ni, int why)
{
kib_conn_t *conn;
struct list_head *ctmp;
struct list_head *cnxt;
int count = 0;
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
+ list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
conn = list_entry(ctmp, kib_conn_t, ibc_list);
CDEBUG(D_NET, "Closing conn -> %s, "
"version: %x, reason: %d\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
conn->ibc_version, why);
kiblnd_close_conn_locked(conn, why);
}
int
-kiblnd_close_stale_conns_locked(kib_peer_t *peer,
+kiblnd_close_stale_conns_locked(kib_peer_ni_t *peer_ni,
int version, __u64 incarnation)
{
kib_conn_t *conn;
struct list_head *cnxt;
int count = 0;
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
+ list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
conn = list_entry(ctmp, kib_conn_t, ibc_list);
if (conn->ibc_version == version &&
CDEBUG(D_NET, "Closing stale conn -> %s version: %x, "
"incarnation:%#llx(%x, %#llx)\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
conn->ibc_version, conn->ibc_incarnation,
version, incarnation);
static int
kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid)
{
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
struct list_head *ptmp;
struct list_head *pnxt;
int lo;
for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
+ peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
+ LASSERT(!kiblnd_peer_idle(peer_ni));
- if (peer->ibp_ni != ni)
+ if (peer_ni->ibp_ni != ni)
continue;
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
+ if (!(nid == LNET_NID_ANY || nid == peer_ni->ibp_nid))
continue;
- count += kiblnd_close_peer_conns_locked(peer, 0);
+ count += kiblnd_close_peer_conns_locked(peer_ni, 0);
}
}
cfs_time_t last_alive = 0;
cfs_time_t now = cfs_time_current();
rwlock_t *glock = &kiblnd_data.kib_global_lock;
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
unsigned long flags;
read_lock_irqsave(glock, flags);
- peer = kiblnd_find_peer_locked(nid);
- if (peer != NULL)
- last_alive = peer->ibp_last_alive;
+ peer_ni = kiblnd_find_peer_locked(ni, nid);
+ if (peer_ni != NULL)
+ last_alive = peer_ni->ibp_last_alive;
read_unlock_irqrestore(glock, flags);
if (last_alive != 0)
*when = last_alive;
- /* peer is not persistent in hash, trigger peer creation
+ /* peer_ni is not persistent in hash, trigger peer_ni creation
* and connection establishment with a NULL tx */
- if (peer == NULL)
+ if (peer_ni == NULL)
kiblnd_launch_tx(ni, NULL, nid);
- CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
- libcfs_nid2str(nid), peer,
+ CDEBUG(D_NET, "peer_ni %s %p, alive %ld secs ago\n",
+ libcfs_nid2str(nid), peer_ni,
last_alive ? cfs_duration_sec(now - last_alive) : -1);
return;
}
int mod;
__u16 nfrags;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
mod = tunables->lnd_map_on_demand;
nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
int rc;
int i;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
if (tunables->lnd_map_on_demand == 0) {
/* nuke all existing peers within this net */
kiblnd_del_peer(ni, LNET_NID_ANY);
- /* Wait for all peer state to clean up */
+ /* Wait for all peer_ni state to clean up */
i = 2;
while (atomic_read(&net->ibn_npeers) != 0) {
i++;
unsigned long flags;
int rc;
int newdev;
+ int node_id;
- LASSERT (ni->ni_lnd == &the_o2iblnd);
+ LASSERT (ni->ni_net->net_lnd == &the_o2iblnd);
if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
rc = kiblnd_base_startup();
newdev = ibdev == NULL;
/* hmm...create kib_dev even for alias */
if (ibdev == NULL || strcmp(&ibdev->ibd_ifname[0], ifname) != 0)
- ibdev = kiblnd_create_dev(ifname);
+ ibdev = kiblnd_create_dev(ifname);
- if (ibdev == NULL)
- goto failed;
+ if (ibdev == NULL)
+ goto failed;
+
+ node_id = dev_to_node(ibdev->ibd_hdev->ibh_ibdev->dma_device);
+ ni->ni_dev_cpt = cfs_cpt_of_node(lnet_cpt_table(), node_id);
- net->ibn_dev = ibdev;
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
+ net->ibn_dev = ibdev;
+ ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
rc = kiblnd_dev_start_threads(ibdev, newdev,
ni->ni_cpts, ni->ni_ncpts);
#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
-#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
+#define IBLND_PEER_HASH_SIZE 101 /* # peer_ni lists */
/* # scheduler loops before reschedule */
#define IBLND_RESCHED 100
#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */
#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when eagerly to return credits */
-#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
-#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1) /* Max # of peer credits */
+#define IBLND_CREDITS_DEFAULT 8 /* default # of peer_ni credits */
+#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1) /* Max # of peer_ni credits */
/* when eagerly to return credits */
#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
/* schedulers sleep here */
wait_queue_head_t kib_failover_waitq;
atomic_t kib_nthreads; /* # live threads */
- /* stabilize net/dev/peer/conn ops */
+ /* stabilize net/dev/peer_ni/conn ops */
rwlock_t kib_global_lock;
/* hash table of all my known peers */
struct list_head *kib_peers;
__u16 ibr_version; /* sender's version */
__u8 ibr_why; /* reject reason */
__u8 ibr_padding; /* padding */
- __u64 ibr_incarnation; /* incarnation of peer */
+ __u64 ibr_incarnation; /* incarnation of peer_ni */
kib_connparams_t ibr_cp; /* connection parameters */
} WIRE_ATTR kib_rej_t;
#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */
#define IBLND_REJECT_FATAL 3 /* Anything else */
-#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */
-#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
+#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer_ni */
+#define IBLND_REJECT_CONN_STALE 5 /* stale peer_ni */
-/* peer's rdma frags doesn't match mine */
+/* peer_ni's rdma frags doesn't match mine */
#define IBLND_REJECT_RDMA_FRAGS 6
-/* peer's msg queue size doesn't match mine */
+/* peer_ni's msg queue size doesn't match mine */
#define IBLND_REJECT_MSG_QUEUE_SIZE 7
/***********************************************************************/
#define IBLND_POSTRX_DONT_POST 0 /* don't post */
#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
+#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer_ni back 1 credit */
#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give myself back 1 reserved credit */
typedef struct kib_tx /* transmit message */
short tx_sending;
/* queued for sending */
short tx_queued;
- /* waiting for peer */
+ /* waiting for peer_ni */
short tx_waiting;
/* LNET completion status */
int tx_status;
{
/* scheduler information */
struct kib_sched_info *ibc_sched;
- /* owning peer */
+ /* owning peer_ni */
struct kib_peer *ibc_peer;
/* HCA bound on */
kib_hca_dev_t *ibc_hdev;
- /* stash on peer's conn list */
+ /* stash on peer_ni's conn list */
struct list_head ibc_list;
/* schedule for attention */
struct list_head ibc_sched_list;
typedef struct kib_peer
{
- /* stash on global peer list */
+ /* stash on global peer_ni list */
struct list_head ibp_list;
/* who's on the other end(s) */
lnet_nid_t ibp_nid;
struct list_head ibp_conns;
/* msgs waiting for a conn */
struct list_head ibp_tx_queue;
- /* incarnation of peer */
+ /* incarnation of peer_ni */
__u64 ibp_incarnation;
/* when (in jiffies) I was last alive */
cfs_time_t ibp_last_alive;
/* # users */
atomic_t ibp_refcount;
- /* version of peer */
+ /* version of peer_ni */
__u16 ibp_version;
/* current passive connection attempts */
unsigned short ibp_accepting;
/* current active connection attempts */
unsigned short ibp_connecting;
- /* reconnect this peer later */
+ /* reconnect this peer_ni later */
unsigned short ibp_reconnecting:1;
/* counter of how many times we triggered a conn race */
unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
- /* errno on closing this peer */
+ /* errno on closing this peer_ni */
int ibp_error;
/* max map_on_demand */
__u16 ibp_max_frags;
/* max_peer_credits */
__u16 ibp_queue_depth;
-} kib_peer_t;
+} kib_peer_ni_t;
#ifndef HAVE_IB_INC_RKEY
/**
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int mod;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
mod = tunables->lnd_map_on_demand;
return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS;
}
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int concurrent_sends;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
concurrent_sends = tunables->lnd_concurrent_sends;
if (version == IBLND_MSG_VERSION_1) {
} \
} while (0)
-#define kiblnd_peer_addref(peer) \
+#define kiblnd_peer_addref(peer_ni) \
do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- atomic_inc(&(peer)->ibp_refcount); \
+ CDEBUG(D_NET, "peer_ni[%p] -> %s (%d)++\n", \
+ (peer_ni), libcfs_nid2str((peer_ni)->ibp_nid), \
+ atomic_read (&(peer_ni)->ibp_refcount)); \
+ atomic_inc(&(peer_ni)->ibp_refcount); \
} while (0)
-#define kiblnd_peer_decref(peer) \
+#define kiblnd_peer_decref(peer_ni) \
do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kiblnd_destroy_peer(peer); \
+ CDEBUG(D_NET, "peer_ni[%p] -> %s (%d)--\n", \
+ (peer_ni), libcfs_nid2str((peer_ni)->ibp_nid), \
+ atomic_read (&(peer_ni)->ibp_refcount)); \
+ LASSERT_ATOMIC_POS(&(peer_ni)->ibp_refcount); \
+ if (atomic_dec_and_test(&(peer_ni)->ibp_refcount)) \
+ kiblnd_destroy_peer(peer_ni); \
} while (0)
static inline bool
-kiblnd_peer_connecting(kib_peer_t *peer)
+kiblnd_peer_connecting(kib_peer_ni_t *peer_ni)
{
- return peer->ibp_connecting != 0 ||
- peer->ibp_reconnecting != 0 ||
- peer->ibp_accepting != 0;
+ return peer_ni->ibp_connecting != 0 ||
+ peer_ni->ibp_reconnecting != 0 ||
+ peer_ni->ibp_accepting != 0;
}
static inline bool
-kiblnd_peer_idle(kib_peer_t *peer)
+kiblnd_peer_idle(kib_peer_ni_t *peer_ni)
{
- return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
+ return !kiblnd_peer_connecting(peer_ni) && list_empty(&peer_ni->ibp_conns);
}
static inline struct list_head *
}
static inline int
-kiblnd_peer_active (kib_peer_t *peer)
+kiblnd_peer_active (kib_peer_ni_t *peer_ni)
{
- /* Am I in the peer hash table? */
- return !list_empty(&peer->ibp_list);
+ /* Am I in the peer_ni hash table? */
+ return !list_empty(&peer_ni->ibp_list);
}
static inline kib_conn_t *
-kiblnd_get_conn_locked (kib_peer_t *peer)
+kiblnd_get_conn_locked (kib_peer_ni_t *peer_ni)
{
- LASSERT(!list_empty(&peer->ibp_conns));
+ LASSERT(!list_empty(&peer_ni->ibp_conns));
/* just return the first connection */
- return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list);
+ return list_entry(peer_ni->ibp_conns.next, kib_conn_t, ibc_list);
}
static inline int
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
if (conn->ibc_outstanding_credits <
IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
int kiblnd_translate_mtu(int value);
int kiblnd_dev_failover(kib_dev_t *dev);
-int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
-void kiblnd_destroy_peer (kib_peer_t *peer);
-bool kiblnd_reconnect_peer(kib_peer_t *peer);
+int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_ni_t **peerp, lnet_nid_t nid);
+void kiblnd_destroy_peer (kib_peer_ni_t *peer_ni);
+bool kiblnd_reconnect_peer(kib_peer_ni_t *peer_ni);
void kiblnd_destroy_dev (kib_dev_t *dev);
-void kiblnd_unlink_peer_locked (kib_peer_t *peer);
-kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
-int kiblnd_close_stale_conns_locked (kib_peer_t *peer,
+void kiblnd_unlink_peer_locked (kib_peer_ni_t *peer_ni);
+kib_peer_ni_t *kiblnd_find_peer_locked(struct lnet_ni *ni, lnet_nid_t nid);
+int kiblnd_close_stale_conns_locked (kib_peer_ni_t *peer_ni,
int version, __u64 incarnation);
-int kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why);
+int kiblnd_close_peer_conns_locked (kib_peer_ni_t *peer_ni, int why);
-kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
+kib_conn_t *kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid,
int state, int version);
void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn);
void kiblnd_close_conn (kib_conn_t *conn, int error);
#define MAX_CONN_RACES_BEFORE_ABORT 20
-static void kiblnd_peer_alive(kib_peer_t *peer);
-static void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error);
+static void kiblnd_peer_alive(kib_peer_ni_t *peer_ni);
+static void kiblnd_peer_connect_failed(kib_peer_ni_t *peer_ni, int active, int error);
static void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx,
int type, int body_nob);
static int kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
LASSERT (!in_interrupt());
LASSERT (!tx->tx_queued); /* mustn't be queued for sending */
LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */
- LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */
+ LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer_ni response */
LASSERT (tx->tx_pool != NULL);
kiblnd_unmap_tx(ni, tx);
kib_tx_t *tx;
kib_tx_poolset_t *tps;
- tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
+ tps = net->ibn_tx_ps[lnet_cpt_of_nid(target, ni)];
node = kiblnd_pool_alloc_node(&tps->tps_poolset);
if (node == NULL)
return NULL;
LASSERT (tx->tx_waiting);
/* CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
+ * (a) I can overwrite tx_msg since my peer_ni has received it!
* (b) tx_waiting set tells tx_complete() it's not done. */
tx->tx_nwrq = 0; /* overwrite PUT_REQ */
return rc;
}
- /* If rd is not tx_rd, it's going to get sent to a peer, who will need
+ /* If rd is not tx_rd, it's going to get sent to a peer_ni, who will need
* the rkey */
rd->rd_key = tx->fmr.fmr_key;
rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
__u32 nob;
int i;
- /* If rd is not tx_rd, it's going to get sent to a peer and I'm the
+ /* If rd is not tx_rd, it's going to get sent to a peer_ni and I'm the
* RDMA sink */
tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
tx->tx_nfrags = nfrags;
kiblnd_post_tx_locked (kib_conn_t *conn, kib_tx_t *tx, int credit)
__must_hold(&conn->ibc_lock)
{
- kib_msg_t *msg = tx->tx_msg;
- kib_peer_t *peer = conn->ibc_peer;
- struct lnet_ni *ni = peer->ibp_ni;
- int ver = conn->ibc_version;
- int rc;
- int done;
+ kib_msg_t *msg = tx->tx_msg;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
+ struct lnet_ni *ni = peer_ni->ibp_ni;
+ int ver = conn->ibc_version;
+ int rc;
+ int done;
LASSERT(tx->tx_queued);
/* We rely on this for QP sizing */
kiblnd_concurrent_sends(ver, ni)) {
/* tx completions outstanding... */
CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
return -EAGAIN;
}
if (credit != 0 && conn->ibc_credits == 0) { /* no credits */
CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
return -EAGAIN;
}
conn->ibc_credits == 1 && /* last credit reserved */
msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */
CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
return -EAGAIN;
}
* kiblnd_check_sends_locked will queue NOOP again when
* posted NOOPs complete */
spin_unlock(&conn->ibc_lock);
- kiblnd_tx_done(peer->ibp_ni, tx);
+ kiblnd_tx_done(peer_ni->ibp_ni, tx);
spin_lock(&conn->ibc_lock);
CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
conn->ibc_noops_posted);
return 0;
}
- kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
- peer->ibp_nid, conn->ibc_incarnation);
+ kiblnd_pack_msg(peer_ni->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
+ peer_ni->ibp_nid, conn->ibc_incarnation);
conn->ibc_credits -= credit;
conn->ibc_outstanding_credits = 0;
}
LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
- "bad wr_id %#llx, opc %d, flags %d, peer: %s\n",
+ "bad wr_id %#llx, opc %d, flags %d, peer_ni: %s\n",
bad->wr_id, bad->opcode, bad->send_flags,
libcfs_nid2str(conn->ibc_peer->ibp_nid));
if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
CERROR("Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(peer->ibp_nid));
+ rc, libcfs_nid2str(peer_ni->ibp_nid));
else
CDEBUG(D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(peer->ibp_nid));
+ rc, libcfs_nid2str(peer_ni->ibp_nid));
kiblnd_close_conn(conn, rc);
if (done)
- kiblnd_tx_done(peer->ibp_ni, tx);
+ kiblnd_tx_done(peer_ni->ibp_ni, tx);
spin_lock(&conn->ibc_lock);
conn->ibc_noops_posted--;
if (failed) {
- tx->tx_waiting = 0; /* don't wait for peer */
+ tx->tx_waiting = 0; /* don't wait for peer_ni */
tx->tx_status = -EIO;
}
idle = (tx->tx_sending == 0) && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
+ !tx->tx_waiting && /* Not waiting for peer_ni */
!tx->tx_queued; /* Not re-queued (PUT_DONE) */
if (idle)
list_del(&tx->tx_list);
}
if (tx->tx_nwrq >= conn->ibc_max_frags) {
- CERROR("RDMA has too many fragments for peer %s (%d), "
+ CERROR("RDMA has too many fragments for peer_ni %s (%d), "
"src idx/frags: %d/%d dst idx/frags: %d/%d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
conn->ibc_max_frags,
}
static void
-kiblnd_connect_peer (kib_peer_t *peer)
+kiblnd_connect_peer (kib_peer_ni_t *peer_ni)
{
struct rdma_cm_id *cmid;
kib_dev_t *dev;
- kib_net_t *net = peer->ibp_ni->ni_data;
+ kib_net_t *net = peer_ni->ibp_ni->ni_data;
struct sockaddr_in srcaddr;
struct sockaddr_in dstaddr;
int rc;
LASSERT (net != NULL);
- LASSERT (peer->ibp_connecting > 0);
- LASSERT(!peer->ibp_reconnecting);
+ LASSERT (peer_ni->ibp_connecting > 0);
+ LASSERT(!peer_ni->ibp_reconnecting);
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
+ cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer_ni, RDMA_PS_TCP,
IB_QPT_RC);
if (IS_ERR(cmid)) {
CERROR("Can't create CMID for %s: %ld\n",
- libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
+ libcfs_nid2str(peer_ni->ibp_nid), PTR_ERR(cmid));
rc = PTR_ERR(cmid);
goto failed;
}
memset(&dstaddr, 0, sizeof(dstaddr));
dstaddr.sin_family = AF_INET;
dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
- dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
+ dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer_ni->ibp_nid));
- kiblnd_peer_addref(peer); /* cmid's ref */
+ kiblnd_peer_addref(peer_ni); /* cmid's ref */
if (*kiblnd_tunables.kib_use_priv_port) {
rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
if (rc != 0) {
/* Can't initiate address resolution: */
CERROR("Can't resolve addr for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
+ libcfs_nid2str(peer_ni->ibp_nid), rc);
goto failed2;
}
LASSERT (cmid->device != NULL);
CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
- libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
+ libcfs_nid2str(peer_ni->ibp_nid), dev->ibd_ifname,
&dev->ibd_ifip, cmid->device->name);
return;
failed2:
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer); /* cmid's ref */
+ kiblnd_peer_connect_failed(peer_ni, 1, rc);
+ kiblnd_peer_decref(peer_ni); /* cmid's ref */
rdma_destroy_id(cmid);
return;
failed:
- kiblnd_peer_connect_failed(peer, 1, rc);
+ kiblnd_peer_connect_failed(peer_ni, 1, rc);
}
bool
-kiblnd_reconnect_peer(kib_peer_t *peer)
+kiblnd_reconnect_peer(kib_peer_ni_t *peer_ni)
{
rwlock_t *glock = &kiblnd_data.kib_global_lock;
char *reason = NULL;
INIT_LIST_HEAD(&txs);
write_lock_irqsave(glock, flags);
- if (peer->ibp_reconnecting == 0) {
- if (peer->ibp_accepting)
+ if (peer_ni->ibp_reconnecting == 0) {
+ if (peer_ni->ibp_accepting)
reason = "accepting";
- else if (peer->ibp_connecting)
+ else if (peer_ni->ibp_connecting)
reason = "connecting";
- else if (!list_empty(&peer->ibp_conns))
+ else if (!list_empty(&peer_ni->ibp_conns))
reason = "connected";
else /* connected then closed */
reason = "closed";
goto no_reconnect;
}
- LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
- list_empty(&peer->ibp_conns));
- peer->ibp_reconnecting = 0;
+ LASSERT(!peer_ni->ibp_accepting && !peer_ni->ibp_connecting &&
+ list_empty(&peer_ni->ibp_conns));
+ peer_ni->ibp_reconnecting = 0;
- if (!kiblnd_peer_active(peer)) {
- list_splice_init(&peer->ibp_tx_queue, &txs);
+ if (!kiblnd_peer_active(peer_ni)) {
+ list_splice_init(&peer_ni->ibp_tx_queue, &txs);
reason = "unlinked";
goto no_reconnect;
}
- peer->ibp_connecting++;
- peer->ibp_reconnected++;
+ peer_ni->ibp_connecting++;
+ peer_ni->ibp_reconnected++;
+
write_unlock_irqrestore(glock, flags);
- kiblnd_connect_peer(peer);
+ kiblnd_connect_peer(peer_ni);
return true;
no_reconnect:
write_unlock_irqrestore(glock, flags);
CWARN("Abort reconnection of %s: %s\n",
- libcfs_nid2str(peer->ibp_nid), reason);
- kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED);
+ libcfs_nid2str(peer_ni->ibp_nid), reason);
+ kiblnd_txlist_done(peer_ni->ibp_ni, &txs, -ECONNABORTED);
return false;
}
void
kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
{
- kib_peer_t *peer;
- kib_peer_t *peer2;
+ kib_peer_ni_t *peer_ni;
+ kib_peer_ni_t *peer2;
kib_conn_t *conn;
rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
unsigned long flags;
LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */
LASSERT (tx == NULL || tx->tx_nwrq > 0); /* work items have been set up */
- /* First time, just use a read lock since I expect to find my peer
+ /* First time, just use a read lock since I expect to find my peer_ni
* connected */
read_lock_irqsave(g_lock, flags);
- peer = kiblnd_find_peer_locked(nid);
- if (peer != NULL && !list_empty(&peer->ibp_conns)) {
- /* Found a peer with an established connection */
- conn = kiblnd_get_conn_locked(peer);
+ peer_ni = kiblnd_find_peer_locked(ni, nid);
+ if (peer_ni != NULL && !list_empty(&peer_ni->ibp_conns)) {
+ /* Found a peer_ni with an established connection */
+ conn = kiblnd_get_conn_locked(peer_ni);
kiblnd_conn_addref(conn); /* 1 ref for me... */
read_unlock_irqrestore(g_lock, flags);
/* Re-try with a write lock */
write_lock(g_lock);
- peer = kiblnd_find_peer_locked(nid);
- if (peer != NULL) {
- if (list_empty(&peer->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT(kiblnd_peer_connecting(peer));
+ peer_ni = kiblnd_find_peer_locked(ni, nid);
+ if (peer_ni != NULL) {
+ if (list_empty(&peer_ni->ibp_conns)) {
+ /* found a peer_ni, but it's still connecting... */
+ LASSERT(kiblnd_peer_connecting(peer_ni));
if (tx != NULL)
list_add_tail(&tx->tx_list,
- &peer->ibp_tx_queue);
+ &peer_ni->ibp_tx_queue);
write_unlock_irqrestore(g_lock, flags);
} else {
- conn = kiblnd_get_conn_locked(peer);
+ conn = kiblnd_get_conn_locked(peer_ni);
kiblnd_conn_addref(conn); /* 1 ref for me... */
write_unlock_irqrestore(g_lock, flags);
write_unlock_irqrestore(g_lock, flags);
- /* Allocate a peer ready to add to the peer table and retry */
- rc = kiblnd_create_peer(ni, &peer, nid);
+ /* Allocate a peer_ni ready to add to the peer_ni table and retry */
+ rc = kiblnd_create_peer(ni, &peer_ni, nid);
if (rc != 0) {
- CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
+ CERROR("Can't create peer_ni %s\n", libcfs_nid2str(nid));
if (tx != NULL) {
tx->tx_status = -EHOSTUNREACH;
tx->tx_waiting = 0;
write_lock_irqsave(g_lock, flags);
- peer2 = kiblnd_find_peer_locked(nid);
+ peer2 = kiblnd_find_peer_locked(ni, nid);
if (peer2 != NULL) {
if (list_empty(&peer2->ibp_conns)) {
- /* found a peer, but it's still connecting... */
+ /* found a peer_ni, but it's still connecting... */
LASSERT(kiblnd_peer_connecting(peer2));
if (tx != NULL)
list_add_tail(&tx->tx_list,
kiblnd_conn_decref(conn); /* ...to here */
}
- kiblnd_peer_decref(peer);
+ kiblnd_peer_decref(peer_ni);
return;
}
- /* Brand new peer */
- LASSERT (peer->ibp_connecting == 0);
- peer->ibp_connecting = 1;
+ /* Brand new peer_ni */
+ LASSERT (peer_ni->ibp_connecting == 0);
+ peer_ni->ibp_connecting = 1;
/* always called with a ref on ni, which prevents ni being shutdown */
LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
if (tx != NULL)
- list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
+ list_add_tail(&tx->tx_list, &peer_ni->ibp_tx_queue);
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
+ kiblnd_peer_addref(peer_ni);
+ list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid));
write_unlock_irqrestore(g_lock, flags);
- kiblnd_connect_peer(peer);
- kiblnd_peer_decref(peer);
+ kiblnd_connect_peer(peer_ni);
+ kiblnd_peer_decref(peer_ni);
}
int
CERROR("Can't setup PUT sink for %s: %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
kiblnd_tx_done(ni, tx);
- /* tell peer it's over */
+ /* tell peer_ni it's over */
kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
rxmsg->ibm_u.putreq.ibprm_cookie);
break;
}
static void
-kiblnd_peer_alive (kib_peer_t *peer)
+kiblnd_peer_alive (kib_peer_ni_t *peer_ni)
{
/* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
+ peer_ni->ibp_last_alive = cfs_time_current();
smp_mb();
}
static void
-kiblnd_peer_notify (kib_peer_t *peer)
+kiblnd_peer_notify (kib_peer_ni_t *peer_ni)
{
int error = 0;
cfs_time_t last_alive = 0;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- if (kiblnd_peer_idle(peer) && peer->ibp_error != 0) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
+ if (kiblnd_peer_idle(peer_ni) && peer_ni->ibp_error != 0) {
+ error = peer_ni->ibp_error;
+ peer_ni->ibp_error = 0;
- last_alive = peer->ibp_last_alive;
+ last_alive = peer_ni->ibp_last_alive;
}
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
if (error != 0)
- lnet_notify(peer->ibp_ni,
- peer->ibp_nid, 0, last_alive);
+ lnet_notify(peer_ni->ibp_ni,
+ peer_ni->ibp_nid, 0, last_alive);
}
void
* connection to be finished off by the connd. Otherwise the connd is
* already dealing with it (either to set it up or tear it down).
* Caller holds kib_global_lock exclusively in irq context */
- kib_peer_t *peer = conn->ibc_peer;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
kib_dev_t *dev;
unsigned long flags;
list_empty(&conn->ibc_tx_queue_nocred) &&
list_empty(&conn->ibc_active_txs)) {
CDEBUG(D_NET, "closing conn to %s\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
} else {
CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
- libcfs_nid2str(peer->ibp_nid), error,
+ libcfs_nid2str(peer_ni->ibp_nid), error,
list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
list_empty(&conn->ibc_tx_queue_rsrvd) ?
list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
}
- dev = ((kib_net_t *)peer->ibp_ni->ni_data)->ibn_dev;
+ dev = ((kib_net_t *)peer_ni->ibp_ni->ni_data)->ibn_dev;
list_del(&conn->ibc_list);
/* connd (see below) takes over ibc_list's ref */
- if (list_empty(&peer->ibp_conns) && /* no more conns */
- kiblnd_peer_active(peer)) { /* still in peer table */
- kiblnd_unlink_peer_locked(peer);
+ if (list_empty(&peer_ni->ibp_conns) && /* no more conns */
+ kiblnd_peer_active(peer_ni)) { /* still in peer_ni table */
+ kiblnd_unlink_peer_locked(peer_ni);
/* set/clear error on last conn */
- peer->ibp_error = conn->ibc_comms_error;
+ peer_ni->ibp_error = conn->ibc_comms_error;
}
kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
}
static void
-kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error)
+kiblnd_peer_connect_failed(kib_peer_ni_t *peer_ni, int active, int error)
{
struct list_head zombies = LIST_HEAD_INIT(zombies);
unsigned long flags;
write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
if (active) {
- LASSERT (peer->ibp_connecting > 0);
- peer->ibp_connecting--;
+ LASSERT(peer_ni->ibp_connecting > 0);
+ peer_ni->ibp_connecting--;
} else {
- LASSERT (peer->ibp_accepting > 0);
- peer->ibp_accepting--;
+ LASSERT (peer_ni->ibp_accepting > 0);
+ peer_ni->ibp_accepting--;
}
- if (kiblnd_peer_connecting(peer)) {
+ if (kiblnd_peer_connecting(peer_ni)) {
/* another connection attempt under way... */
write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
flags);
return;
}
- peer->ibp_reconnected = 0;
- if (list_empty(&peer->ibp_conns)) {
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
+ peer_ni->ibp_reconnected = 0;
+ if (list_empty(&peer_ni->ibp_conns)) {
+ /* Take peer_ni's blocked transmits to complete with error */
+ list_add(&zombies, &peer_ni->ibp_tx_queue);
+ list_del_init(&peer_ni->ibp_tx_queue);
- if (kiblnd_peer_active(peer))
- kiblnd_unlink_peer_locked(peer);
+ if (kiblnd_peer_active(peer_ni))
+ kiblnd_unlink_peer_locked(peer_ni);
- peer->ibp_error = error;
+ peer_ni->ibp_error = error;
} else {
/* Can't have blocked transmits if there are connections */
- LASSERT(list_empty(&peer->ibp_tx_queue));
+ LASSERT(list_empty(&peer_ni->ibp_tx_queue));
}
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- kiblnd_peer_notify(peer);
+ kiblnd_peer_notify(peer_ni);
if (list_empty(&zombies))
return;
CNETERR("Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
- kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
+ kiblnd_txlist_done(peer_ni->ibp_ni, &zombies, -EHOSTUNREACH);
}
static void
kiblnd_connreq_done(kib_conn_t *conn, int status)
{
- kib_peer_t *peer = conn->ibc_peer;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
kib_tx_t *tx;
struct list_head txs;
unsigned long flags;
active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
CDEBUG(D_NET,"%s: active(%d), version(%x), status(%d)\n",
- libcfs_nid2str(peer->ibp_nid), active,
+ libcfs_nid2str(peer_ni->ibp_nid), active,
conn->ibc_version, status);
LASSERT (!in_interrupt());
LASSERT ((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
- peer->ibp_connecting > 0) ||
+ peer_ni->ibp_connecting > 0) ||
(conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
- peer->ibp_accepting > 0));
+ peer_ni->ibp_accepting > 0));
LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
conn->ibc_connvars = NULL;
if (status != 0) {
/* failed to establish connection */
- kiblnd_peer_connect_failed(peer, active, status);
+ kiblnd_peer_connect_failed(peer_ni, active, status);
kiblnd_finalise_conn(conn);
return;
}
conn->ibc_last_send = jiffies;
kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
- kiblnd_peer_alive(peer);
+ kiblnd_peer_alive(peer_ni);
- /* Add conn to peer's list and nuke any dangling conns from a different
- * peer instance... */
+ /* Add conn to peer_ni's list and nuke any dangling conns from a different
+ * peer_ni instance... */
kiblnd_conn_addref(conn); /* +1 ref for ibc_list */
- list_add(&conn->ibc_list, &peer->ibp_conns);
- peer->ibp_reconnected = 0;
+ list_add(&conn->ibc_list, &peer_ni->ibp_conns);
+ peer_ni->ibp_reconnected = 0;
if (active)
- peer->ibp_connecting--;
+ peer_ni->ibp_connecting--;
else
- peer->ibp_accepting--;
+ peer_ni->ibp_accepting--;
- if (peer->ibp_version == 0) {
- peer->ibp_version = conn->ibc_version;
- peer->ibp_incarnation = conn->ibc_incarnation;
+ if (peer_ni->ibp_version == 0) {
+ peer_ni->ibp_version = conn->ibc_version;
+ peer_ni->ibp_incarnation = conn->ibc_incarnation;
}
- if (peer->ibp_version != conn->ibc_version ||
- peer->ibp_incarnation != conn->ibc_incarnation) {
- kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
+ if (peer_ni->ibp_version != conn->ibc_version ||
+ peer_ni->ibp_incarnation != conn->ibc_incarnation) {
+ kiblnd_close_stale_conns_locked(peer_ni, conn->ibc_version,
conn->ibc_incarnation);
- peer->ibp_version = conn->ibc_version;
- peer->ibp_incarnation = conn->ibc_incarnation;
+ peer_ni->ibp_version = conn->ibc_version;
+ peer_ni->ibp_incarnation = conn->ibc_incarnation;
}
/* grab pending txs while I have the lock */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
+ list_add(&txs, &peer_ni->ibp_tx_queue);
+ list_del_init(&peer_ni->ibp_tx_queue);
- if (!kiblnd_peer_active(peer) || /* peer has been deleted */
+ if (!kiblnd_peer_active(peer_ni) || /* peer_ni has been deleted */
conn->ibc_comms_error != 0) { /* error has happened already */
- lnet_ni_t *ni = peer->ibp_ni;
+ lnet_ni_t *ni = peer_ni->ibp_ni;
/* start to shut down connection */
kiblnd_close_conn_locked(conn, -ECONNABORTED);
kib_msg_t *reqmsg = priv;
kib_msg_t *ackmsg;
kib_dev_t *ibdev;
- kib_peer_t *peer;
- kib_peer_t *peer2;
+ kib_peer_ni_t *peer_ni;
+ kib_peer_ni_t *peer2;
kib_conn_t *conn;
lnet_ni_t *ni = NULL;
kib_net_t *net = NULL;
if (*kiblnd_tunables.kib_require_priv_port &&
ntohs(peer_addr->sin_port) >= PROT_SOCK) {
__u32 ip = ntohl(peer_addr->sin_addr.s_addr);
- CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
+ CERROR("peer_ni's port (%pI4h:%hu) is not privileged\n",
&ip, ntohs(peer_addr->sin_port));
- goto failed;
- }
+ goto failed;
+ }
- if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
- CERROR("Short connection request\n");
- goto failed;
- }
+ if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
+ CERROR("Short connection request\n");
+ goto failed;
+ }
- /* Future protocol version compatibility support! If the
- * o2iblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will
- * negotiate a protocol version. I trap this here to avoid
- * console errors; the reject tells the peer which protocol I
- * speak. */
- if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
- reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- goto failed;
- if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
- reqmsg->ibm_version != IBLND_MSG_VERSION &&
- reqmsg->ibm_version != IBLND_MSG_VERSION_1)
- goto failed;
- if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
- goto failed;
+ /* Future protocol version compatibility support! If the
+ * o2iblnd-specific protocol changes, or when LNET unifies
+ * protocols over all LNDs, the initial connection will
+ * negotiate a protocol version. I trap this here to avoid
+ * console errors; the reject tells the peer_ni which protocol I
+ * speak. */
+ if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
+ reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+ goto failed;
+ if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
+ reqmsg->ibm_version != IBLND_MSG_VERSION &&
+ reqmsg->ibm_version != IBLND_MSG_VERSION_1)
+ goto failed;
+ if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
+ reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
+ reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
+ goto failed;
- rc = kiblnd_unpack_msg(reqmsg, priv_nob);
- if (rc != 0) {
- CERROR("Can't parse connection request: %d\n", rc);
- goto failed;
- }
+ rc = kiblnd_unpack_msg(reqmsg, priv_nob);
+ if (rc != 0) {
+ CERROR("Can't parse connection request: %d\n", rc);
+ goto failed;
+ }
- nid = reqmsg->ibm_srcnid;
- ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
+ nid = reqmsg->ibm_srcnid;
+ ni = lnet_nid2ni_addref(reqmsg->ibm_dstnid);
- if (ni != NULL) {
- net = (kib_net_t *)ni->ni_data;
- rej.ibr_incarnation = net->ibn_incarnation;
- }
+ if (ni != NULL) {
+ net = (kib_net_t *)ni->ni_data;
+ rej.ibr_incarnation = net->ibn_incarnation;
+ }
- if (ni == NULL || /* no matching net */
- ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
- net->ibn_dev != ibdev) { /* wrong device */
+ if (ni == NULL || /* no matching net */
+ ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
+ net->ibn_dev != ibdev) { /* wrong device */
CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): "
- "bad dst nid %s\n", libcfs_nid2str(nid),
- ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
- ibdev->ibd_ifname, ibdev->ibd_nnets,
+ "bad dst nid %s\n", libcfs_nid2str(nid),
+ ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
+ ibdev->ibd_ifname, ibdev->ibd_nnets,
&ibdev->ibd_ifip,
- libcfs_nid2str(reqmsg->ibm_dstnid));
+ libcfs_nid2str(reqmsg->ibm_dstnid));
- goto failed;
- }
+ goto failed;
+ }
/* check time stamp as soon as possible */
- if (reqmsg->ibm_dststamp != 0 &&
- reqmsg->ibm_dststamp != net->ibn_incarnation) {
- CWARN("Stale connection request\n");
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
+ if (reqmsg->ibm_dststamp != 0 &&
+ reqmsg->ibm_dststamp != net->ibn_incarnation) {
+ CWARN("Stale connection request\n");
+ rej.ibr_why = IBLND_REJECT_CONN_STALE;
+ goto failed;
+ }
- /* I can accept peer's version */
- version = reqmsg->ibm_version;
+ /* I can accept peer_ni's version */
+ version = reqmsg->ibm_version;
- if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- reqmsg->ibm_type, libcfs_nid2str(nid));
- goto failed;
- }
+ if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
+ CERROR("Unexpected connreq msg type: %x from %s\n",
+ reqmsg->ibm_type, libcfs_nid2str(nid));
+ goto failed;
+ }
if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
kiblnd_msg_queue_size(version, ni)) {
goto failed;
}
- /* assume 'nid' is a new peer; create */
- rc = kiblnd_create_peer(ni, &peer, nid);
+ /* assume 'nid' is a new peer_ni; create */
+ rc = kiblnd_create_peer(ni, &peer_ni, nid);
if (rc != 0) {
- CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
+ CERROR("Can't create peer_ni for %s\n", libcfs_nid2str(nid));
rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
goto failed;
}
/* We have validated the peer's parameters so use those */
- peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
- peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+ peer_ni->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
+ peer_ni->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
write_lock_irqsave(g_lock, flags);
- peer2 = kiblnd_find_peer_locked(nid);
+ peer2 = kiblnd_find_peer_locked(ni, nid);
if (peer2 != NULL) {
if (peer2->ibp_version == 0) {
peer2->ibp_version = version;
libcfs_nid2str(nid), peer2->ibp_version, version,
peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
+ kiblnd_peer_decref(peer_ni);
+ rej.ibr_why = IBLND_REJECT_CONN_STALE;
+ goto failed;
+ }
/* Tie-break connection race in favour of the higher NID.
* If we keep running into a race condition multiple times,
CDEBUG(D_NET, "Conn race %s\n",
libcfs_nid2str(peer2->ibp_nid));
- kiblnd_peer_decref(peer);
+ kiblnd_peer_decref(peer_ni);
rej.ibr_why = IBLND_REJECT_CONN_RACE;
goto failed;
}
libcfs_nid2str(peer2->ibp_nid),
MAX_CONN_RACES_BEFORE_ABORT);
/*
- * passive connection is allowed even this peer is waiting for
+ * passive connection is allowed even when this peer_ni is waiting for
* reconnection.
*/
peer2->ibp_reconnecting = 0;
peer2->ibp_accepting++;
kiblnd_peer_addref(peer2);
- /* Race with kiblnd_launch_tx (active connect) to create peer
+ /* Race with kiblnd_launch_tx (active connect) to create peer_ni
* so copy validated parameters since we now know what the
- * peer's limits are */
- peer2->ibp_max_frags = peer->ibp_max_frags;
- peer2->ibp_queue_depth = peer->ibp_queue_depth;
+ * peer_ni's limits are */
+ peer2->ibp_max_frags = peer_ni->ibp_max_frags;
+ peer2->ibp_queue_depth = peer_ni->ibp_queue_depth;
write_unlock_irqrestore(g_lock, flags);
- kiblnd_peer_decref(peer);
- peer = peer2;
+ kiblnd_peer_decref(peer_ni);
+ peer_ni = peer2;
} else {
- /* Brand new peer */
- LASSERT (peer->ibp_accepting == 0);
- LASSERT (peer->ibp_version == 0 &&
- peer->ibp_incarnation == 0);
+ /* Brand new peer_ni */
+ LASSERT(peer_ni->ibp_accepting == 0);
+ LASSERT(peer_ni->ibp_version == 0 &&
+ peer_ni->ibp_incarnation == 0);
- peer->ibp_accepting = 1;
- peer->ibp_version = version;
- peer->ibp_incarnation = reqmsg->ibm_srcstamp;
+ peer_ni->ibp_accepting = 1;
+ peer_ni->ibp_version = version;
+ peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp;
/* I have a ref on ni that prevents it being shutdown */
LASSERT (net->ibn_shutdown == 0);
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
+ kiblnd_peer_addref(peer_ni);
+ list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid));
write_unlock_irqrestore(g_lock, flags);
}
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version);
+ conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT, version);
if (conn == NULL) {
- kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
- kiblnd_peer_decref(peer);
+ kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM);
+ kiblnd_peer_decref(peer_ni);
rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
goto failed;
}
__u64 incarnation, int why, kib_connparams_t *cp)
{
rwlock_t *glock = &kiblnd_data.kib_global_lock;
- kib_peer_t *peer = conn->ibc_peer;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
char *reason;
int msg_size = IBLND_MSG_SIZE;
int frag_num = -1;
unsigned long flags;
LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
- LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */
- LASSERT(!peer->ibp_reconnecting);
+ LASSERT(peer_ni->ibp_connecting > 0); /* 'conn' at least */
+ LASSERT(!peer_ni->ibp_reconnecting);
if (cp) {
msg_size = cp->ibcp_max_msg_size;
* NB: reconnect is still needed even when ibp_tx_queue is
* empty if ibp_version != version because reconnect may be
* initiated by kiblnd_query() */
- reconnect = (!list_empty(&peer->ibp_tx_queue) ||
- peer->ibp_version != version) &&
- peer->ibp_connecting == 1 &&
- peer->ibp_accepting == 0;
+ reconnect = (!list_empty(&peer_ni->ibp_tx_queue) ||
+ peer_ni->ibp_version != version) &&
+ peer_ni->ibp_connecting == 1 &&
+ peer_ni->ibp_accepting == 0;
if (!reconnect) {
reason = "no need";
goto out;
break;
case IBLND_REJECT_RDMA_FRAGS: {
- struct lnet_ioctl_config_lnd_tunables *tunables;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
if (!cp) {
reason = "can't negotiate max frags";
goto out;
}
- tunables = peer->ibp_ni->ni_lnd_tunables;
- if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
+ tunables = &peer_ni->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
+ if (!tunables->lnd_map_on_demand) {
reason = "map_on_demand must be enabled";
goto out;
}
goto out;
}
- peer->ibp_max_frags = frag_num;
+ peer_ni->ibp_max_frags = frag_num;
reason = "rdma fragments";
break;
}
goto out;
}
- peer->ibp_queue_depth = queue_dep;
+ peer_ni->ibp_queue_depth = queue_dep;
reason = "queue depth";
break;
}
conn->ibc_reconnect = 1;
- peer->ibp_reconnecting = 1;
- peer->ibp_version = version;
+ peer_ni->ibp_reconnecting = 1;
+ peer_ni->ibp_version = version;
if (incarnation != 0)
- peer->ibp_incarnation = incarnation;
+ peer_ni->ibp_incarnation = incarnation;
out:
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
reconnect ? "reconnect" : "don't reconnect",
reason, IBLND_MSG_VERSION, version, msg_size,
conn->ibc_queue_depth, queue_dep,
conn->ibc_max_frags, frag_num);
/*
- * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer
+ * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer_ni
* while destroying the zombie
*/
}
static void
kiblnd_rejected (kib_conn_t *conn, int reason, void *priv, int priv_nob)
{
- kib_peer_t *peer = conn->ibc_peer;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
LASSERT (!in_interrupt());
LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
case IB_CM_REJ_INVALID_SERVICE_ID:
CNETERR("%s rejected: no listener at %d\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
*kiblnd_tunables.kib_service);
break;
* b) V2 will provide incarnation while rejecting me,
* -1 will be overwrote.
*
- * if I try to connect to a V1 peer with V2 protocol,
+ * if I try to connect to a V1 peer_ni with V2 protocol,
* it rejected me then upgrade to V2, I have no idea
* about the upgrading and try to reconnect with V1,
* in this case upgraded V2 can find out I'm trying to
if (rej->ibr_magic != IBLND_MSG_MAGIC &&
rej->ibr_magic != LNET_PROTO_MAGIC) {
CERROR("%s rejected: consumer defined fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
break;
}
if (rej->ibr_version != IBLND_MSG_VERSION &&
rej->ibr_version != IBLND_MSG_VERSION_1) {
CERROR("%s rejected: o2iblnd version %x error\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
rej->ibr_version);
break;
}
if (rej->ibr_why == IBLND_REJECT_FATAL &&
rej->ibr_version == IBLND_MSG_VERSION_1) {
- CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
- libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
+ CDEBUG(D_NET, "rejected by old version peer_ni %s: %x\n",
+ libcfs_nid2str(peer_ni->ibp_nid), rej->ibr_version);
if (conn->ibc_version != IBLND_MSG_VERSION_1)
rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
case IBLND_REJECT_NO_RESOURCES:
CERROR("%s rejected: o2iblnd no resources\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
break;
case IBLND_REJECT_FATAL:
CERROR("%s rejected: o2iblnd fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
+ libcfs_nid2str(peer_ni->ibp_nid));
break;
default:
CERROR("%s rejected: o2iblnd reason %d\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
rej->ibr_why);
break;
}
/* fall through */
default:
CNETERR("%s rejected: reason %d, size %d\n",
- libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
+ libcfs_nid2str(peer_ni->ibp_nid), reason, priv_nob);
break;
}
static void
kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob)
{
- kib_peer_t *peer = conn->ibc_peer;
- lnet_ni_t *ni = peer->ibp_ni;
+ kib_peer_ni_t *peer_ni = conn->ibc_peer;
+ lnet_ni_t *ni = peer_ni->ibp_ni;
kib_net_t *net = ni->ni_data;
kib_msg_t *msg = priv;
int ver = conn->ibc_version;
if (rc != 0) {
CERROR("Can't unpack connack from %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
+ libcfs_nid2str(peer_ni->ibp_nid), rc);
goto failed;
}
if (msg->ibm_type != IBLND_MSG_CONNACK) {
CERROR("Unexpected message %d from %s\n",
- msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
+ msg->ibm_type, libcfs_nid2str(peer_ni->ibp_nid));
rc = -EPROTO;
goto failed;
}
if (ver != msg->ibm_version) {
CERROR("%s replied version %x is different with "
"requested version %x\n",
- libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
+ libcfs_nid2str(peer_ni->ibp_nid), msg->ibm_version, ver);
rc = -EPROTO;
goto failed;
}
if (msg->ibm_u.connparams.ibcp_queue_depth >
conn->ibc_queue_depth) {
CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
msg->ibm_u.connparams.ibcp_queue_depth,
conn->ibc_queue_depth);
rc = -EPROTO;
if (msg->ibm_u.connparams.ibcp_max_frags >
conn->ibc_max_frags) {
CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
msg->ibm_u.connparams.ibcp_max_frags,
conn->ibc_max_frags);
rc = -EPROTO;
if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
CERROR("%s max message size %d too big (%d max)\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
msg->ibm_u.connparams.ibcp_max_msg_size,
IBLND_MSG_SIZE);
rc = -EPROTO;
if (rc != 0) {
CERROR("Bad connection reply from %s, rc = %d, "
"version: %x max_frags: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc,
+ libcfs_nid2str(peer_ni->ibp_nid), rc,
msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
goto failed;
}
static int
kiblnd_active_connect (struct rdma_cm_id *cmid)
{
- kib_peer_t *peer = (kib_peer_t *)cmid->context;
+ kib_peer_ni_t *peer_ni = (kib_peer_ni_t *)cmid->context;
kib_conn_t *conn;
kib_msg_t *msg;
struct rdma_conn_param cp;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- incarnation = peer->ibp_incarnation;
- version = (peer->ibp_version == 0) ? IBLND_MSG_VERSION :
- peer->ibp_version;
+ incarnation = peer_ni->ibp_incarnation;
+ version = (peer_ni->ibp_version == 0) ? IBLND_MSG_VERSION :
+ peer_ni->ibp_version;
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
+ conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_ACTIVE_CONNECT,
version);
if (conn == NULL) {
- kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
- kiblnd_peer_decref(peer); /* lose cmid's ref */
+ kiblnd_peer_connect_failed(peer_ni, 1, -ENOMEM);
+ kiblnd_peer_decref(peer_ni); /* lose cmid's ref */
return -ENOMEM;
}
/* conn "owns" cmid now, so I return success from here on to ensure the
* CM callback doesn't destroy cmid. conn also takes over cmid's ref
- * on peer */
+ * on peer_ni */
msg = &conn->ibc_connvars->cv_msg;
msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
- kiblnd_pack_msg(peer->ibp_ni, msg, version,
- 0, peer->ibp_nid, incarnation);
+ kiblnd_pack_msg(peer_ni->ibp_ni, msg, version,
+ 0, peer_ni->ibp_nid, incarnation);
memset(&cp, 0, sizeof(cp));
cp.private_data = msg;
rc = rdma_connect(cmid, &cp);
if (rc != 0) {
CERROR("Can't connect to %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
+ libcfs_nid2str(peer_ni->ibp_nid), rc);
kiblnd_connreq_done(conn, rc);
kiblnd_conn_decref(conn);
}
int
kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
{
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
kib_conn_t *conn;
int rc;
return rc;
case RDMA_CM_EVENT_ADDR_ERROR:
- peer = (kib_peer_t *)cmid->context;
+ peer_ni = (kib_peer_ni_t *)cmid->context;
CNETERR("%s: ADDR ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
+ libcfs_nid2str(peer_ni->ibp_nid), event->status);
+ kiblnd_peer_connect_failed(peer_ni, 1, -EHOSTUNREACH);
+ kiblnd_peer_decref(peer_ni);
return -EHOSTUNREACH; /* rc != 0 destroys cmid */
case RDMA_CM_EVENT_ADDR_RESOLVED:
- peer = (kib_peer_t *)cmid->context;
+ peer_ni = (kib_peer_ni_t *)cmid->context;
CDEBUG(D_NET,"%s Addr resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
+ libcfs_nid2str(peer_ni->ibp_nid), event->status);
if (event->status != 0) {
CNETERR("Can't resolve address for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
+ libcfs_nid2str(peer_ni->ibp_nid), event->status);
rc = event->status;
} else {
rc = rdma_resolve_route(
return 0;
/* Can't initiate route resolution */
CERROR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
+ libcfs_nid2str(peer_ni->ibp_nid), rc);
}
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer);
+ kiblnd_peer_connect_failed(peer_ni, 1, rc);
+ kiblnd_peer_decref(peer_ni);
return rc; /* rc != 0 destroys cmid */
case RDMA_CM_EVENT_ROUTE_ERROR:
- peer = (kib_peer_t *)cmid->context;
+ peer_ni = (kib_peer_ni_t *)cmid->context;
CNETERR("%s: ROUTE ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
+ libcfs_nid2str(peer_ni->ibp_nid), event->status);
+ kiblnd_peer_connect_failed(peer_ni, 1, -EHOSTUNREACH);
+ kiblnd_peer_decref(peer_ni);
return -EHOSTUNREACH; /* rc != 0 destroys cmid */
case RDMA_CM_EVENT_ROUTE_RESOLVED:
- peer = (kib_peer_t *)cmid->context;
+ peer_ni = (kib_peer_ni_t *)cmid->context;
CDEBUG(D_NET,"%s Route resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
+ libcfs_nid2str(peer_ni->ibp_nid), event->status);
if (event->status == 0)
return kiblnd_active_connect(cmid);
CNETERR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, event->status);
- kiblnd_peer_decref(peer);
+ libcfs_nid2str(peer_ni->ibp_nid), event->status);
+ kiblnd_peer_connect_failed(peer_ni, 1, event->status);
+ kiblnd_peer_decref(peer_ni);
return event->status; /* rc != 0 destroys cmid */
case RDMA_CM_EVENT_UNREACHABLE:
struct list_head checksends = LIST_HEAD_INIT(checksends);
struct list_head *peers = &kiblnd_data.kib_peers[idx];
struct list_head *ptmp;
- kib_peer_t *peer;
+ kib_peer_ni_t *peer_ni;
kib_conn_t *conn;
struct list_head *ctmp;
unsigned long flags;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
list_for_each(ptmp, peers) {
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
+ peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
- list_for_each(ctmp, &peer->ibp_conns) {
+ list_for_each(ctmp, &peer_ni->ibp_conns) {
int timedout;
int sendnoop;
if (timedout) {
CERROR("Timed out RDMA with %s (%lu): "
"c: %u, oc: %u, rc: %u\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer_ni->ibp_nid),
cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive),
+ peer_ni->ibp_last_alive),
conn->ibc_credits,
conn->ibc_outstanding_credits,
conn->ibc_reserved_credits);
}
/*
- * High-water for reconnection to the same peer, reconnection attempt should
+ * High-water for reconnection to the same peer_ni, reconnection attempt should
* be delayed after trying more than KIB_RECONN_HIGH_RACE.
*/
#define KIB_RECONN_HIGH_RACE 10
dropped_lock = 0;
if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
- kib_peer_t *peer = NULL;
+ kib_peer_ni_t *peer_ni = NULL;
conn = list_entry(kiblnd_data.kib_connd_zombies.next,
kib_conn_t, ibc_list);
list_del(&conn->ibc_list);
if (conn->ibc_reconnect) {
- peer = conn->ibc_peer;
- kiblnd_peer_addref(peer);
+ peer_ni = conn->ibc_peer;
+ kiblnd_peer_addref(peer_ni);
}
spin_unlock_irqrestore(lock, flags);
dropped_lock = 1;
- kiblnd_destroy_conn(conn, !peer);
+ kiblnd_destroy_conn(conn, !peer_ni);
spin_lock_irqsave(lock, flags);
- if (!peer)
+ if (!peer_ni)
continue;
- conn->ibc_peer = peer;
- if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
+ conn->ibc_peer = peer_ni;
+ if (peer_ni->ibp_reconnected < KIB_RECONN_HIGH_RACE)
list_add_tail(&conn->ibc_list,
&kiblnd_data.kib_reconn_list);
else
/* Time to check for RDMA timeouts on a few more
* peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
+ * proportion of the peer_ni table and I need to check
* every connection 'n' times within a timeout
* interval, to ensure I detect a timeout on any
* connection within (n+1)/n times the timeout
if (version == IBLND_MSG_VERSION_1)
return IBLND_MSG_QUEUE_SIZE_V1;
else if (ni)
- return ni->ni_peertxcredits;
+ return ni->ni_net->net_tunables.lct_peer_tx_credits;
else
return peer_credits;
}
kiblnd_tunables_setup(lnet_ni_t *ni)
{
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
/*
* if there was no tunables specified, setup the tunables to be
* defaulted
*/
- if (!ni->ni_lnd_tunables) {
- LIBCFS_ALLOC(ni->ni_lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- if (!ni->ni_lnd_tunables)
- return -ENOMEM;
-
- memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+ if (!ni->ni_lnd_tunables_set)
+ memcpy(&ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib,
&default_tunables, sizeof(*tunables));
- }
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
/* Current API version */
tunables->lnd_version = 0;
return -EINVAL;
}
- if (!ni->ni_peertimeout)
- ni->ni_peertimeout = peer_timeout;
+ net_tunables = &ni->ni_net->net_tunables;
- if (!ni->ni_maxtxcredits)
- ni->ni_maxtxcredits = credits;
+ if (net_tunables->lct_peer_timeout == -1)
+ net_tunables->lct_peer_timeout = peer_timeout;
- if (!ni->ni_peertxcredits)
- ni->ni_peertxcredits = peer_credits;
+ if (net_tunables->lct_max_tx_credits == -1)
+ net_tunables->lct_max_tx_credits = credits;
- if (!ni->ni_peerrtrcredits)
- ni->ni_peerrtrcredits = peer_buffer_credits;
+ if (net_tunables->lct_peer_tx_credits == -1)
+ net_tunables->lct_peer_tx_credits = peer_credits;
- if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
- ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
+ if (net_tunables->lct_peer_rtr_credits == -1)
+ net_tunables->lct_peer_rtr_credits = peer_buffer_credits;
- if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
- ni->ni_peertxcredits = IBLND_CREDITS_MAX;
+ if (net_tunables->lct_peer_tx_credits < IBLND_CREDITS_DEFAULT)
+ net_tunables->lct_peer_tx_credits = IBLND_CREDITS_DEFAULT;
- if (ni->ni_peertxcredits > credits)
- ni->ni_peertxcredits = credits;
+ if (net_tunables->lct_peer_tx_credits > IBLND_CREDITS_MAX)
+ net_tunables->lct_peer_tx_credits = IBLND_CREDITS_MAX;
+
+ if (net_tunables->lct_peer_tx_credits >
+ net_tunables->lct_max_tx_credits)
+ net_tunables->lct_peer_tx_credits =
+ net_tunables->lct_max_tx_credits;
if (!tunables->lnd_peercredits_hiw)
tunables->lnd_peercredits_hiw = peer_credits_hiw;
- if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
+ if (tunables->lnd_peercredits_hiw < net_tunables->lct_peer_tx_credits / 2)
+ tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits / 2;
- if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
+ if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
+ tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
if (tunables->lnd_map_on_demand < 0 ||
tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
if (tunables->lnd_map_on_demand > 0 &&
tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
tunables->lnd_concurrent_sends =
- ni->ni_peertxcredits * 2;
+ net_tunables->lct_peer_tx_credits * 2;
} else {
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+ tunables->lnd_concurrent_sends =
+ net_tunables->lct_peer_tx_credits;
}
}
- if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
+ if (tunables->lnd_concurrent_sends > net_tunables->lct_peer_tx_credits * 2)
+ tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits * 2;
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
+ if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits / 2)
+ tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits / 2;
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
+ if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits) {
CWARN("Concurrent sends %d is lower than message "
"queue size: %d, performance may drop slightly.\n",
- tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
+ tunables->lnd_concurrent_sends,
+ net_tunables->lct_peer_tx_credits);
}
if (!tunables->lnd_fmr_pool_size)
* Author: Eric Barton <eric@bartonsoftware.com>
*/
+#include <linux/pci.h>
#include "socklnd.h"
static lnd_t the_ksocklnd;
}
static int
-ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
+ksocknal_create_peer(ksock_peer_ni_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
{
- int cpt = lnet_cpt_of_nid(id.nid);
+ int cpt = lnet_cpt_of_nid(id.nid, ni);
ksock_net_t *net = ni->ni_data;
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
LASSERT(id.nid != LNET_NID_ANY);
LASSERT(id.pid != LNET_PID_ANY);
LASSERT(!in_interrupt());
- LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer));
- if (peer == NULL)
+ LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni));
+ if (peer_ni == NULL)
return -ENOMEM;
- peer->ksnp_ni = ni;
- peer->ksnp_id = id;
- atomic_set(&peer->ksnp_refcount, 1); /* 1 ref for caller */
- peer->ksnp_closing = 0;
- peer->ksnp_accepting = 0;
- peer->ksnp_proto = NULL;
- peer->ksnp_last_alive = 0;
- peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
- INIT_LIST_HEAD(&peer->ksnp_conns);
- INIT_LIST_HEAD(&peer->ksnp_routes);
- INIT_LIST_HEAD(&peer->ksnp_tx_queue);
- INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
- spin_lock_init(&peer->ksnp_lock);
+ peer_ni->ksnp_ni = ni;
+ peer_ni->ksnp_id = id;
+ atomic_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */
+ peer_ni->ksnp_closing = 0;
+ peer_ni->ksnp_accepting = 0;
+ peer_ni->ksnp_proto = NULL;
+ peer_ni->ksnp_last_alive = 0;
+ peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
+
+ INIT_LIST_HEAD(&peer_ni->ksnp_conns);
+ INIT_LIST_HEAD(&peer_ni->ksnp_routes);
+ INIT_LIST_HEAD(&peer_ni->ksnp_tx_queue);
+ INIT_LIST_HEAD(&peer_ni->ksnp_zc_req_list);
+ spin_lock_init(&peer_ni->ksnp_lock);
spin_lock_bh(&net->ksnn_lock);
if (net->ksnn_shutdown) {
spin_unlock_bh(&net->ksnn_lock);
- LIBCFS_FREE(peer, sizeof(*peer));
- CERROR("Can't create peer: network shutdown\n");
+ LIBCFS_FREE(peer_ni, sizeof(*peer_ni));
+ CERROR("Can't create peer_ni: network shutdown\n");
return -ESHUTDOWN;
}
spin_unlock_bh(&net->ksnn_lock);
- *peerp = peer;
+ *peerp = peer_ni;
return 0;
}
void
-ksocknal_destroy_peer (ksock_peer_t *peer)
+ksocknal_destroy_peer (ksock_peer_ni_t *peer_ni)
{
- ksock_net_t *net = peer->ksnp_ni->ni_data;
+ ksock_net_t *net = peer_ni->ksnp_ni->ni_data;
- CDEBUG (D_NET, "peer %s %p deleted\n",
- libcfs_id2str(peer->ksnp_id), peer);
+ CDEBUG (D_NET, "peer_ni %s %p deleted\n",
+ libcfs_id2str(peer_ni->ksnp_id), peer_ni);
- LASSERT(atomic_read(&peer->ksnp_refcount) == 0);
- LASSERT(peer->ksnp_accepting == 0);
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
- LASSERT(list_empty(&peer->ksnp_tx_queue));
- LASSERT(list_empty(&peer->ksnp_zc_req_list));
+ LASSERT(atomic_read(&peer_ni->ksnp_refcount) == 0);
+ LASSERT(peer_ni->ksnp_accepting == 0);
+ LASSERT(list_empty(&peer_ni->ksnp_conns));
+ LASSERT(list_empty(&peer_ni->ksnp_routes));
+ LASSERT(list_empty(&peer_ni->ksnp_tx_queue));
+ LASSERT(list_empty(&peer_ni->ksnp_zc_req_list));
- LIBCFS_FREE(peer, sizeof(*peer));
+ LIBCFS_FREE(peer_ni, sizeof(*peer_ni));
- /* NB a peer's connections and routes keep a reference on their peer
+ /* NB a peer_ni's connections and routes keep a reference on their peer_ni
* until they are destroyed, so we can be assured that _all_ state to
- * do with this peer has been cleaned up when its refcount drops to
+ * do with this peer_ni has been cleaned up when its refcount drops to
* zero. */
spin_lock_bh(&net->ksnn_lock);
net->ksnn_npeers--;
spin_unlock_bh(&net->ksnn_lock);
}
-ksock_peer_t *
+ksock_peer_ni_t *
ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
{
struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
struct list_head *tmp;
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
list_for_each(tmp, peer_list) {
- peer = list_entry(tmp, ksock_peer_t, ksnp_list);
+ peer_ni = list_entry(tmp, ksock_peer_ni_t, ksnp_list);
- LASSERT(!peer->ksnp_closing);
+ LASSERT(!peer_ni->ksnp_closing);
- if (peer->ksnp_ni != ni)
+ if (peer_ni->ksnp_ni != ni)
continue;
- if (peer->ksnp_id.nid != id.nid ||
- peer->ksnp_id.pid != id.pid)
+ if (peer_ni->ksnp_id.nid != id.nid ||
+ peer_ni->ksnp_id.pid != id.pid)
continue;
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_id2str(id),
- atomic_read(&peer->ksnp_refcount));
- return peer;
+ CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d)\n",
+ peer_ni, libcfs_id2str(id),
+ atomic_read(&peer_ni->ksnp_refcount));
+ return peer_ni;
}
return NULL;
}
-ksock_peer_t *
+ksock_peer_ni_t *
ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
{
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
read_lock(&ksocknal_data.ksnd_global_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL) /* +1 ref for caller? */
- ksocknal_peer_addref(peer);
+ peer_ni = ksocknal_find_peer_locked(ni, id);
+ if (peer_ni != NULL) /* +1 ref for caller? */
+ ksocknal_peer_addref(peer_ni);
read_unlock(&ksocknal_data.ksnd_global_lock);
- return (peer);
+ return (peer_ni);
}
static void
-ksocknal_unlink_peer_locked (ksock_peer_t *peer)
+ksocknal_unlink_peer_locked (ksock_peer_ni_t *peer_ni)
{
int i;
__u32 ip;
ksock_interface_t *iface;
- for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
+ for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) {
LASSERT (i < LNET_MAX_INTERFACES);
- ip = peer->ksnp_passive_ips[i];
+ ip = peer_ni->ksnp_passive_ips[i];
- iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
- /* All IPs in peer->ksnp_passive_ips[] come from the
+ iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip);
+ /* All IPs in peer_ni->ksnp_passive_ips[] come from the
* interface list, therefore the call must succeed. */
LASSERT (iface != NULL);
- CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
- peer, iface, iface->ksni_nroutes);
+ CDEBUG(D_NET, "peer_ni=%p iface=%p ksni_nroutes=%d\n",
+ peer_ni, iface, iface->ksni_nroutes);
iface->ksni_npeers--;
}
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
- LASSERT(!peer->ksnp_closing);
- peer->ksnp_closing = 1;
- list_del(&peer->ksnp_list);
+ LASSERT(list_empty(&peer_ni->ksnp_conns));
+ LASSERT(list_empty(&peer_ni->ksnp_routes));
+ LASSERT(!peer_ni->ksnp_closing);
+ peer_ni->ksnp_closing = 1;
+ list_del(&peer_ni->ksnp_list);
/* lose peerlist's ref */
- ksocknal_peer_decref(peer);
+ ksocknal_peer_decref(peer_ni);
}
static int
lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip,
int *port, int *conn_count, int *share_count)
{
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
struct list_head *ptmp;
ksock_route_t *route;
struct list_head *rtmp;
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
+ peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list);
- if (peer->ksnp_ni != ni)
+ if (peer_ni->ksnp_ni != ni)
continue;
- if (peer->ksnp_n_passive_ips == 0 &&
- list_empty(&peer->ksnp_routes)) {
+ if (peer_ni->ksnp_n_passive_ips == 0 &&
+ list_empty(&peer_ni->ksnp_routes)) {
if (index-- > 0)
continue;
- *id = peer->ksnp_id;
+ *id = peer_ni->ksnp_id;
*myip = 0;
*peer_ip = 0;
*port = 0;
goto out;
}
- for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
+ for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) {
if (index-- > 0)
continue;
- *id = peer->ksnp_id;
- *myip = peer->ksnp_passive_ips[j];
+ *id = peer_ni->ksnp_id;
+ *myip = peer_ni->ksnp_passive_ips[j];
*peer_ip = 0;
*port = 0;
*conn_count = 0;
goto out;
}
- list_for_each(rtmp, &peer->ksnp_routes) {
+ list_for_each(rtmp, &peer_ni->ksnp_routes) {
if (index-- > 0)
continue;
route = list_entry(rtmp, ksock_route_t,
ksnr_list);
- *id = peer->ksnp_id;
+ *id = peer_ni->ksnp_id;
*myip = route->ksnr_myipaddr;
*peer_ip = route->ksnr_ipaddr;
*port = route->ksnr_port;
static void
ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
{
- ksock_peer_t *peer = route->ksnr_peer;
+ ksock_peer_ni_t *peer_ni = route->ksnr_peer;
int type = conn->ksnc_type;
ksock_interface_t *iface;
if (route->ksnr_myipaddr == 0) {
/* route wasn't bound locally yet (the initial route) */
CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&route->ksnr_ipaddr,
&conn->ksnc_myipaddr);
} else {
CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h "
- "to %pI4h\n", libcfs_id2str(peer->ksnp_id),
+ "to %pI4h\n", libcfs_id2str(peer_ni->ksnp_id),
&route->ksnr_ipaddr,
&route->ksnr_myipaddr,
&conn->ksnc_myipaddr);
}
static void
-ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
+ksocknal_add_route_locked (ksock_peer_ni_t *peer_ni, ksock_route_t *route)
{
struct list_head *tmp;
ksock_conn_t *conn;
ksock_route_t *route2;
- LASSERT(!peer->ksnp_closing);
+ LASSERT(!peer_ni->ksnp_closing);
LASSERT(route->ksnr_peer == NULL);
LASSERT(!route->ksnr_scheduled);
LASSERT(!route->ksnr_connecting);
LASSERT(route->ksnr_connected == 0);
/* LASSERT(unique) */
- list_for_each(tmp, &peer->ksnp_routes) {
+ list_for_each(tmp, &peer_ni->ksnp_routes) {
route2 = list_entry(tmp, ksock_route_t, ksnr_list);
if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
CERROR("Duplicate route %s %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&route->ksnr_ipaddr);
LBUG();
}
}
- route->ksnr_peer = peer;
- ksocknal_peer_addref(peer);
- /* peer's routelist takes over my ref on 'route' */
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+ route->ksnr_peer = peer_ni;
+ ksocknal_peer_addref(peer_ni);
+ /* peer_ni's routelist takes over my ref on 'route' */
+ list_add_tail(&route->ksnr_list, &peer_ni->ksnp_routes);
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
static void
ksocknal_del_route_locked (ksock_route_t *route)
{
- ksock_peer_t *peer = route->ksnr_peer;
+ ksock_peer_ni_t *peer_ni = route->ksnr_peer;
ksock_interface_t *iface;
ksock_conn_t *conn;
struct list_head *ctmp;
LASSERT(!route->ksnr_deleted);
/* Close associated conns */
- list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
+ list_for_each_safe(ctmp, cnxt, &peer_ni->ksnp_conns) {
conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
if (conn->ksnc_route != route)
route->ksnr_deleted = 1;
list_del(&route->ksnr_list);
- ksocknal_route_decref(route); /* drop peer's ref */
+ ksocknal_route_decref(route); /* drop peer_ni's ref */
- if (list_empty(&peer->ksnp_routes) &&
- list_empty(&peer->ksnp_conns)) {
- /* I've just removed the last route to a peer with no active
+ if (list_empty(&peer_ni->ksnp_routes) &&
+ list_empty(&peer_ni->ksnp_conns)) {
+ /* I've just removed the last route to a peer_ni with no active
* connections */
- ksocknal_unlink_peer_locked(peer);
+ ksocknal_unlink_peer_locked(peer_ni);
}
}
ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
{
struct list_head *tmp;
- ksock_peer_t *peer;
- ksock_peer_t *peer2;
+ ksock_peer_ni_t *peer_ni;
+ ksock_peer_ni_t *peer2;
ksock_route_t *route;
ksock_route_t *route2;
int rc;
id.pid == LNET_PID_ANY)
return (-EINVAL);
- /* Have a brand new peer ready... */
- rc = ksocknal_create_peer(&peer, ni, id);
+ /* Have a brand new peer_ni ready... */
+ rc = ksocknal_create_peer(&peer_ni, ni, id);
if (rc != 0)
return rc;
route = ksocknal_create_route (ipaddr, port);
if (route == NULL) {
- ksocknal_peer_decref(peer);
+ ksocknal_peer_decref(peer_ni);
return (-ENOMEM);
}
peer2 = ksocknal_find_peer_locked(ni, id);
if (peer2 != NULL) {
- ksocknal_peer_decref(peer);
- peer = peer2;
+ ksocknal_peer_decref(peer_ni);
+ peer_ni = peer2;
} else {
- /* peer table takes my ref on peer */
- list_add_tail(&peer->ksnp_list,
+ /* peer_ni table takes my ref on peer_ni */
+ list_add_tail(&peer_ni->ksnp_list,
ksocknal_nid2peerlist(id.nid));
}
route2 = NULL;
- list_for_each(tmp, &peer->ksnp_routes) {
+ list_for_each(tmp, &peer_ni->ksnp_routes) {
route2 = list_entry(tmp, ksock_route_t, ksnr_list);
if (route2->ksnr_ipaddr == ipaddr)
route2 = NULL;
}
if (route2 == NULL) {
- ksocknal_add_route_locked(peer, route);
+ ksocknal_add_route_locked(peer_ni, route);
route->ksnr_share_count++;
} else {
ksocknal_route_decref(route);
}
static void
-ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
+ksocknal_del_peer_locked (ksock_peer_ni_t *peer_ni, __u32 ip)
{
ksock_conn_t *conn;
ksock_route_t *route;
struct list_head *nxt;
int nshared;
- LASSERT(!peer->ksnp_closing);
+ LASSERT(!peer_ni->ksnp_closing);
- /* Extra ref prevents peer disappearing until I'm done with it */
- ksocknal_peer_addref(peer);
+ /* Extra ref prevents peer_ni disappearing until I'm done with it */
+ ksocknal_peer_addref(peer_ni);
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
/* no match */
}
nshared = 0;
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
nshared += route->ksnr_share_count;
}
/* remove everything else if there are no explicit entries
* left */
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
/* we should only be removing auto-entries */
ksocknal_del_route_locked(route);
}
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
+ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
ksocknal_close_conn_locked(conn, 0);
}
}
- ksocknal_peer_decref(peer);
- /* NB peer unlinks itself when last conn/route is removed */
+ ksocknal_peer_decref(peer_ni);
+ /* NB peer_ni unlinks itself when last conn/route is removed */
}
static int
struct list_head zombies = LIST_HEAD_INIT(zombies);
struct list_head *ptmp;
struct list_head *pnxt;
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
int lo;
int hi;
int i;
for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt,
&ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
+ peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list);
- if (peer->ksnp_ni != ni)
+ if (peer_ni->ksnp_ni != ni)
continue;
if (!((id.nid == LNET_NID_ANY ||
- peer->ksnp_id.nid == id.nid) &&
+ peer_ni->ksnp_id.nid == id.nid) &&
(id.pid == LNET_PID_ANY ||
- peer->ksnp_id.pid == id.pid)))
+ peer_ni->ksnp_id.pid == id.pid)))
continue;
- ksocknal_peer_addref(peer); /* a ref for me... */
+ ksocknal_peer_addref(peer_ni); /* a ref for me... */
- ksocknal_del_peer_locked(peer, ip);
+ ksocknal_del_peer_locked(peer_ni, ip);
- if (peer->ksnp_closing &&
- !list_empty(&peer->ksnp_tx_queue)) {
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
+ if (peer_ni->ksnp_closing &&
+ !list_empty(&peer_ni->ksnp_tx_queue)) {
+ LASSERT(list_empty(&peer_ni->ksnp_conns));
+ LASSERT(list_empty(&peer_ni->ksnp_routes));
- list_splice_init(&peer->ksnp_tx_queue,
+ list_splice_init(&peer_ni->ksnp_tx_queue,
&zombies);
}
- ksocknal_peer_decref(peer); /* ...till here */
+ ksocknal_peer_decref(peer_ni); /* ...till here */
rc = 0; /* matched! */
}
static ksock_conn_t *
ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
{
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
struct list_head *ptmp;
ksock_conn_t *conn;
struct list_head *ctmp;
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
+ peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list);
- LASSERT(!peer->ksnp_closing);
+ LASSERT(!peer_ni->ksnp_closing);
- if (peer->ksnp_ni != ni)
+ if (peer_ni->ksnp_ni != ni)
continue;
- list_for_each(ctmp, &peer->ksnp_conns) {
+ list_for_each(ctmp, &peer_ni->ksnp_conns) {
if (index-- > 0)
continue;
}
static int
-ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
+ksocknal_select_ips(ksock_peer_ni_t *peer_ni, __u32 *peerips, int n_peerips)
{
rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- ksock_net_t *net = peer->ksnp_ni->ni_data;
+ ksock_net_t *net = peer_ni->ksnp_ni->ni_data;
ksock_interface_t *iface;
ksock_interface_t *best_iface;
int n_ips;
n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
MIN(n_peerips, net->ksnn_ninterfaces);
- for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
+ for (i = 0; peer_ni->ksnp_n_passive_ips < n_ips; i++) {
/* ^ yes really... */
/* If we have any new interfaces, first tick off all the
- * peer IPs that match old interfaces, then choose new
- * interfaces to match the remaining peer IPS.
+ * peer_ni IPs that match old interfaces, then choose new
+ * interfaces to match the remaining peer_ni IPs.
* We don't forget interfaces we've stopped using; we might
* start using them again... */
- if (i < peer->ksnp_n_passive_ips) {
+ if (i < peer_ni->ksnp_n_passive_ips) {
/* Old interface. */
- ip = peer->ksnp_passive_ips[i];
- best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
+ ip = peer_ni->ksnp_passive_ips[i];
+ best_iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip);
- /* peer passive ips are kept up to date */
+ /* peer_ni passive ips are kept up to date */
LASSERT(best_iface != NULL);
} else {
/* choose a new interface */
- LASSERT (i == peer->ksnp_n_passive_ips);
+ LASSERT (i == peer_ni->ksnp_n_passive_ips);
best_iface = NULL;
best_netmatch = 0;
iface = &net->ksnn_interfaces[j];
ip = iface->ksni_ipaddr;
- for (k = 0; k < peer->ksnp_n_passive_ips; k++)
- if (peer->ksnp_passive_ips[k] == ip)
+ for (k = 0; k < peer_ni->ksnp_n_passive_ips; k++)
+ if (peer_ni->ksnp_passive_ips[k] == ip)
break;
- if (k < peer->ksnp_n_passive_ips) /* using it already */
+ if (k < peer_ni->ksnp_n_passive_ips) /* using it already */
continue;
k = ksocknal_match_peerip(iface, peerips, n_peerips);
best_iface->ksni_npeers++;
ip = best_iface->ksni_ipaddr;
- peer->ksnp_passive_ips[i] = ip;
- peer->ksnp_n_passive_ips = i+1;
+ peer_ni->ksnp_passive_ips[i] = ip;
+ peer_ni->ksnp_n_passive_ips = i+1;
}
- /* mark the best matching peer IP used */
+ /* mark the best matching peer_ni IP used */
j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
peerips[j] = 0;
}
- /* Overwrite input peer IP addresses */
- memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
+ /* Overwrite input peer_ni IP addresses */
+ memcpy(peerips, peer_ni->ksnp_passive_ips, n_ips * sizeof(*peerips));
write_unlock_bh(global_lock);
}
static void
-ksocknal_create_routes(ksock_peer_t *peer, int port,
+ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port,
__u32 *peer_ipaddrs, int npeer_ipaddrs)
{
ksock_route_t *newroute = NULL;
rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- lnet_ni_t *ni = peer->ksnp_ni;
+ lnet_ni_t *ni = peer_ni->ksnp_ni;
ksock_net_t *net = ni->ni_data;
struct list_head *rtmp;
ksock_route_t *route;
write_lock_bh(global_lock);
}
- if (peer->ksnp_closing) {
- /* peer got closed under me */
+ if (peer_ni->ksnp_closing) {
+ /* peer_ni got closed under me */
break;
}
/* Already got a route? */
route = NULL;
- list_for_each(rtmp, &peer->ksnp_routes) {
+ list_for_each(rtmp, &peer_ni->ksnp_routes) {
route = list_entry(rtmp, ksock_route_t, ksnr_list);
if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
iface = &net->ksnn_interfaces[j];
/* Using this interface already? */
- list_for_each(rtmp, &peer->ksnp_routes) {
+ list_for_each(rtmp, &peer_ni->ksnp_routes) {
route = list_entry(rtmp, ksock_route_t,
ksnr_list);
newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
best_iface->ksni_nroutes++;
- ksocknal_add_route_locked(peer, newroute);
+ ksocknal_add_route_locked(peer_ni, newroute);
newroute = NULL;
}
}
static int
-ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
+ksocknal_connecting (ksock_peer_ni_t *peer_ni, __u32 ipaddr)
{
ksock_route_t *route;
- list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
+ list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) {
if (route->ksnr_ipaddr == ipaddr)
return route->ksnr_connecting;
}
__u64 incarnation;
ksock_conn_t *conn;
ksock_conn_t *conn2;
- ksock_peer_t *peer = NULL;
- ksock_peer_t *peer2;
+ ksock_peer_ni_t *peer_ni = NULL;
+ ksock_peer_ni_t *peer2;
ksock_sched_t *sched;
struct ksock_hello_msg *hello;
int cpt;
if (rc != 0)
goto failed_1;
- /* Find out/confirm peer's NID and connection type and get the
+ /* Find out/confirm peer_ni's NID and connection type and get the
* vector of interfaces she's willing to let me connect to.
- * Passive connections use the listener timeout since the peer sends
+ * Passive connections use the listener timeout since the peer_ni sends
* eagerly */
if (active) {
- peer = route->ksnr_peer;
- LASSERT(ni == peer->ksnp_ni);
+ peer_ni = route->ksnr_peer;
+ LASSERT(ni == peer_ni->ksnp_ni);
/* Active connection sends HELLO eagerly */
hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
- peerid = peer->ksnp_id;
+ peerid = peer_ni->ksnp_id;
write_lock_bh(global_lock);
- conn->ksnc_proto = peer->ksnp_proto;
+ conn->ksnc_proto = peer_ni->ksnp_proto;
write_unlock_bh(global_lock);
if (conn->ksnc_proto == NULL) {
peerid.nid = LNET_NID_ANY;
peerid.pid = LNET_PID_ANY;
- /* Passive, get protocol from peer */
+ /* Passive, get protocol from peer_ni */
conn->ksnc_proto = NULL;
}
LASSERT (conn->ksnc_proto != NULL);
LASSERT (peerid.nid != LNET_NID_ANY);
- cpt = lnet_cpt_of_nid(peerid.nid);
+ cpt = lnet_cpt_of_nid(peerid.nid, ni);
if (active) {
- ksocknal_peer_addref(peer);
+ ksocknal_peer_addref(peer_ni);
write_lock_bh(global_lock);
} else {
- rc = ksocknal_create_peer(&peer, ni, peerid);
+ rc = ksocknal_create_peer(&peer_ni, ni, peerid);
if (rc != 0)
goto failed_1;
peer2 = ksocknal_find_peer_locked(ni, peerid);
if (peer2 == NULL) {
- /* NB this puts an "empty" peer in the peer
+ /* NB this puts an "empty" peer_ni in the peer_ni
* table (which takes my ref) */
- list_add_tail(&peer->ksnp_list,
+ list_add_tail(&peer_ni->ksnp_list,
ksocknal_nid2peerlist(peerid.nid));
} else {
- ksocknal_peer_decref(peer);
- peer = peer2;
+ ksocknal_peer_decref(peer_ni);
+ peer_ni = peer2;
}
/* +1 ref for me */
- ksocknal_peer_addref(peer);
- peer->ksnp_accepting++;
+ ksocknal_peer_addref(peer_ni);
+ peer_ni->ksnp_accepting++;
/* Am I already connecting to this guy? Resolve in
* favour of higher NID... */
if (peerid.nid < ni->ni_nid &&
- ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
+ ksocknal_connecting(peer_ni, conn->ksnc_ipaddr)) {
rc = EALREADY;
warn = "connection race resolution";
goto failed_2;
}
}
- if (peer->ksnp_closing ||
+ if (peer_ni->ksnp_closing ||
(active && route->ksnr_deleted)) {
- /* peer/route got closed under me */
+ /* peer_ni/route got closed under me */
rc = -ESTALE;
- warn = "peer/route removed";
+ warn = "peer_ni/route removed";
goto failed_2;
}
- if (peer->ksnp_proto == NULL) {
+ if (peer_ni->ksnp_proto == NULL) {
/* Never connected before.
- * NB recv_hello may have returned EPROTO to signal my peer
+ * NB recv_hello may have returned EPROTO to signal my peer_ni
* wants a different protocol than the one I asked for.
*/
- LASSERT(list_empty(&peer->ksnp_conns));
+ LASSERT(list_empty(&peer_ni->ksnp_conns));
- peer->ksnp_proto = conn->ksnc_proto;
- peer->ksnp_incarnation = incarnation;
+ peer_ni->ksnp_proto = conn->ksnc_proto;
+ peer_ni->ksnp_incarnation = incarnation;
}
- if (peer->ksnp_proto != conn->ksnc_proto ||
- peer->ksnp_incarnation != incarnation) {
- /* Peer rebooted or I've got the wrong protocol version */
- ksocknal_close_peer_conns_locked(peer, 0, 0);
+ if (peer_ni->ksnp_proto != conn->ksnc_proto ||
+ peer_ni->ksnp_incarnation != incarnation) {
+ /* peer_ni rebooted or I've got the wrong protocol version */
+ ksocknal_close_peer_conns_locked(peer_ni, 0, 0);
- peer->ksnp_proto = NULL;
+ peer_ni->ksnp_proto = NULL;
rc = ESTALE;
- warn = peer->ksnp_incarnation != incarnation ?
- "peer rebooted" :
+ warn = peer_ni->ksnp_incarnation != incarnation ?
+ "peer_ni rebooted" :
"wrong proto version";
goto failed_2;
}
/* Refuse to duplicate an existing connection, unless this is a
* loopback connection */
if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
conn2->ksnc_type != conn->ksnc_type)
continue;
- /* Reply on a passive connection attempt so the peer
+ /* Reply on a passive connection attempt so the peer_ni
* realises we're connected. */
LASSERT (rc == 0);
if (!active)
if (active &&
route->ksnr_ipaddr != conn->ksnc_ipaddr) {
CERROR("Route %s %pI4h connected to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&route->ksnr_ipaddr,
&conn->ksnc_ipaddr);
}
/* Search for a route corresponding to the new connection and
* create an association. This allows incoming connections created
- * by routes in my peer to match my own route entries so I don't
+ * by routes in my peer_ni to match my own route entries so I don't
* continually create duplicate routes. */
- list_for_each(tmp, &peer->ksnp_routes) {
+ list_for_each(tmp, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
break;
}
- conn->ksnc_peer = peer; /* conn takes my ref on peer */
- peer->ksnp_last_alive = ktime_get_real_seconds();
- peer->ksnp_send_keepalive = 0;
- peer->ksnp_error = 0;
+ conn->ksnc_peer = peer_ni; /* conn takes my ref on peer_ni */
+ peer_ni->ksnp_last_alive = ktime_get_real_seconds();
+ peer_ni->ksnp_send_keepalive = 0;
+ peer_ni->ksnp_error = 0;
sched = ksocknal_choose_scheduler_locked(cpt);
sched->kss_nconns++;
/* Set the deadline for the outgoing HELLO to drain */
conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- smp_mb(); /* order with adding to peer's conn list */
+ smp_mb(); /* order with adding to peer_ni's conn list */
- list_add(&conn->ksnc_list, &peer->ksnp_conns);
+ list_add(&conn->ksnc_list, &peer_ni->ksnp_conns);
ksocknal_conn_addref(conn);
ksocknal_new_packet(conn, 0);
conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
/* Take packets blocking for this connection. */
- list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
+ list_for_each_entry_safe(tx, txtmp, &peer_ni->ksnp_tx_queue, tx_list) {
if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) ==
SOCKNAL_MATCH_NO)
continue;
if (active) {
/* additional routes after interface exchange? */
- ksocknal_create_routes(peer, conn->ksnc_port,
+ ksocknal_create_routes(peer_ni, conn->ksnc_port,
hello->kshm_ips, hello->kshm_nips);
} else {
- hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
+ hello->kshm_nips = ksocknal_select_ips(peer_ni, hello->kshm_ips,
hello->kshm_nips);
rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
}
ksocknal_lib_set_callback(sock, conn);
if (!active)
- peer->ksnp_accepting--;
+ peer_ni->ksnp_accepting--;
write_unlock_bh(global_lock);
return rc;
failed_2:
- if (!peer->ksnp_closing &&
- list_empty(&peer->ksnp_conns) &&
- list_empty(&peer->ksnp_routes)) {
- list_add(&zombies, &peer->ksnp_tx_queue);
- list_del_init(&peer->ksnp_tx_queue);
- ksocknal_unlink_peer_locked(peer);
+ if (!peer_ni->ksnp_closing &&
+ list_empty(&peer_ni->ksnp_conns) &&
+ list_empty(&peer_ni->ksnp_routes)) {
+ list_add(&zombies, &peer_ni->ksnp_tx_queue);
+ list_del_init(&peer_ni->ksnp_tx_queue);
+ ksocknal_unlink_peer_locked(peer_ni);
}
write_unlock_bh(global_lock);
}
write_lock_bh(global_lock);
- peer->ksnp_accepting--;
+ peer_ni->ksnp_accepting--;
write_unlock_bh(global_lock);
}
ksocknal_txlist_done(ni, &zombies, 1);
- ksocknal_peer_decref(peer);
+ ksocknal_peer_decref(peer_ni);
failed_1:
if (hello != NULL)
/* This just does the immmediate housekeeping, and queues the
* connection for the reaper to terminate.
* Caller holds ksnd_global_lock exclusively in irq context */
- ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = conn->ksnc_peer;
ksock_route_t *route;
ksock_conn_t *conn2;
struct list_head *tmp;
- LASSERT(peer->ksnp_error == 0);
+ LASSERT(peer_ni->ksnp_error == 0);
LASSERT(!conn->ksnc_closing);
conn->ksnc_closing = 1;
- /* ksnd_deathrow_conns takes over peer's ref */
+ /* ksnd_deathrow_conns takes over peer_ni's ref */
list_del(&conn->ksnc_list);
route = conn->ksnc_route;
LASSERT((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
conn2 = NULL;
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
if (conn2->ksnc_route == route &&
ksocknal_route_decref(route); /* drop conn's ref on route */
}
- if (list_empty(&peer->ksnp_conns)) {
- /* No more connections to this peer */
+ if (list_empty(&peer_ni->ksnp_conns)) {
+ /* No more connections to this peer_ni */
- if (!list_empty(&peer->ksnp_tx_queue)) {
+ if (!list_empty(&peer_ni->ksnp_tx_queue)) {
ksock_tx_t *tx;
LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
/* throw them to the last connection...,
* these TXs will be send to /dev/null by scheduler */
- list_for_each_entry(tx, &peer->ksnp_tx_queue,
+ list_for_each_entry(tx, &peer_ni->ksnp_tx_queue,
tx_list)
ksocknal_tx_prep(conn, tx);
spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
- list_splice_init(&peer->ksnp_tx_queue,
+ list_splice_init(&peer_ni->ksnp_tx_queue,
&conn->ksnc_tx_queue);
spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
}
/* renegotiate protocol version */
- peer->ksnp_proto = NULL;
+ peer_ni->ksnp_proto = NULL;
/* stash last conn close reason */
- peer->ksnp_error = error;
+ peer_ni->ksnp_error = error;
- if (list_empty(&peer->ksnp_routes)) {
+ if (list_empty(&peer_ni->ksnp_routes)) {
/* I've just closed last conn belonging to a
- * peer with no routes to it */
- ksocknal_unlink_peer_locked(peer);
+ * peer_ni with no routes to it */
+ ksocknal_unlink_peer_locked(peer_ni);
}
}
}
void
-ksocknal_peer_failed (ksock_peer_t *peer)
+ksocknal_peer_failed (ksock_peer_ni_t *peer_ni)
{
int notify = 0;
cfs_time_t last_alive = 0;
/* There has been a connection failure or comms error; but I'll only
- * tell LNET I think the peer is dead if it's to another kernel and
+ * tell LNET I think the peer_ni is dead if it's to another kernel and
* there are no connections or connection attempts in existence. */
read_lock(&ksocknal_data.ksnd_global_lock);
- if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
- list_empty(&peer->ksnp_conns) &&
- peer->ksnp_accepting == 0 &&
- ksocknal_find_connecting_route_locked(peer) == NULL) {
+ if ((peer_ni->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
+ list_empty(&peer_ni->ksnp_conns) &&
+ peer_ni->ksnp_accepting == 0 &&
+ ksocknal_find_connecting_route_locked(peer_ni) == NULL) {
notify = 1;
- last_alive = peer->ksnp_last_alive;
+ last_alive = peer_ni->ksnp_last_alive;
}
read_unlock(&ksocknal_data.ksnd_global_lock);
if (notify)
- lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
+ lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, 0,
last_alive);
}
void
ksocknal_finalize_zcreq(ksock_conn_t *conn)
{
- ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = conn->ksnc_peer;
ksock_tx_t *tx;
ksock_tx_t *tmp;
struct list_head zlist = LIST_HEAD_INIT(zlist);
* abort all buffered data */
LASSERT(conn->ksnc_sock == NULL);
- spin_lock(&peer->ksnp_lock);
+ spin_lock(&peer_ni->ksnp_lock);
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
+ list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, tx_zc_list) {
if (tx->tx_conn != conn)
continue;
list_add(&tx->tx_zc_list, &zlist);
}
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
while (!list_empty(&zlist)) {
tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
* disengage the socket from its callbacks and close it.
* ksnc_refcount will eventually hit zero, and then the reaper will
* destroy it. */
- ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = conn->ksnc_peer;
ksock_sched_t *sched = conn->ksnc_scheduler;
int failed = 0;
* scheduler yet, but it _has_ committed to terminate... */
conn->ksnc_scheduler->kss_nconns--;
- if (peer->ksnp_error != 0) {
- /* peer's last conn closed in error */
- LASSERT(list_empty(&peer->ksnp_conns));
+ if (peer_ni->ksnp_error != 0) {
+ /* peer_ni's last conn closed in error */
+ LASSERT(list_empty(&peer_ni->ksnp_conns));
failed = 1;
- peer->ksnp_error = 0; /* avoid multiple notifications */
+ peer_ni->ksnp_error = 0; /* avoid multiple notifications */
}
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
if (failed)
- ksocknal_peer_failed(peer);
+ ksocknal_peer_failed(peer_ni);
/* The socket is closed on the final put; either here, or in
* ksocknal_{send,recv}msg(). Since we set up the linger2 option
}
int
-ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
+ksocknal_close_peer_conns_locked (ksock_peer_ni_t *peer_ni, __u32 ipaddr, int why)
{
ksock_conn_t *conn;
struct list_head *ctmp;
struct list_head *cnxt;
int count = 0;
- list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
+ list_for_each_safe(ctmp, cnxt, &peer_ni->ksnp_conns) {
conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
if (ipaddr == 0 ||
int
ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
{
- ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = conn->ksnc_peer;
__u32 ipaddr = conn->ksnc_ipaddr;
int count;
write_lock_bh(&ksocknal_data.ksnd_global_lock);
- count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
+ count = ksocknal_close_peer_conns_locked (peer_ni, ipaddr, why);
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
int
ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
{
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
struct list_head *ptmp;
struct list_head *pnxt;
int lo;
for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
+ peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list);
- if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
+ if (!((id.nid == LNET_NID_ANY || id.nid == peer_ni->ksnp_id.nid) &&
+ (id.pid == LNET_PID_ANY || id.pid == peer_ni->ksnp_id.pid)))
continue;
- count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
+ count += ksocknal_close_peer_conns_locked (peer_ni, ipaddr, 0);
}
}
int connect = 1;
time64_t last_alive = 0;
time64_t now = ktime_get_real_seconds();
- ksock_peer_t *peer = NULL;
+ ksock_peer_ni_t *peer_ni = NULL;
rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
lnet_process_id_t id = {
.nid = nid,
read_lock(glock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL) {
+ peer_ni = ksocknal_find_peer_locked(ni, id);
+ if (peer_ni != NULL) {
struct list_head *tmp;
ksock_conn_t *conn;
int bufnob;
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
/* something got ACKed */
conn->ksnc_tx_deadline =
cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- peer->ksnp_last_alive = now;
+ peer_ni->ksnp_last_alive = now;
conn->ksnc_tx_bufnob = bufnob;
}
}
- last_alive = peer->ksnp_last_alive;
- if (ksocknal_find_connectable_route_locked(peer) == NULL)
+ last_alive = peer_ni->ksnp_last_alive;
+ if (ksocknal_find_connectable_route_locked(peer_ni) == NULL)
connect = 0;
}
if (last_alive != 0)
*when = last_alive;
- CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
- libcfs_nid2str(nid), peer,
+ CDEBUG(D_NET, "peer_ni %s %p, alive %ld secs ago, connect %d\n",
+ libcfs_nid2str(nid), peer_ni,
last_alive ? cfs_duration_sec(now - last_alive) : -1,
connect);
write_lock_bh(glock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL)
- ksocknal_launch_all_connections_locked(peer);
+ peer_ni = ksocknal_find_peer_locked(ni, id);
+ if (peer_ni != NULL)
+ ksocknal_launch_all_connections_locked(peer_ni);
write_unlock_bh(glock);
return;
}
static void
-ksocknal_push_peer (ksock_peer_t *peer)
+ksocknal_push_peer (ksock_peer_ni_t *peer_ni)
{
int index;
int i;
i = 0;
conn = NULL;
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
if (i++ == index) {
conn = list_entry(tmp, ksock_conn_t,
ksnc_list);
}
for (tmp = start; tmp <= end; tmp++) {
- int peer_off; /* searching offset in peer hash table */
+ int peer_off; /* searching offset in peer_ni hash table */
for (peer_off = 0; ; peer_off++) {
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
int i = 0;
read_lock(&ksocknal_data.ksnd_global_lock);
- list_for_each_entry(peer, tmp, ksnp_list) {
+ list_for_each_entry(peer_ni, tmp, ksnp_list) {
if (!((id.nid == LNET_NID_ANY ||
- id.nid == peer->ksnp_id.nid) &&
+ id.nid == peer_ni->ksnp_id.nid) &&
(id.pid == LNET_PID_ANY ||
- id.pid == peer->ksnp_id.pid)))
+ id.pid == peer_ni->ksnp_id.pid)))
continue;
if (i++ == peer_off) {
- ksocknal_peer_addref(peer);
+ ksocknal_peer_addref(peer_ni);
break;
}
}
break;
rc = 0;
- ksocknal_push_peer(peer);
- ksocknal_peer_decref(peer);
+ ksocknal_push_peer(peer_ni);
+ ksocknal_peer_decref(peer_ni);
}
}
return rc;
int i;
int j;
struct list_head *ptmp;
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
struct list_head *rtmp;
ksock_route_t *route;
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t,
+ peer_ni = list_entry(ptmp, ksock_peer_ni_t,
ksnp_list);
- for (j = 0; j < peer->ksnp_n_passive_ips; j++)
- if (peer->ksnp_passive_ips[j] == ipaddress)
+ for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++)
+ if (peer_ni->ksnp_passive_ips[j] == ipaddress)
iface->ksni_npeers++;
- list_for_each(rtmp, &peer->ksnp_routes) {
+ list_for_each(rtmp, &peer_ni->ksnp_routes) {
route = list_entry(rtmp,
ksock_route_t,
ksnr_list);
}
static void
-ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
+ksocknal_peer_del_interface_locked(ksock_peer_ni_t *peer_ni, __u32 ipaddr)
{
struct list_head *tmp;
struct list_head *nxt;
int i;
int j;
- for (i = 0; i < peer->ksnp_n_passive_ips; i++)
- if (peer->ksnp_passive_ips[i] == ipaddr) {
- for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
- peer->ksnp_passive_ips[j-1] =
- peer->ksnp_passive_ips[j];
- peer->ksnp_n_passive_ips--;
+ for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++)
+ if (peer_ni->ksnp_passive_ips[i] == ipaddr) {
+ for (j = i+1; j < peer_ni->ksnp_n_passive_ips; j++)
+ peer_ni->ksnp_passive_ips[j-1] =
+ peer_ni->ksnp_passive_ips[j];
+ peer_ni->ksnp_n_passive_ips--;
break;
}
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
if (route->ksnr_myipaddr != ipaddr)
}
}
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
+ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
if (conn->ksnc_myipaddr == ipaddr)
int rc = -ENOENT;
struct list_head *tmp;
struct list_head *nxt;
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
__u32 this_ip;
int i;
int j;
for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
list_for_each_safe(tmp, nxt,
&ksocknal_data.ksnd_peers[j]) {
- peer = list_entry(tmp, ksock_peer_t,
+ peer_ni = list_entry(tmp, ksock_peer_ni_t,
ksnp_list);
- if (peer->ksnp_ni != ni)
+ if (peer_ni->ksnp_ni != ni)
continue;
- ksocknal_peer_del_interface_locked(peer, this_ip);
+ ksocknal_peer_del_interface_locked(peer_ni, this_ip);
}
}
}
static void
ksocknal_debug_peerhash (lnet_ni_t *ni)
{
- ksock_peer_t *peer = NULL;
+ ksock_peer_ni_t *peer_ni = NULL;
struct list_head *tmp;
int i;
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(tmp, ksock_peer_t, ksnp_list);
+ peer_ni = list_entry(tmp, ksock_peer_ni_t, ksnp_list);
- if (peer->ksnp_ni == ni) break;
+ if (peer_ni->ksnp_ni == ni) break;
- peer = NULL;
+ peer_ni = NULL;
}
}
- if (peer != NULL) {
+ if (peer_ni != NULL) {
ksock_route_t *route;
ksock_conn_t *conn;
- CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
+ CWARN ("Active peer_ni on shutdown: %s, ref %d, scnt %d, "
"closing %d, accepting %d, err %d, zcookie %llu, "
- "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
- atomic_read(&peer->ksnp_refcount),
- peer->ksnp_sharecount, peer->ksnp_closing,
- peer->ksnp_accepting, peer->ksnp_error,
- peer->ksnp_zc_next_cookie,
- !list_empty(&peer->ksnp_tx_queue),
- !list_empty(&peer->ksnp_zc_req_list));
-
- list_for_each(tmp, &peer->ksnp_routes) {
+ "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id),
+ atomic_read(&peer_ni->ksnp_refcount),
+ peer_ni->ksnp_sharecount, peer_ni->ksnp_closing,
+ peer_ni->ksnp_accepting, peer_ni->ksnp_error,
+ peer_ni->ksnp_zc_next_cookie,
+ !list_empty(&peer_ni->ksnp_tx_queue),
+ !list_empty(&peer_ni->ksnp_zc_req_list));
+
+ list_for_each(tmp, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
"del %d\n", atomic_read(&route->ksnr_refcount),
route->ksnr_connected, route->ksnr_deleted);
}
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
atomic_read(&conn->ksnc_conn_refcount),
/* Delete all peers */
ksocknal_del_peer(ni, anyid, 0);
- /* Wait for all peer state to clean up */
+ /* Wait for all peer_ni state to clean up */
i = 2;
spin_lock_bh(&net->ksnn_lock);
while (net->ksnn_npeers != 0) {
int rc;
int i;
- LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
+ if (ncpts > 0 && ncpts > cfs_cpt_number(lnet_cpt_table()))
+ return -EINVAL;
for (i = 0; i < ncpts; i++) {
struct ksock_sched_info *info;
int
ksocknal_startup (lnet_ni_t *ni)
{
- ksock_net_t *net;
- int rc;
- int i;
+ ksock_net_t *net;
+ int rc;
+ int i;
+ struct net_device *net_dev;
+ int node_id;
- LASSERT (ni->ni_lnd == &the_ksocklnd);
+ LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
rc = ksocknal_base_startup();
spin_lock_init(&net->ksnn_lock);
net->ksnn_incarnation = ksocknal_new_incarnation();
ni->ni_data = net;
- ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
- ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
- ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
- ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
+ if (!ni->ni_net->net_tunables_set) {
+ ni->ni_net->net_tunables.lct_peer_timeout =
+ *ksocknal_tunables.ksnd_peertimeout;
+ ni->ni_net->net_tunables.lct_max_tx_credits =
+ *ksocknal_tunables.ksnd_credits;
+ ni->ni_net->net_tunables.lct_peer_tx_credits =
+ *ksocknal_tunables.ksnd_peertxcredits;
+ ni->ni_net->net_tunables.lct_peer_rtr_credits =
+ *ksocknal_tunables.ksnd_peerrtrcredits;
+ ni->ni_net->net_tunables_set = true;
+ }
+
if (ni->ni_interfaces[0] == NULL) {
rc = ksocknal_enumerate_interfaces(net);
strlcpy(net->ksnn_interfaces[i].ksni_name,
ni->ni_interfaces[i],
sizeof(net->ksnn_interfaces[i].ksni_name));
+
}
net->ksnn_ninterfaces = i;
}
+ net_dev = dev_get_by_name(&init_net,
+ net->ksnn_interfaces[0].ksni_name);
+ if (net_dev != NULL) {
+ node_id = dev_to_node(&net_dev->dev);
+ ni->ni_dev_cpt = cfs_cpt_of_node(lnet_cpt_table(), node_id);
+ dev_put(net_dev);
+ } else {
+ ni->ni_dev_cpt = CFS_CPT_ANY;
+ }
+
/* call it before add it to ksocknal_data.ksnd_nets */
rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
if (rc != 0)
#define SOCKNAL_NSCHEDS 3
#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1)
-#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
+#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer_ni lists */
#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */
#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */
int *ksnd_keepalive_count; /* # probes */
int *ksnd_keepalive_intvl; /* time between probes */
int *ksnd_credits; /* # concurrent sends */
- int *ksnd_peertxcredits; /* # concurrent sends to 1 peer */
- int *ksnd_peerrtrcredits; /* # per-peer router buffer credits */
- int *ksnd_peertimeout; /* seconds to consider peer dead */
+ int *ksnd_peertxcredits; /* # concurrent sends to 1 peer_ni */
+ int *ksnd_peerrtrcredits; /* # per-peer_ni router buffer credits */
+ int *ksnd_peertimeout; /* seconds to consider peer_ni dead */
int *ksnd_enable_csum; /* enable check sum */
int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
int *ksnd_nonblk_zcack; /* always send zc-ack on non-blocking connection */
int ksnd_init; /* initialisation state */
int ksnd_nnets; /* # networks set up */
struct list_head ksnd_nets; /* list of nets */
- /* stabilize peer/conn ops */
+ /* stabilize peer_ni/conn ops */
rwlock_t ksnd_global_lock;
/* hash table of all my known peers */
struct list_head *ksnd_peers;
typedef struct /* transmit packet */
{
struct list_head tx_list; /* queue on conn for transmission etc */
- struct list_head tx_zc_list; /* queue on peer for ZC request */
+ struct list_head tx_zc_list; /* queue on peer_ni for ZC request */
atomic_t tx_refcount; /* tx reference count */
int tx_nob; /* # packet bytes */
int tx_resid; /* residual bytes */
typedef struct ksock_conn
{
- struct ksock_peer *ksnc_peer; /* owning peer */
+ struct ksock_peer *ksnc_peer; /* owning peer_ni */
struct ksock_route *ksnc_route; /* owning route */
- struct list_head ksnc_list; /* stash on peer's conn list */
+ struct list_head ksnc_list; /* stash on peer_ni's conn list */
struct socket *ksnc_sock; /* actual socket */
void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
void *ksnc_saved_write_space; /* socket's original write_space() callback */
atomic_t ksnc_sock_refcount; /* sock refcount */
ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
__u32 ksnc_myipaddr; /* my IP */
- __u32 ksnc_ipaddr; /* peer's IP */
- int ksnc_port; /* peer's port */
+ __u32 ksnc_ipaddr; /* peer_ni's IP */
+ int ksnc_port; /* peer_ni's port */
signed int ksnc_type:3; /* type of connection,
* should be signed value */
unsigned int ksnc_closing:1; /* being shut down */
typedef struct ksock_route
{
- struct list_head ksnr_list; /* chain on peer route list */
+ struct list_head ksnr_list; /* chain on peer_ni route list */
struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
- struct ksock_peer *ksnr_peer; /* owning peer */
+ struct ksock_peer *ksnr_peer; /* owning peer_ni */
atomic_t ksnr_refcount; /* # users */
cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */
cfs_duration_t ksnr_retry_interval; /* how long between retries */
unsigned int ksnr_scheduled:1; /* scheduled for attention */
unsigned int ksnr_connecting:1;/* connection establishment in progress */
unsigned int ksnr_connected:4; /* connections established by type */
- unsigned int ksnr_deleted:1; /* been removed from peer? */
+ unsigned int ksnr_deleted:1; /* been removed from peer_ni? */
unsigned int ksnr_share_count; /* created explicitly? */
int ksnr_conn_count; /* # conns established by this route */
} ksock_route_t;
typedef struct ksock_peer
{
- struct list_head ksnp_list; /* stash on global peer list */
+ struct list_head ksnp_list; /* stash on global peer_ni list */
cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
lnet_process_id_t ksnp_id; /* who's on the other end(s) */
atomic_t ksnp_refcount; /* # users */
int ksnp_accepting;/* # passive connections pending */
int ksnp_error; /* errno on closing last conn */
__u64 ksnp_zc_next_cookie;/* ZC completion cookie */
- __u64 ksnp_incarnation; /* latest known peer incarnation */
- struct ksock_proto *ksnp_proto; /* latest known peer protocol */
+ __u64 ksnp_incarnation; /* latest known peer_ni incarnation */
+ struct ksock_proto *ksnp_proto; /* latest known peer_ni protocol */
struct list_head ksnp_conns; /* all active connections */
struct list_head ksnp_routes; /* routes */
struct list_head ksnp_tx_queue; /* waiting packets */
lnet_ni_t *ksnp_ni; /* which network */
int ksnp_n_passive_ips; /* # of... */
__u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */
-} ksock_peer_t;
+} ksock_peer_ni_t;
typedef struct ksock_connreq
{
}
static inline void
-ksocknal_peer_addref (ksock_peer_t *peer)
+ksocknal_peer_addref (ksock_peer_ni_t *peer_ni)
{
- LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
- atomic_inc(&peer->ksnp_refcount);
+ LASSERT (atomic_read (&peer_ni->ksnp_refcount) > 0);
+ atomic_inc(&peer_ni->ksnp_refcount);
}
-extern void ksocknal_destroy_peer (ksock_peer_t *peer);
+extern void ksocknal_destroy_peer (ksock_peer_ni_t *peer_ni);
static inline void
-ksocknal_peer_decref (ksock_peer_t *peer)
+ksocknal_peer_decref (ksock_peer_ni_t *peer_ni)
{
- LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
- if (atomic_dec_and_test(&peer->ksnp_refcount))
- ksocknal_destroy_peer (peer);
+ LASSERT (atomic_read (&peer_ni->ksnp_refcount) > 0);
+ if (atomic_dec_and_test(&peer_ni->ksnp_refcount))
+ ksocknal_destroy_peer (peer_ni);
}
int ksocknal_startup (lnet_ni_t *ni);
int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
-extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
-extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
-extern void ksocknal_peer_failed (ksock_peer_t *peer);
+extern ksock_peer_ni_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
+extern ksock_peer_ni_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
+extern void ksocknal_peer_failed (ksock_peer_ni_t *peer_ni);
extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
struct socket *sock, int type);
extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
extern void ksocknal_terminate_conn (ksock_conn_t *conn);
extern void ksocknal_destroy_conn (ksock_conn_t *conn);
-extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer,
+extern int ksocknal_close_peer_conns_locked (ksock_peer_ni_t *peer_ni,
__u32 ipaddr, int why);
extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
extern int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
-extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_t *peer,
+extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_ni_t *peer_ni,
ksock_tx_t *tx, int nonblk);
extern int ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx,
extern void ksocknal_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
extern void ksocknal_thread_fini (void);
-extern void ksocknal_launch_all_connections_locked (ksock_peer_t *peer);
-extern ksock_route_t *ksocknal_find_connectable_route_locked (ksock_peer_t *peer);
-extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
+extern void ksocknal_launch_all_connections_locked (ksock_peer_ni_t *peer_ni);
+extern ksock_route_t *ksocknal_find_connectable_route_locked (ksock_peer_ni_t *peer_ni);
+extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_ni_t *peer_ni);
extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
extern int ksocknal_scheduler (void *arg);
extern int ksocknal_connd (void *arg);
ksocknal_check_zc_req(ksock_tx_t *tx)
{
ksock_conn_t *conn = tx->tx_conn;
- ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = conn->ksnc_peer;
/* Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
* to ksnp_zc_req_list if some fragment of this message should be sent
- * zero-copy. Our peer will send an ACK containing this cookie when
+ * zero-copy. Our peer_ni will send an ACK containing this cookie when
* she has received this message to tell us we can signal completion.
* tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
* ksnp_zc_req_list. */
ksocknal_tx_addref(tx);
- spin_lock(&peer->ksnp_lock);
+ spin_lock(&peer_ni->ksnp_lock);
- /* ZC_REQ is going to be pinned to the peer */
+ /* ZC_REQ is going to be pinned to the peer_ni */
tx->tx_deadline =
cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
- tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
+ tx->tx_msg.ksm_zc_cookies[0] = peer_ni->ksnp_zc_next_cookie++;
- if (peer->ksnp_zc_next_cookie == 0)
- peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
+ if (peer_ni->ksnp_zc_next_cookie == 0)
+ peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
- list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
+ list_add_tail(&tx->tx_zc_list, &peer_ni->ksnp_zc_req_list);
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
}
static void
ksocknal_uncheck_zc_req(ksock_tx_t *tx)
{
- ksock_peer_t *peer = tx->tx_conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = tx->tx_conn->ksnc_peer;
LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
LASSERT(tx->tx_zc_capable);
tx->tx_zc_checked = 0;
- spin_lock(&peer->ksnp_lock);
+ spin_lock(&peer_ni->ksnp_lock);
if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
/* Not waiting for an ACK */
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
return;
}
tx->tx_msg.ksm_zc_cookies[0] = 0;
list_del(&tx->tx_zc_list);
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
ksocknal_tx_decref(tx);
}
}
void
-ksocknal_launch_all_connections_locked (ksock_peer_t *peer)
+ksocknal_launch_all_connections_locked (ksock_peer_ni_t *peer_ni)
{
ksock_route_t *route;
/* called holding write lock on ksnd_global_lock */
for (;;) {
/* launch any/all connections that need it */
- route = ksocknal_find_connectable_route_locked(peer);
+ route = ksocknal_find_connectable_route_locked(peer_ni);
if (route == NULL)
return;
}
ksock_conn_t *
-ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk)
+ksocknal_find_conn_locked(ksock_peer_ni_t *peer_ni, ksock_tx_t *tx, int nonblk)
{
struct list_head *tmp;
ksock_conn_t *conn;
int tnob = 0;
int fnob = 0;
- list_for_each(tmp, &peer->ksnp_conns) {
+ list_for_each(tmp, &peer_ni->ksnp_conns) {
ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list);
int nob = atomic_read(&c->ksnc_tx_nob) +
c->ksnc_sock->sk->sk_wmem_queued;
ksock_route_t *
-ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
+ksocknal_find_connectable_route_locked (ksock_peer_ni_t *peer_ni)
{
cfs_time_t now = cfs_time_current();
struct list_head *tmp;
ksock_route_t *route;
- list_for_each(tmp, &peer->ksnp_routes) {
+ list_for_each(tmp, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
}
ksock_route_t *
-ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
+ksocknal_find_connecting_route_locked (ksock_peer_ni_t *peer_ni)
{
struct list_head *tmp;
ksock_route_t *route;
- list_for_each(tmp, &peer->ksnp_routes) {
+ list_for_each(tmp, &peer_ni->ksnp_routes) {
route = list_entry(tmp, ksock_route_t, ksnr_list);
LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
int
ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
{
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
ksock_conn_t *conn;
rwlock_t *g_lock;
int retry;
for (retry = 0;; retry = 1) {
read_lock(g_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL) {
- if (ksocknal_find_connectable_route_locked(peer) == NULL) {
- conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
+ peer_ni = ksocknal_find_peer_locked(ni, id);
+ if (peer_ni != NULL) {
+ if (ksocknal_find_connectable_route_locked(peer_ni) == NULL) {
+ conn = ksocknal_find_conn_locked(peer_ni, tx, tx->tx_nonblk);
if (conn != NULL) {
/* I've got no routes that need to be
* connecting and I do have an actual
write_lock_bh(g_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL)
+ peer_ni = ksocknal_find_peer_locked(ni, id);
+ if (peer_ni != NULL)
break;
write_unlock_bh(g_lock);
}
if (retry) {
- CERROR("Can't find peer %s\n", libcfs_id2str(id));
+ CERROR("Can't find peer_ni %s\n", libcfs_id2str(id));
return -EHOSTUNREACH;
}
LNET_NIDADDR(id.nid),
lnet_acceptor_port());
if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
+ CERROR("Can't add peer_ni %s: %d\n",
libcfs_id2str(id), rc);
return rc;
}
}
- ksocknal_launch_all_connections_locked(peer);
+ ksocknal_launch_all_connections_locked(peer_ni);
- conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
+ conn = ksocknal_find_conn_locked(peer_ni, tx, tx->tx_nonblk);
if (conn != NULL) {
/* Connection exists; queue message on it */
ksocknal_queue_tx_locked (tx, conn);
return (0);
}
- if (peer->ksnp_accepting > 0 ||
- ksocknal_find_connecting_route_locked (peer) != NULL) {
- /* the message is going to be pinned to the peer */
+ if (peer_ni->ksnp_accepting > 0 ||
+ ksocknal_find_connecting_route_locked (peer_ni) != NULL) {
+ /* the message is going to be pinned to the peer_ni */
tx->tx_deadline =
cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
/* Queue the message until a connection is established */
- list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue);
+ list_add_tail(&tx->tx_list, &peer_ni->ksnp_tx_queue);
write_unlock_bh(g_lock);
return 0;
}
conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
- /* Userspace peer */
+ /* Userspace peer_ni */
lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
id = &conn->ksnc_peer->ksnp_id;
proto = ksocknal_parse_proto_version(hello);
if (proto == NULL) {
if (!active) {
- /* unknown protocol from peer, tell peer my protocol */
+ /* unknown protocol from peer_ni, tell peer_ni my protocol */
conn->ksnc_proto = &ksocknal_protocol_v3x;
#if SOCKNAL_VERSION_DEBUG
if (*ksocknal_tunables.ksnd_protocol == 2)
if (!active &&
conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- /* Userspace NAL assigns peer process ID from socket */
+ /* Userspace NAL assigns peer_ni process ID from socket */
recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
} else {
if (!active) {
*peerid = recv_id;
- /* peer determines type */
+ /* peer_ni determines type */
conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
if (conn->ksnc_type == SOCKLND_CONN_NONE) {
CERROR("Unexpected type %d from %s ip %pI4h\n",
ksocknal_connect (ksock_route_t *route)
{
struct list_head zombies = LIST_HEAD_INIT(zombies);
- ksock_peer_t *peer = route->ksnr_peer;
+ ksock_peer_ni_t *peer_ni = route->ksnr_peer;
int type;
int wanted;
struct socket *sock;
for (;;) {
wanted = ksocknal_route_mask() & ~route->ksnr_connected;
- /* stop connecting if peer/route got closed under me, or
+ /* stop connecting if peer_ni/route got closed under me, or
* route got connected while queued */
- if (peer->ksnp_closing || route->ksnr_deleted ||
+ if (peer_ni->ksnp_closing || route->ksnr_deleted ||
wanted == 0) {
retry_later = 0;
break;
}
- /* reschedule if peer is connecting to me */
- if (peer->ksnp_accepting > 0) {
+ /* reschedule if peer_ni is connecting to me */
+ if (peer_ni->ksnp_accepting > 0) {
CDEBUG(D_NET,
- "peer %s(%d) already connecting to me, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting);
+ "peer_ni %s(%d) already connecting to me, retry later.\n",
+ libcfs_nid2str(peer_ni->ksnp_id.nid), peer_ni->ksnp_accepting);
retry_later = 1;
}
if (cfs_time_aftereq(cfs_time_current(), deadline)) {
rc = -ETIMEDOUT;
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
+ lnet_connect_console_error(rc, peer_ni->ksnp_id.nid,
route->ksnr_ipaddr,
route->ksnr_port);
goto failed;
}
- rc = lnet_connect(&sock, peer->ksnp_id.nid,
+ rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
route->ksnr_myipaddr,
route->ksnr_ipaddr, route->ksnr_port);
if (rc != 0)
goto failed;
- rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
+ rc = ksocknal_create_conn(peer_ni->ksnp_ni, route, sock, type);
if (rc < 0) {
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
+ lnet_connect_console_error(rc, peer_ni->ksnp_id.nid,
route->ksnr_ipaddr,
route->ksnr_port);
goto failed;
* race or I have to renegotiate protocol version */
retry_later = (rc != 0);
if (retry_later)
- CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid));
+ CDEBUG(D_NET, "peer_ni %s: conn race, retry later.\n",
+ libcfs_nid2str(peer_ni->ksnp_id.nid));
write_lock_bh(&ksocknal_data.ksnd_global_lock);
}
if (retry_later) {
/* re-queue for attention; this frees me up to handle
- * the peer's incoming connection request */
+ * the peer_ni's incoming connection request */
if (rc == EALREADY ||
- (rc == 0 && peer->ksnp_accepting > 0)) {
+ (rc == 0 && peer_ni->ksnp_accepting > 0)) {
/* We want to introduce a delay before next
* attempt to connect if we lost conn race,
* but the race is resolved quickly usually,
route->ksnr_timeout = cfs_time_add(cfs_time_current(),
route->ksnr_retry_interval);
- if (!list_empty(&peer->ksnp_tx_queue) &&
- peer->ksnp_accepting == 0 &&
- ksocknal_find_connecting_route_locked(peer) == NULL) {
+ if (!list_empty(&peer_ni->ksnp_tx_queue) &&
+ peer_ni->ksnp_accepting == 0 &&
+ ksocknal_find_connecting_route_locked(peer_ni) == NULL) {
ksock_conn_t *conn;
/* ksnp_tx_queue is queued on a conn on successful
* connection for V1.x and V2.x */
- if (!list_empty(&peer->ksnp_conns)) {
- conn = list_entry(peer->ksnp_conns.next,
+ if (!list_empty(&peer_ni->ksnp_conns)) {
+ conn = list_entry(peer_ni->ksnp_conns.next,
ksock_conn_t, ksnc_list);
LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x);
}
/* take all the blocked packets while I've got the lock and
* complete below... */
- list_splice_init(&peer->ksnp_tx_queue, &zombies);
+ list_splice_init(&peer_ni->ksnp_tx_queue, &zombies);
}
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
- ksocknal_peer_failed(peer);
- ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
+ ksocknal_peer_failed(peer_ni);
+ ksocknal_txlist_done(peer_ni->ksnp_ni, &zombies, 1);
return 0;
}
}
static ksock_conn_t *
-ksocknal_find_timed_out_conn (ksock_peer_t *peer)
+ksocknal_find_timed_out_conn (ksock_peer_ni_t *peer_ni)
{
/* We're called with a shared lock on ksnd_global_lock */
ksock_conn_t *conn;
struct list_head *ctmp;
- list_for_each(ctmp, &peer->ksnp_conns) {
+ list_for_each(ctmp, &peer_ni->ksnp_conns) {
int error;
conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
CNETERR("A connection with %s "
"(%pI4h:%d) was reset; "
"it may have rebooted.\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&conn->ksnc_ipaddr,
conn->ksnc_port);
break;
CNETERR("A connection with %s "
"(%pI4h:%d) timed out; the "
"network or node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&conn->ksnc_ipaddr,
conn->ksnc_port);
break;
CNETERR("An unexpected network error %d "
"occurred with %s "
"(%pI4h:%d\n", error,
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&conn->ksnc_ipaddr,
conn->ksnc_port);
break;
ksocknal_conn_addref(conn);
CNETERR("Timeout receiving from %s (%pI4h:%d), "
"state %d wanted %d left %d\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&conn->ksnc_ipaddr,
conn->ksnc_port,
conn->ksnc_rx_state,
ksocknal_conn_addref(conn);
CNETERR("Timeout sending data to %s (%pI4h:%d) "
"the network or that node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
+ libcfs_id2str(peer_ni->ksnp_id),
&conn->ksnc_ipaddr, conn->ksnc_port);
return (conn);
}
}
static inline void
-ksocknal_flush_stale_txs(ksock_peer_t *peer)
+ksocknal_flush_stale_txs(ksock_peer_ni_t *peer_ni)
{
ksock_tx_t *tx;
struct list_head stale_txs = LIST_HEAD_INIT(stale_txs);
write_lock_bh(&ksocknal_data.ksnd_global_lock);
- while (!list_empty(&peer->ksnp_tx_queue)) {
- tx = list_entry(peer->ksnp_tx_queue.next,
+ while (!list_empty(&peer_ni->ksnp_tx_queue)) {
+ tx = list_entry(peer_ni->ksnp_tx_queue.next,
ksock_tx_t, tx_list);
if (!cfs_time_aftereq(cfs_time_current(),
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
- ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
+ ksocknal_txlist_done(peer_ni->ksnp_ni, &stale_txs, 1);
}
static int
-ksocknal_send_keepalive_locked(ksock_peer_t *peer)
+ksocknal_send_keepalive_locked(ksock_peer_ni_t *peer_ni)
__must_hold(&ksocknal_data.ksnd_global_lock)
{
ksock_sched_t *sched;
ksock_tx_t *tx;
/* last_alive will be updated by create_conn */
- if (list_empty(&peer->ksnp_conns))
+ if (list_empty(&peer_ni->ksnp_conns))
return 0;
- if (peer->ksnp_proto != &ksocknal_protocol_v3x)
+ if (peer_ni->ksnp_proto != &ksocknal_protocol_v3x)
return 0;
if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
cfs_time_before(cfs_time_current(),
- cfs_time_add(peer->ksnp_last_alive,
+ cfs_time_add(peer_ni->ksnp_last_alive,
cfs_time_seconds(*ksocknal_tunables.ksnd_keepalive))))
return 0;
if (cfs_time_before(cfs_time_current(),
- peer->ksnp_send_keepalive))
+ peer_ni->ksnp_send_keepalive))
return 0;
/* retry 10 secs later, so we wouldn't put pressure
- * on this peer if we failed to send keepalive this time */
- peer->ksnp_send_keepalive = cfs_time_shift(10);
+ * on this peer_ni if we failed to send keepalive this time */
+ peer_ni->ksnp_send_keepalive = cfs_time_shift(10);
- conn = ksocknal_find_conn_locked(peer, NULL, 1);
+ conn = ksocknal_find_conn_locked(peer_ni, NULL, 1);
if (conn != NULL) {
sched = conn->ksnc_scheduler;
return -ENOMEM;
}
- if (ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id) == 0) {
+ if (ksocknal_launch_packet(peer_ni->ksnp_ni, tx, peer_ni->ksnp_id) == 0) {
read_lock(&ksocknal_data.ksnd_global_lock);
return 1;
}
ksocknal_check_peer_timeouts (int idx)
{
struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
- ksock_peer_t *peer;
+ ksock_peer_ni_t *peer_ni;
ksock_conn_t *conn;
ksock_tx_t *tx;
* take a look... */
read_lock(&ksocknal_data.ksnd_global_lock);
- list_for_each_entry(peer, peers, ksnp_list) {
+ list_for_each_entry(peer_ni, peers, ksnp_list) {
ksock_tx_t *tx_stale;
cfs_time_t deadline = 0;
int resid = 0;
int n = 0;
- if (ksocknal_send_keepalive_locked(peer) != 0) {
+ if (ksocknal_send_keepalive_locked(peer_ni) != 0) {
read_unlock(&ksocknal_data.ksnd_global_lock);
goto again;
}
- conn = ksocknal_find_timed_out_conn (peer);
+ conn = ksocknal_find_timed_out_conn (peer_ni);
if (conn != NULL) {
read_unlock(&ksocknal_data.ksnd_global_lock);
ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
/* NB we won't find this one again, but we can't
- * just proceed with the next peer, since we dropped
+ * just proceed with the next peer_ni, since we dropped
* ksnd_global_lock and it might be dead already! */
ksocknal_conn_decref(conn);
goto again;
/* we can't process stale txs right here because we're
* holding only shared lock */
- if (!list_empty(&peer->ksnp_tx_queue)) {
+ if (!list_empty(&peer_ni->ksnp_tx_queue)) {
ksock_tx_t *tx =
- list_entry(peer->ksnp_tx_queue.next,
+ list_entry(peer_ni->ksnp_tx_queue.next,
ksock_tx_t, tx_list);
if (cfs_time_aftereq(cfs_time_current(),
tx->tx_deadline)) {
- ksocknal_peer_addref(peer);
+ ksocknal_peer_addref(peer_ni);
read_unlock(&ksocknal_data.ksnd_global_lock);
- ksocknal_flush_stale_txs(peer);
+ ksocknal_flush_stale_txs(peer_ni);
- ksocknal_peer_decref(peer);
+ ksocknal_peer_decref(peer_ni);
goto again;
}
}
- if (list_empty(&peer->ksnp_zc_req_list))
+ if (list_empty(&peer_ni->ksnp_zc_req_list))
continue;
tx_stale = NULL;
- spin_lock(&peer->ksnp_lock);
- list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
+ spin_lock(&peer_ni->ksnp_lock);
+ list_for_each_entry(tx, &peer_ni->ksnp_zc_req_list, tx_zc_list) {
if (!cfs_time_aftereq(cfs_time_current(),
tx->tx_deadline))
break;
}
if (tx_stale == NULL) {
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
continue;
}
conn = tx_stale->tx_conn;
ksocknal_conn_addref(conn);
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
read_unlock(&ksocknal_data.ksnd_global_lock);
- CERROR("Total %d stale ZC_REQs for peer %s detected; the "
+ CERROR("Total %d stale ZC_REQs for peer_ni %s detected; the "
"oldest(%p) timed out %ld secs ago, "
"resid: %d, wmem: %d\n",
- n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale,
+ n, libcfs_nid2str(peer_ni->ksnp_id.nid), tx_stale,
cfs_duration_sec(cfs_time_current() - deadline),
resid, conn->ksnc_sock->sk->sk_wmem_queued);
int chunk = ksocknal_data.ksnd_peer_hash_size;
/* Time to check for timeouts on a few more peers: I do
- * checks every 'p' seconds on a proportion of the peer
+ * checks every 'p' seconds on a proportion of the peer_ni
* table and I need to check every connection 'n' times
* within a timeout interval, to ensure I detect a
* timeout on any connection within (n+1)/n times the
LASSERT (!conn->ksnc_closing);
if (rc != 0) {
- CERROR ("Error %d getting sock peer IP\n", rc);
+ CERROR ("Error %d getting sock peer_ni IP\n", rc);
return rc;
}
/* Remind the socket to ACK eagerly. If I don't, the socket might
* think I'm about to send something it could piggy-back the ACK
* on, introducing delay in completing zero-copy sends in my
- * peer. */
+ * peer_ni. */
kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
(char *)&opt, sizeof(opt));
static int
ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
{
- ksock_peer_t *peer = c->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = c->ksnc_peer;
ksock_conn_t *conn;
ksock_tx_t *tx;
int rc;
read_lock(&ksocknal_data.ksnd_global_lock);
- conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
+ conn = ksocknal_find_conn_locked(peer_ni, NULL, !!remote);
if (conn != NULL) {
ksock_sched_t *sched = conn->ksnc_scheduler;
if (tx == NULL)
return -ENOMEM;
- if ((rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) == 0)
+ if ((rc = ksocknal_launch_packet(peer_ni->ksnp_ni, tx, peer_ni->ksnp_id)) == 0)
return 0;
ksocknal_free_tx(tx);
static int
ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
{
- ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_peer_ni_t *peer_ni = conn->ksnc_peer;
ksock_tx_t *tx;
ksock_tx_t *tmp;
struct list_head zlist = LIST_HEAD_INIT(zlist);
return count == 1 ? 0 : -EPROTO;
}
- spin_lock(&peer->ksnp_lock);
+ spin_lock(&peer_ni->ksnp_lock);
list_for_each_entry_safe(tx, tmp,
- &peer->ksnp_zc_req_list, tx_zc_list) {
+ &peer_ni->ksnp_zc_req_list, tx_zc_list) {
__u64 c = tx->tx_msg.ksm_zc_cookies[0];
if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
}
}
- spin_unlock(&peer->ksnp_lock);
+ spin_unlock(&peer_ni->ksnp_lock);
while (!list_empty(&zlist)) {
tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
if (flip)
__swab64s(&cr.acr_nid);
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
- if (ni == NULL || /* no matching net */
+ ni = lnet_nid2ni_addref(cr.acr_nid);
+ if (ni == NULL || /* no matching net */
ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
if (ni != NULL)
lnet_ni_decref(ni);
return -EPERM;
}
- if (ni->ni_lnd->lnd_accept == NULL) {
+ if (ni->ni_net->net_lnd->lnd_accept == NULL) {
/* This catches a request for the loopback LND */
lnet_ni_decref(ni);
LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h "
CDEBUG(D_NET, "Accept %s from %pI4h\n",
libcfs_nid2str(cr.acr_nid), &peer_ip);
- rc = ni->ni_lnd->lnd_accept(ni, sock);
+ rc = ni->ni_net->net_lnd->lnd_accept(ni, sock);
lnet_ni_decref(ni);
return rc;
if (rc <= 0)
return rc;
- if (lnet_count_acceptor_nis() == 0) /* not required */
+ if (lnet_count_acceptor_nets() == 0) /* not required */
return 0;
task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
+static int use_tcp_bonding = false;
+module_param(use_tcp_bonding, int, 0444);
+MODULE_PARM_DESC(use_tcp_bonding,
+ "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
+
+unsigned int lnet_numa_range = 0;
+module_param(lnet_numa_range, uint, 0444);
+MODULE_PARM_DESC(lnet_numa_range,
+ "NUMA range to consider during Multi-Rail selection");
+
+/*
+ * This sequence number keeps track of how many times DLC was used to
+ * update the local NIs. It is incremented when a NI is added or
+ * removed and checked when sending a message to determine if there is
+ * a need to re-run the selection algorithm. See lnet_select_pathway()
+ * for more details on its usage.
+ */
+static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
+
static int lnet_ping(lnet_process_id_t id, signed long timeout,
lnet_process_id_t __user *ids, int n_ids);
the_lnet.ln_pid = requested_pid;
INIT_LIST_HEAD(&the_lnet.ln_test_peers);
- INIT_LIST_HEAD(&the_lnet.ln_nis);
- INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
- INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
+ INIT_LIST_HEAD(&the_lnet.ln_peers);
+ INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
+ INIT_LIST_HEAD(&the_lnet.ln_nets);
INIT_LIST_HEAD(&the_lnet.ln_routers);
INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
LASSERT(the_lnet.ln_refcount == 0);
LASSERT(list_empty(&the_lnet.ln_test_peers));
- LASSERT(list_empty(&the_lnet.ln_nis));
- LASSERT(list_empty(&the_lnet.ln_nis_cpt));
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+ LASSERT(list_empty(&the_lnet.ln_nets));
lnet_portals_destroy();
lnet_res_container_cleanup(&the_lnet.ln_eq_container);
lnet_msg_containers_destroy();
- lnet_peer_tables_destroy();
+ lnet_peer_uninit();
lnet_rtrpools_free(0);
if (the_lnet.ln_counters != NULL) {
}
lnet_ni_t *
-lnet_net2ni_locked(__u32 net, int cpt)
+lnet_net2ni_locked(__u32 net_id, int cpt)
{
- struct list_head *tmp;
- lnet_ni_t *ni;
+ struct lnet_ni *ni;
+ struct lnet_net *net;
LASSERT(cpt != LNET_LOCK_EX);
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net) {
- lnet_ni_addref_locked(ni, cpt);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ if (net->net_id == net_id) {
+ ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+ ni_netlist);
return ni;
}
}
}
lnet_ni_t *
-lnet_net2ni(__u32 net)
+lnet_net2ni_addref(__u32 net)
{
lnet_ni_t *ni;
lnet_net_lock(0);
ni = lnet_net2ni_locked(net, 0);
+ if (ni)
+ lnet_ni_addref_locked(ni, 0);
lnet_net_unlock(0);
return ni;
}
-EXPORT_SYMBOL(lnet_net2ni);
+EXPORT_SYMBOL(lnet_net2ni_addref);
+
+/*
+ * Look up the lnet_net with network ID \a net_id.
+ *
+ * Returns the matching lnet_net, or NULL if no such net is configured.
+ * No reference is taken on the returned net.
+ * NOTE(review): the _locked suffix suggests the caller must hold
+ * lnet_net_lock while traversing ln_nets -- confirm at call sites.
+ */
+struct lnet_net *
+lnet_get_net_locked(__u32 net_id)
+{
+	struct lnet_net *net;
+
+	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+		if (net->net_id == net_id)
+			return net;
+	}
+
+	return NULL;
+}
-static unsigned int
+unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
{
__u64 key = nid;
}
int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
+lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
{
- struct lnet_ni *ni;
+ struct lnet_net *net;
/* must called with hold of lnet_net_lock */
if (LNET_CPT_NUMBER == 1)
return 0; /* the only one */
- /* take lnet_net_lock(any) would be OK */
- if (!list_empty(&the_lnet.ln_nis_cpt)) {
- list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
- if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
- continue;
+ /*
+ * If NI is provided then use the CPT identified in the NI cpt
+ * list if one exists. If one doesn't exist, then that NI is
+ * associated with all CPTs and it follows that the net it belongs
+ * to is implicitly associated with all CPTs, so just hash the nid
+ * and return that.
+ */
+ if (ni != NULL) {
+ if (ni->ni_cpts != NULL)
+ return ni->ni_cpts[lnet_nid_cpt_hash(nid,
+ ni->ni_ncpts)];
+ else
+ return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+ }
- LASSERT(ni->ni_cpts != NULL);
- return ni->ni_cpts[lnet_nid_cpt_hash
- (nid, ni->ni_ncpts)];
- }
+ /* no NI provided so look at the net */
+ net = lnet_get_net_locked(LNET_NIDNET(nid));
+
+ if (net != NULL && net->net_cpts != NULL) {
+ return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
}
return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
}
int
-lnet_cpt_of_nid(lnet_nid_t nid)
+lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
{
int cpt;
int cpt2;
if (LNET_CPT_NUMBER == 1)
return 0; /* the only one */
- if (list_empty(&the_lnet.ln_nis_cpt))
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
cpt = lnet_net_lock_current();
- cpt2 = lnet_cpt_of_nid_locked(nid);
+
+ cpt2 = lnet_cpt_of_nid_locked(nid, ni);
+
lnet_net_unlock(cpt);
return cpt2;
EXPORT_SYMBOL(lnet_cpt_of_nid);
int
-lnet_islocalnet(__u32 net)
+lnet_islocalnet(__u32 net_id)
{
- struct lnet_ni *ni;
+ struct lnet_net *net;
int cpt;
+ bool local;
cpt = lnet_net_lock_current();
- ni = lnet_net2ni_locked(net, cpt);
- if (ni != NULL)
- lnet_ni_decref_locked(ni, cpt);
+ net = lnet_get_net_locked(net_id);
+
+ local = net != NULL;
lnet_net_unlock(cpt);
- return ni != NULL;
+ return local;
+}
+
+/*
+ * An NI is healthy when its state is ACTIVE or DEGRADED; any other
+ * state (e.g. DELETING) is unhealthy.
+ * NOTE(review): _locked suffix -- presumably the net lock protects
+ * ni_state; confirm at call sites.
+ */
+bool
+lnet_is_ni_healthy_locked(struct lnet_ni *ni)
+{
+	if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
+	    ni->ni_state == LNET_NI_STATE_DEGRADED)
+		return true;
+
+	return false;
}
lnet_ni_t *
lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
{
+ struct lnet_net *net;
struct lnet_ni *ni;
- struct list_head *tmp;
LASSERT(cpt != LNET_LOCK_EX);
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (ni->ni_nid == nid) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (ni->ni_nid == nid)
+ return ni;
}
}
return NULL;
}
+/*
+ * Translate NID \a nid to its local NI and take a reference on it.
+ *
+ * Returns the NI with an extra refcount held -- the caller must drop
+ * it with lnet_ni_decref() -- or NULL if \a nid is not a local NID.
+ */
+lnet_ni_t *
+lnet_nid2ni_addref(lnet_nid_t nid)
+{
+	lnet_ni_t *ni;
+
+	lnet_net_lock(0);
+	ni = lnet_nid2ni_locked(nid, 0);
+	if (ni)
+		lnet_ni_addref_locked(ni, 0);
+	lnet_net_unlock(0);
+
+	return ni;
+}
+EXPORT_SYMBOL(lnet_nid2ni_addref);
+
int
lnet_islocalnid(lnet_nid_t nid)
{
cpt = lnet_net_lock_current();
ni = lnet_nid2ni_locked(nid, cpt);
- if (ni != NULL)
- lnet_ni_decref_locked(ni, cpt);
lnet_net_unlock(cpt);
return ni != NULL;
}
int
-lnet_count_acceptor_nis (void)
+lnet_count_acceptor_nets(void)
{
/* Return the # of NIs that need the acceptor. */
int count = 0;
- struct list_head *tmp;
- struct lnet_ni *ni;
+ struct lnet_net *net;
int cpt;
cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (ni->ni_lnd->lnd_accept != NULL)
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ /* all socklnd type networks should have the acceptor
+ * thread started */
+ if (net->net_lnd->lnd_accept != NULL)
count++;
}
}
static inline int
+lnet_get_net_ni_count_locked(struct lnet_net *net)
+{
+	struct lnet_ni *ni;
+	int count = 0;
+
+	/* count the NIs already linked on this net's active NI list */
+	list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+		count++;
+
+	return count;
+}
+
+static inline int
+lnet_get_net_ni_count_pre(struct lnet_net *net)
+{
+	struct lnet_ni *ni;
+	int count = 0;
+
+	/* count NIs still queued on net_ni_added, i.e. added to the
+	 * net but not yet started up */
+	list_for_each_entry(ni, &net->net_ni_added, ni_netlist)
+		count++;
+
+	return count;
+}
+
+static inline int
lnet_get_ni_count(void)
{
- struct lnet_ni *ni;
- int count = 0;
+ struct lnet_ni *ni;
+ struct lnet_net *net;
+ int count = 0;
lnet_net_lock(0);
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
- count++;
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+ count++;
+ }
lnet_net_unlock(0);
static void
lnet_ping_info_destroy(void)
{
+ struct lnet_net *net;
struct lnet_ni *ni;
lnet_net_lock(LNET_LOCK_EX);
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- lnet_ni_lock(ni);
- ni->ni_status = NULL;
- lnet_ni_unlock(ni);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ lnet_ni_lock(ni);
+ ni->ni_status = NULL;
+ lnet_ni_unlock(ni);
+ }
}
lnet_ping_info_free(the_lnet.ln_ping_info);
lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
{
int i;
- lnet_ni_t *ni;
+ struct lnet_ni *ni;
+ struct lnet_net *net;
struct lnet_ni_status *ns;
i = 0;
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- LASSERT(i < ping_info->pi_nnis);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ LASSERT(i < ping_info->pi_nnis);
- ns = &ping_info->pi_ni[i];
+ ns = &ping_info->pi_ni[i];
- ns->ns_nid = ni->ni_nid;
+ ns->ns_nid = ni->ni_nid;
- lnet_ni_lock(ni);
- ns->ns_status = (ni->ni_status != NULL) ?
- ni->ni_status->ns_status : LNET_NI_STATUS_UP;
- ni->ni_status = ns;
- lnet_ni_unlock(ni);
+ lnet_ni_lock(ni);
+ ns->ns_status = (ni->ni_status != NULL) ?
+ ni->ni_status->ns_status :
+ LNET_NI_STATUS_UP;
+ ni->ni_status = ns;
+ lnet_ni_unlock(ni);
+
+ i++;
+ }
- i++;
}
}
LASSERT(ni->ni_ncpts >= 1);
if (ni->ni_ncpts == 1)
- return ni->ni_maxtxcredits;
+ return ni->ni_net->net_tunables.lct_max_tx_credits;
- credits = ni->ni_maxtxcredits / ni->ni_ncpts;
- credits = max(credits, 8 * ni->ni_peertxcredits);
- credits = min(credits, ni->ni_maxtxcredits);
+ credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
+ credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
+ credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
return credits;
}
}
/* move it to zombie list and nobody can find it anymore */
- LASSERT(!list_empty(&ni->ni_list));
- list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
- lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */
+ LASSERT(!list_empty(&ni->ni_netlist));
+ list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
+ lnet_ni_decref_locked(ni, 0);
}
static void
-lnet_clear_zombies_nis_locked(void)
+lnet_clear_zombies_nis_locked(struct lnet_net *net)
{
int i;
int islo;
lnet_ni_t *ni;
+ struct list_head *zombie_list = &net->net_ni_zombie;
- /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
- * and shut them down in guaranteed thread context */
+ /*
+ * Now wait for the NIs I just nuked to show up on the zombie
+ * list and shut them down in guaranteed thread context
+ */
i = 2;
- while (!list_empty(&the_lnet.ln_nis_zombie)) {
+ while (!list_empty(zombie_list)) {
int *ref;
int j;
- ni = list_entry(the_lnet.ln_nis_zombie.next,
- lnet_ni_t, ni_list);
- list_del_init(&ni->ni_list);
+ ni = list_entry(zombie_list->next,
+ lnet_ni_t, ni_netlist);
+ list_del_init(&ni->ni_netlist);
+ /* the ni should be in deleting state. If it's not it's
+ * a bug */
+ LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
cfs_percpt_for_each(ref, j, ni->ni_refs) {
if (*ref == 0)
continue;
/* still busy, add it back to zombie list */
- list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+ list_add(&ni->ni_netlist, zombie_list);
break;
}
- if (!list_empty(&ni->ni_list)) {
+ if (!list_empty(&ni->ni_netlist)) {
lnet_net_unlock(LNET_LOCK_EX);
++i;
if ((i & (-i)) == i) {
continue;
}
- ni->ni_lnd->lnd_refcount--;
lnet_net_unlock(LNET_LOCK_EX);
- islo = ni->ni_lnd->lnd_type == LOLND;
+ islo = ni->ni_net->net_lnd->lnd_type == LOLND;
LASSERT(!in_interrupt());
- (ni->ni_lnd->lnd_shutdown)(ni);
-
- /* can't deref lnd anymore now; it might have unregistered
- * itself... */
+ (net->net_lnd->lnd_shutdown)(ni);
if (!islo)
CDEBUG(D_LNI, "Removed LNI %s\n",
}
}
-static void
-lnet_shutdown_lndnis(void)
-{
- int i;
- lnet_ni_t *ni;
-
- /* NB called holding the global mutex */
-
- /* All quiet on the API front */
- LASSERT(!the_lnet.ln_shutdown);
- LASSERT(the_lnet.ln_refcount == 0);
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_shutdown = 1; /* flag shutdown */
-
- /* Unlink NIs from the global table */
- while (!list_empty(&the_lnet.ln_nis)) {
- ni = list_entry(the_lnet.ln_nis.next,
- lnet_ni_t, ni_list);
- lnet_ni_unlink_locked(ni);
- }
-
- /* Drop the cached loopback NI. */
- if (the_lnet.ln_loni != NULL) {
- lnet_ni_decref_locked(the_lnet.ln_loni, 0);
- the_lnet.ln_loni = NULL;
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* Clear lazy portals and drop delayed messages which hold refs
- * on their lnet_msg_t::msg_rxpeer */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- LNetClearLazyPortal(i);
-
- /* Clear the peer table and wait for all peers to go (they hold refs on
- * their NIs) */
- lnet_peer_tables_cleanup(NULL);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- lnet_clear_zombies_nis_locked();
- the_lnet.ln_shutdown = 0;
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
/* shutdown down the NI and release refcount */
static void
lnet_shutdown_lndni(struct lnet_ni *ni)
{
int i;
+ struct lnet_net *net = ni->ni_net;
lnet_net_lock(LNET_LOCK_EX);
+ ni->ni_state = LNET_NI_STATE_DELETING;
lnet_ni_unlink_locked(ni);
+ lnet_incr_dlc_seq();
lnet_net_unlock(LNET_LOCK_EX);
/* clear messages for this NI on the lazy portal */
for (i = 0; i < the_lnet.ln_nportals; i++)
lnet_clear_lazy_portal(ni, i, "Shutting down NI");
- /* Do peer table cleanup for this ni */
- lnet_peer_tables_cleanup(ni);
-
lnet_net_lock(LNET_LOCK_EX);
- lnet_clear_zombies_nis_locked();
+ lnet_clear_zombies_nis_locked(net);
lnet_net_unlock(LNET_LOCK_EX);
}
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
+static void
+lnet_shutdown_lndnet(struct lnet_net *net)
{
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
- int rc = -EINVAL;
- __u32 lnd_type;
- lnd_t *lnd;
- struct lnet_tx_queue *tq;
- int i;
+ struct lnet_ni *ni;
+
+ lnet_net_lock(LNET_LOCK_EX);
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+ net->net_state = LNET_NET_STATE_DELETING;
- LASSERT(libcfs_isknown_lnd(lnd_type));
+ list_del_init(&net->net_list);
- if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
- lnd_type == IIBLND || lnd_type == VIBLND) {
- CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
- goto failed0;
+ while (!list_empty(&net->net_ni_list)) {
+ ni = list_entry(net->net_ni_list.next,
+ lnet_ni_t, ni_netlist);
+ lnet_net_unlock(LNET_LOCK_EX);
+ lnet_shutdown_lndni(ni);
+ lnet_net_lock(LNET_LOCK_EX);
}
- /* Make sure this new NI is unique. */
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* Do peer table cleanup for this net */
+ lnet_peer_tables_cleanup(net);
+
lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
+ /*
+ * decrement ref count on lnd only when the entire network goes
+ * away
+ */
+ net->net_lnd->lnd_refcount--;
+
lnet_net_unlock(LNET_LOCK_EX);
- if (!rc) {
- if (lnd_type == LOLND) {
- lnet_ni_free(ni);
- return 0;
- }
+ lnet_net_free(net);
+}
- CERROR("Net %s is not unique\n",
- libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+static void
+lnet_shutdown_lndnets(void)
+{
+ struct lnet_net *net;
- rc = -EEXIST;
- goto failed0;
- }
+ /* NB called holding the global mutex */
- mutex_lock(&the_lnet.ln_lnd_mutex);
- lnd = lnet_find_lnd_by_type(lnd_type);
+ /* All quiet on the API front */
+ LASSERT(!the_lnet.ln_shutdown);
+ LASSERT(the_lnet.ln_refcount == 0);
- if (lnd == NULL) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- mutex_lock(&the_lnet.ln_lnd_mutex);
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_shutdown = 1; /* flag shutdown */
- lnd = lnet_find_lnd_by_type(lnd_type);
- if (lnd == NULL) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- CERROR("Can't load LND %s, module %s, rc=%d\n",
- libcfs_lnd2str(lnd_type),
- libcfs_lnd2modname(lnd_type), rc);
-#ifndef HAVE_MODULE_LOADING_SUPPORT
- LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
- "compiled with kernel module "
- "loading support.");
-#endif
- rc = -EINVAL;
- goto failed0;
- }
+ while (!list_empty(&the_lnet.ln_nets)) {
+ /*
+ * move the nets to the zombie list to avoid them being
+ * picked up for new work. LONET is also included in the
+ * Nets that will be moved to the zombie list
+ */
+ net = list_entry(the_lnet.ln_nets.next,
+ struct lnet_net, net_list);
+ list_move(&net->net_list, &the_lnet.ln_net_zombie);
}
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount++;
+ /* Drop the cached loopback Net. */
+ if (the_lnet.ln_loni != NULL) {
+ lnet_ni_decref_locked(the_lnet.ln_loni, 0);
+ the_lnet.ln_loni = NULL;
+ }
lnet_net_unlock(LNET_LOCK_EX);
- ni->ni_lnd = lnd;
+ /* iterate through the net zombie list and delete each net */
+ while (!list_empty(&the_lnet.ln_net_zombie)) {
+ net = list_entry(the_lnet.ln_net_zombie.next,
+ struct lnet_net, net_list);
+ lnet_shutdown_lndnet(net);
+ }
- if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
- lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_shutdown = 0;
+ lnet_net_unlock(LNET_LOCK_EX);
+}
- if (lnd_tunables != NULL) {
- LIBCFS_ALLOC(ni->ni_lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- if (ni->ni_lnd_tunables == NULL) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = -ENOMEM;
- goto failed0;
- }
- memcpy(ni->ni_lnd_tunables, lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- }
+static int
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+{
+ int rc = -EINVAL;
+ struct lnet_tx_queue *tq;
+ int i;
+ struct lnet_net *net = ni->ni_net;
+
+ mutex_lock(&the_lnet.ln_lnd_mutex);
- /* If given some LND tunable parameters, parse those now to
- * override the values in the NI structure. */
- if (conf) {
- if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- if (conf->cfg_config_u.cfg_net.net_peer_tx_credits >= 0)
- ni->ni_peertxcredits =
- conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
+ if (tun) {
+ memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
+ ni->ni_lnd_tunables_set = true;
}
- rc = (lnd->lnd_startup)(ni);
+ rc = (net->net_lnd->lnd_startup)(ni);
mutex_unlock(&the_lnet.ln_lnd_mutex);
if (rc != 0) {
LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
+ rc, libcfs_lnd2str(net->net_lnd->lnd_type));
lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount--;
+ net->net_lnd->lnd_refcount--;
lnet_net_unlock(LNET_LOCK_EX);
goto failed0;
}
- LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
+ ni->ni_state = LNET_NI_STATE_ACTIVE;
- lnet_net_lock(LNET_LOCK_EX);
- /* refcount for ln_nis */
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
- if (ni->ni_cpts != NULL) {
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (lnd->lnd_type == LOLND) {
+ /* We keep a reference on the loopback net through the loopback NI */
+ if (net->net_lnd->lnd_type == LOLND) {
lnet_ni_addref(ni);
LASSERT(the_lnet.ln_loni == NULL);
the_lnet.ln_loni = ni;
+ ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
+ ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
+ ni->ni_net->net_tunables.lct_max_tx_credits = 0;
+ ni->ni_net->net_tunables.lct_peer_timeout = 0;
return 0;
}
- if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) {
+ if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
+ ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
- ni->ni_peertxcredits == 0 ?
+ libcfs_lnd2str(net->net_lnd->lnd_type),
+ ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
"" : "per-peer ");
/* shutdown the NI since if we get here then it must've already
* been started
tq->tq_credits = lnet_ni_tq_credits(ni);
}
+ atomic_set(&ni->ni_tx_credits,
+ lnet_ni_tq_credits(ni) * ni->ni_ncpts);
+
CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
- libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
+ libcfs_nid2str(ni->ni_nid),
+ ni->ni_net->net_tunables.lct_peer_tx_credits,
lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
- ni->ni_peerrtrcredits, ni->ni_peertimeout);
+ ni->ni_net->net_tunables.lct_peer_rtr_credits,
+ ni->ni_net->net_tunables.lct_peer_timeout);
return 0;
failed0:
}
static int
-lnet_startup_lndnis(struct list_head *nilist)
+lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
struct lnet_ni *ni;
+ struct lnet_net *net_l = NULL;
+ struct list_head local_ni_list;
int rc;
int ni_count = 0;
+ __u32 lnd_type;
+ lnd_t *lnd;
+ int peer_timeout =
+ net->net_tunables.lct_peer_timeout;
+ int maxtxcredits =
+ net->net_tunables.lct_max_tx_credits;
+ int peerrtrcredits =
+ net->net_tunables.lct_peer_rtr_credits;
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, lnet_ni_t, ni_list);
- list_del(&ni->ni_list);
- rc = lnet_startup_lndni(ni, NULL);
+ INIT_LIST_HEAD(&local_ni_list);
- if (rc < 0)
- goto failed;
+ /*
+ * make sure that this net is unique. If it isn't then
+ * we are adding interfaces to an already existing network, and
+ * 'net' is just a convenient way to pass in the list.
+ * if it is unique we need to find the LND and load it if
+ * necessary.
+ */
+ if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
+ lnd_type = LNET_NETTYP(net->net_id);
- ni_count++;
- }
+ LASSERT(libcfs_isknown_lnd(lnd_type));
- return ni_count;
-failed:
- lnet_shutdown_lndnis();
+ if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
+ lnd_type == IIBLND || lnd_type == VIBLND) {
+ CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
+ rc = -EINVAL;
+ goto failed0;
+ }
- return rc;
-}
+ mutex_lock(&the_lnet.ln_lnd_mutex);
+ lnd = lnet_find_lnd_by_type(lnd_type);
-/**
- * Initialize LNet library.
- *
- * Automatically called at module loading time. Caller has to call
- * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
- * latter returned 0. It must be called exactly once.
- *
- * \retval 0 on success
- * \retval -ve on failures.
- */
-int lnet_lib_init(void)
-{
- int rc;
+ if (lnd == NULL) {
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
+ rc = request_module("%s", libcfs_lnd2modname(lnd_type));
+ mutex_lock(&the_lnet.ln_lnd_mutex);
+
+ lnd = lnet_find_lnd_by_type(lnd_type);
+ if (lnd == NULL) {
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
+ CERROR("Can't load LND %s, module %s, rc=%d\n",
+ libcfs_lnd2str(lnd_type),
+ libcfs_lnd2modname(lnd_type), rc);
+#ifndef HAVE_MODULE_LOADING_SUPPORT
+ LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
+ "compiled with kernel module "
+ "loading support.");
+#endif
+ rc = -EINVAL;
+ goto failed0;
+ }
+ }
- lnet_assert_wire_constants();
+ lnet_net_lock(LNET_LOCK_EX);
+ lnd->lnd_refcount++;
+ lnet_net_unlock(LNET_LOCK_EX);
- memset(&the_lnet, 0, sizeof(the_lnet));
+ net->net_lnd = lnd;
- /* refer to global cfs_cpt_table for now */
- the_lnet.ln_cpt_table = cfs_cpt_table;
- the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_table);
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
- LASSERT(the_lnet.ln_cpt_number > 0);
- if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
- /* we are under risk of consuming all lh_cookie */
- CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
- "please change setting of CPT-table and retry\n",
- the_lnet.ln_cpt_number, LNET_CPT_MAX);
- return -E2BIG;
+ net_l = net;
}
- while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
- the_lnet.ln_cpt_bits++;
+ /*
+ * net_l: if the network being added is unique then net_l
+ * will point to that network
+ * if the network being added is not unique then
+ * net_l points to the existing network.
+ *
+	 * When we enter the loop below, we'll pick NIs off the
+	 * network being added and start them up, then add them to
+ * a local ni list. Once we've successfully started all
+ * the NIs then we join the local NI list (of started up
+ * networks) with the net_l->net_ni_list, which should
+ * point to the correct network to add the new ni list to
+ *
+ * If any of the new NIs fail to start up, then we want to
+ * iterate through the local ni list, which should include
+ * any NIs which were successfully started up, and shut
+ * them down.
+ *
+	 * After that we want to delete the network being added,
+	 * to avoid a memory leak.
+ */
- rc = lnet_create_locks();
- if (rc != 0) {
- CERROR("Can't create LNet global locks: %d\n", rc);
- return rc;
+ /*
+ * When a network uses TCP bonding then all its interfaces
+ * must be specified when the network is first defined: the
+ * TCP bonding code doesn't allow for interfaces to be added
+ * or removed.
+ */
+ if (net_l != net && net_l != NULL && use_tcp_bonding &&
+ LNET_NETTYP(net_l->net_id) == SOCKLND) {
+ rc = -EINVAL;
+ goto failed0;
}
- the_lnet.ln_refcount = 0;
- LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
+ while (!list_empty(&net->net_ni_added)) {
+ ni = list_entry(net->net_ni_added.next, struct lnet_ni,
+ ni_netlist);
+ list_del_init(&ni->ni_netlist);
+
+		/* make sure that the NI we're about to start
+		 * up is actually unique. if it's not fail. */
+ if (!lnet_ni_unique_net(&net_l->net_ni_list,
+ ni->ni_interfaces[0])) {
+ rc = -EINVAL;
+ goto failed1;
+ }
+
+		/* adjust the pointer to the parent network, just in case
+		 * the net is a duplicate */
+ ni->ni_net = net_l;
+
+ rc = lnet_startup_lndni(ni, tun);
+
+ LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
+ ni->ni_net->net_lnd->lnd_query != NULL);
+
+ if (rc < 0)
+ goto failed1;
+
+ lnet_ni_addref(ni);
+ list_add_tail(&ni->ni_netlist, &local_ni_list);
+
+ ni_count++;
+ }
+
+ lnet_net_lock(LNET_LOCK_EX);
+ list_splice_tail(&local_ni_list, &net_l->net_ni_list);
+ lnet_incr_dlc_seq();
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* if the network is not unique then we don't want to keep
+ * it around after we're done. Free it. Otherwise add that
+ * net to the global the_lnet.ln_nets */
+ if (net_l != net && net_l != NULL) {
+ /*
+ * TODO - note. currently the tunables can not be updated
+ * once added
+ */
+ lnet_net_free(net);
+ } else {
+ net->net_state = LNET_NET_STATE_ACTIVE;
+ /*
+		 * restore tunables after they have been overwritten by the
+		 * lnd
+ */
+ if (peer_timeout != -1)
+ net->net_tunables.lct_peer_timeout = peer_timeout;
+ if (maxtxcredits != -1)
+ net->net_tunables.lct_max_tx_credits = maxtxcredits;
+ if (peerrtrcredits != -1)
+ net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
+
+ lnet_net_lock(LNET_LOCK_EX);
+ list_add_tail(&net->net_list, &the_lnet.ln_nets);
+ lnet_net_unlock(LNET_LOCK_EX);
+ }
+
+ return ni_count;
+
+failed1:
+ /*
+ * shutdown the new NIs that are being started up
+ * free the NET being started
+ */
+ while (!list_empty(&local_ni_list)) {
+ ni = list_entry(local_ni_list.next, struct lnet_ni,
+ ni_netlist);
+
+ lnet_shutdown_lndni(ni);
+ }
+
+failed0:
+ lnet_net_free(net);
+
+ return rc;
+}
+
+/*
+ * Start up every network on \a netlist, consuming the list.
+ *
+ * Returns the total number of NIs started on success.  On any
+ * failure, all networks -- including those started here -- are torn
+ * down via lnet_shutdown_lndnets() and the negative error code from
+ * lnet_startup_lndnet() is returned.
+ */
+static int
+lnet_startup_lndnets(struct list_head *netlist)
+{
+	struct lnet_net *net;
+	int rc;
+	int ni_count = 0;
+
+	while (!list_empty(netlist)) {
+		net = list_entry(netlist->next, struct lnet_net, net_list);
+		list_del_init(&net->net_list);
+
+		/* NULL tunables: use whatever the net already carries */
+		rc = lnet_startup_lndnet(net, NULL);
+
+		if (rc < 0)
+			goto failed;
+
+		ni_count += rc;
+	}
+
+	return ni_count;
+failed:
+	lnet_shutdown_lndnets();
+
+	return rc;
+}
+
+/**
+ * Initialize LNet library.
+ *
+ * Automatically called at module loading time. Caller has to call
+ * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
+ * latter returned 0. It must be called exactly once.
+ *
+ * \retval 0 on success
+ * \retval -ve on failures.
+ */
+int lnet_lib_init(void)
+{
+ int rc;
+
+ lnet_assert_wire_constants();
+
+ memset(&the_lnet, 0, sizeof(the_lnet));
+
+ /* refer to global cfs_cpt_table for now */
+ the_lnet.ln_cpt_table = cfs_cpt_table;
+ the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_table);
+
+ LASSERT(the_lnet.ln_cpt_number > 0);
+ if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
+ /* we are under risk of consuming all lh_cookie */
+ CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
+ "please change setting of CPT-table and retry\n",
+ the_lnet.ln_cpt_number, LNET_CPT_MAX);
+ return -E2BIG;
+ }
+
+ while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
+ the_lnet.ln_cpt_bits++;
+
+ rc = lnet_create_locks();
+ if (rc != 0) {
+ CERROR("Can't create LNet global locks: %d\n", rc);
+ return rc;
+ }
+
+ the_lnet.ln_refcount = 0;
+ LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
INIT_LIST_HEAD(&the_lnet.ln_lnds);
+ INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
struct lnet_ping_info *pinfo;
lnet_handle_md_t md_handle;
struct list_head net_head;
+ struct lnet_net *net;
INIT_LIST_HEAD(&net_head);
return rc;
}
- /* Add in the loopback network */
- if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) {
+ /* create a network for Loopback network */
+ net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
+ if (net == NULL) {
+ rc = -ENOMEM;
+ goto err_empty_list;
+ }
+
+ /* Add in the loopback NI */
+ if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
rc = -ENOMEM;
goto err_empty_list;
}
* in this case. On cleanup in case of failure only clean up
* routes if it has been loaded */
if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_networks(&net_head,
- lnet_get_networks());
+ rc = lnet_parse_networks(&net_head, lnet_get_networks(),
+ use_tcp_bonding);
if (rc < 0)
goto err_empty_list;
}
- ni_count = lnet_startup_lndnis(&net_head);
+ ni_count = lnet_startup_lndnets(&net_head);
if (ni_count < 0) {
rc = ni_count;
goto err_empty_list;
if (!the_lnet.ln_nis_from_mod_params)
lnet_destroy_routes();
err_shutdown_lndnis:
- lnet_shutdown_lndnis();
+ lnet_shutdown_lndnets();
err_empty_list:
lnet_unprepare();
LASSERT(rc < 0);
mutex_unlock(&the_lnet.ln_api_mutex);
while (!list_empty(&net_head)) {
- struct lnet_ni *ni;
+ struct lnet_net *net;
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
}
return rc;
}
lnet_acceptor_stop();
lnet_destroy_routes();
- lnet_shutdown_lndnis();
+ lnet_shutdown_lndnets();
lnet_unprepare();
}
}
EXPORT_SYMBOL(LNetNIFini);
+
+static int lnet_handle_dbg_task(struct lnet_ioctl_dbg *dbg,
+ struct lnet_dbg_task_info *dbg_info)
+{
+ switch (dbg->dbg_task) {
+ case LNET_DBG_INCR_DLC_SEQ:
+ lnet_incr_dlc_seq();
+ }
+
+ return 0;
+}
/**
* Grabs the ni data from the ni structure and fills the out
* parameters
*
* \param[in] ni network interface structure
- * \param[out] cpt_count the number of cpts the ni is on
- * \param[out] nid Network Interface ID
- * \param[out] peer_timeout NI peer timeout
- * \param[out] peer_tx_crdits NI peer transmit credits
- * \param[out] peer_rtr_credits NI peer router credits
- * \param[out] max_tx_credits NI max transmit credit
- * \param[out] net_config Network configuration
+ * \param[out] cfg_ni NI config information
+ * \param[out] tun network and LND tunables
*/
static void
-lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
+lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
+ struct lnet_ioctl_config_lnd_tunables *tun,
+ struct lnet_ioctl_element_stats *stats,
+ __u32 tun_size)
+{
+ size_t min_size = 0;
+ int i;
+
+ if (!ni || !cfg_ni || !tun)
+ return;
+
+ if (ni->ni_interfaces[0] != NULL) {
+ for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
+ if (ni->ni_interfaces[i] != NULL) {
+ strncpy(cfg_ni->lic_ni_intf[i],
+ ni->ni_interfaces[i],
+ sizeof(cfg_ni->lic_ni_intf[i]));
+ }
+ }
+ }
+
+ cfg_ni->lic_nid = ni->ni_nid;
+ cfg_ni->lic_status = ni->ni_status->ns_status;
+ cfg_ni->lic_tcp_bonding = use_tcp_bonding;
+ cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
+
+ memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
+
+ if (stats) {
+ stats->send_count = atomic_read(&ni->ni_stats.send_count);
+ stats->recv_count = atomic_read(&ni->ni_stats.recv_count);
+ }
+
+ /*
+ * tun->lt_tun will always be present, but in order to be
+ * backwards compatible, we need to deal with the cases when
+ * tun->lt_tun is smaller than what the kernel has, because it
+ * comes from an older version of a userspace program; in that case
+ * we copy only as much information as the available space allows.
+ */
+ min_size = tun_size - sizeof(tun->lt_cmn);
+ memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
+
+ /* copy over the cpts */
+ if (ni->ni_ncpts == LNET_CPT_NUMBER &&
+ ni->ni_cpts == NULL) {
+ for (i = 0; i < ni->ni_ncpts; i++)
+ cfg_ni->lic_cpts[i] = i;
+ } else {
+ for (i = 0;
+ ni->ni_cpts != NULL && i < ni->ni_ncpts &&
+ i < LNET_MAX_SHOW_NUM_CPT;
+ i++)
+ cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
+ }
+ cfg_ni->lic_ncpts = ni->ni_ncpts;
+}
+
+/**
+ * NOTE: This is a legacy function left in the code to be backwards
+ * compatible with older userspace programs. It should eventually be
+ * removed.
+ *
+ * Grabs the ni data from the ni structure and fills the out
+ * parameters
+ *
+ * \param[in] ni network interface structure
+ * \param[out] config config information
+ */
+static void
+lnet_fill_ni_info_legacy(struct lnet_ni *ni,
+ struct lnet_ioctl_config_data *config)
{
struct lnet_ioctl_net_config *net_config;
struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
}
config->cfg_nid = ni->ni_nid;
- config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
- config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
- config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
- config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
+ config->cfg_config_u.cfg_net.net_peer_timeout =
+ ni->ni_net->net_tunables.lct_peer_timeout;
+ config->cfg_config_u.cfg_net.net_max_tx_credits =
+ ni->ni_net->net_tunables.lct_max_tx_credits;
+ config->cfg_config_u.cfg_net.net_peer_tx_credits =
+ ni->ni_net->net_tunables.lct_peer_tx_credits;
+ config->cfg_config_u.cfg_net.net_peer_rtr_credits =
+ ni->ni_net->net_tunables.lct_peer_rtr_credits;
net_config->ni_status = ni->ni_status->ns_status;
if (config->cfg_hdr.ioc_len > min_size)
tunable_size = config->cfg_hdr.ioc_len - min_size;
- /* Don't copy to much data to user space */
- min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+ /* Don't copy too much data to user space */
+ min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
- if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
- memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+ if (lnd_cfg && min_size) {
+ memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
config->cfg_config_u.cfg_net.net_interface_count = 1;
/* Tell user land that kernel side has less data */
- if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+ if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
config->cfg_hdr.ioc_len -= min_size;
}
}
}
-static int
+struct lnet_ni *
+lnet_get_ni_idx_locked(int idx)
+{
+ struct lnet_ni *ni;
+ struct lnet_net *net;
+
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (idx-- == 0)
+ return ni;
+ }
+ }
+
+ return NULL;
+}
+
+struct lnet_ni *
+lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
+{
+ struct lnet_ni *ni;
+ struct lnet_net *net = mynet;
+
+ if (prev == NULL) {
+ if (net == NULL)
+ net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
+ net_list);
+ ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+ ni_netlist);
+
+ return ni;
+ }
+
+ if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
+ /* if you reached the end of the ni list and the net is
+ * specified, then there are no more nis in that net */
+ if (net != NULL)
+ return NULL;
+
+ /* we reached the end of this net ni list. move to the
+ * next net */
+ if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
+ /* no more nets and no more NIs. */
+ return NULL;
+
+ /* get the next net */
+ net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
+ net_list);
+ /* get the ni on it */
+ ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+ ni_netlist);
+
+ return ni;
+ }
+
+ /* there are more nis left */
+ ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
+
+ return ni;
+}
+
+int
lnet_get_net_config(struct lnet_ioctl_config_data *config)
{
struct lnet_ni *ni;
- struct list_head *tmp;
- int idx = config->cfg_count;
+ int cpt;
int rc = -ENOENT;
- int cpt, i = 0;
-
- if (unlikely(!config->cfg_bulk))
- return -EINVAL;
+ int idx = config->cfg_count;
cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (i++ != idx)
- continue;
+ ni = lnet_get_ni_idx_locked(idx);
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ if (ni != NULL) {
+ rc = 0;
lnet_ni_lock(ni);
- lnet_fill_ni_info(ni, config);
+ lnet_fill_ni_info_legacy(ni, config);
lnet_ni_unlock(ni);
- rc = 0;
- break;
}
lnet_net_unlock(cpt);
}
int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
+lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
+ struct lnet_ioctl_config_lnd_tunables *tun,
+ struct lnet_ioctl_element_stats *stats,
+ __u32 tun_size)
{
- char *nets = conf->cfg_config_u.cfg_net.net_intf;
- struct lnet_ping_info *pinfo;
- lnet_handle_md_t md_handle;
struct lnet_ni *ni;
- struct list_head net_head;
- int rc;
- lnet_remotenet_t *rnet;
+ int cpt;
+ int rc = -ENOENT;
- INIT_LIST_HEAD(&net_head);
+ if (!cfg_ni || !tun || !stats)
+ return -EINVAL;
- /* Create a ni structure for the network string */
- rc = lnet_parse_networks(&net_head, nets);
- if (rc <= 0)
- return rc == 0 ? -EINVAL : rc;
+ cpt = lnet_net_lock_current();
- mutex_lock(&the_lnet.ln_api_mutex);
+ ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
- if (rc > 1) {
- rc = -EINVAL; /* only add one interface per call */
- goto failed0;
+ if (ni) {
+ rc = 0;
+ lnet_ni_lock(ni);
+ lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
+ lnet_ni_unlock(ni);
}
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
+ lnet_net_unlock(cpt);
+ return rc;
+}
+
+static int lnet_add_net_common(struct lnet_net *net,
+ struct lnet_ioctl_config_lnd_tunables *tun)
+{
+ __u32 net_id;
+ lnet_ping_info_t *pinfo;
+ lnet_handle_md_t md_handle;
+ int rc;
+ lnet_remotenet_t *rnet;
+ int net_ni_count;
+ int num_acceptor_nets;
lnet_net_lock(LNET_LOCK_EX);
- rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
+ rnet = lnet_find_rnet_locked(net->net_id);
lnet_net_unlock(LNET_LOCK_EX);
- /* make sure that the net added doesn't invalidate the current
- * configuration LNet is keeping */
- if (rnet != NULL) {
+ /*
+ * make sure that the net added doesn't invalidate the current
+ * configuration LNet is keeping
+ */
+ if (rnet) {
CERROR("Adding net %s will invalidate routing configuration\n",
- nets);
- rc = -EUSERS;
- goto failed0;
+ libcfs_net2str(net->net_id));
+ lnet_net_free(net);
+ return -EUSERS;
}
- rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
+ /*
+ * make sure you calculate the correct number of slots in the ping
+ * info. Since the ping info is a flattened list of all the NIs,
+ * we should allocate enough slots to accommodate the number of NIs
+ * which will be added.
+ *
+ * since ni hasn't been configured yet, use
+ * lnet_get_net_ni_count_pre() which checks the net_ni_added list
+ */
+ net_ni_count = lnet_get_net_ni_count_pre(net);
+
+ rc = lnet_ping_info_setup(&pinfo, &md_handle,
+ net_ni_count + lnet_get_ni_count(),
false);
- if (rc != 0)
- goto failed0;
+ if (rc < 0) {
+ lnet_net_free(net);
+ return rc;
+ }
- list_del_init(&ni->ni_list);
+ if (tun)
+ memcpy(&net->net_tunables,
+ &tun->lt_cmn, sizeof(net->net_tunables));
+ else
+ memset(&net->net_tunables, -1, sizeof(net->net_tunables));
- rc = lnet_startup_lndni(ni, conf);
- if (rc != 0)
- goto failed1;
+ /*
+ * before starting this network get a count of the current TCP
+ * networks which require the acceptor thread running. If that
+ * count is == 0 before we start up this network, then we'd want to
+ * start up the acceptor thread after starting up this network
+ */
+ num_acceptor_nets = lnet_count_acceptor_nets();
- if (ni->ni_lnd->lnd_accept != NULL) {
+ net_id = net->net_id;
+
+ rc = lnet_startup_lndnet(net,
+ (tun) ? &tun->lt_tun : NULL);
+ if (rc < 0)
+ goto failed;
+
+ lnet_net_lock(LNET_LOCK_EX);
+ net = lnet_get_net_locked(net_id);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ LASSERT(net);
+
+ /*
+ * Start the acceptor thread if this is the first network
+ * being added that requires the thread.
+ */
+ if (net->net_lnd->lnd_accept && num_acceptor_nets == 0) {
rc = lnet_acceptor_start();
if (rc < 0) {
- /* shutdown the ni that we just started */
+ /* shutdown the net that we just started */
CERROR("Failed to start up acceptor thread\n");
- lnet_shutdown_lndni(ni);
- goto failed1;
+ lnet_shutdown_lndnet(net);
+ goto failed;
}
}
+ lnet_net_lock(LNET_LOCK_EX);
+ lnet_peer_net_added(net);
+ lnet_net_unlock(LNET_LOCK_EX);
+
lnet_ping_target_update(pinfo, md_handle);
- mutex_unlock(&the_lnet.ln_api_mutex);
return 0;
-failed1:
+failed:
lnet_ping_md_unlink(pinfo, &md_handle);
lnet_ping_info_free(pinfo);
-failed0:
+ return rc;
+}
+
+static int lnet_handle_legacy_ip2nets(char *ip2nets,
+ struct lnet_ioctl_config_lnd_tunables *tun)
+{
+ struct lnet_net *net;
+ char *nets;
+ int rc;
+ struct list_head net_head;
+
+ INIT_LIST_HEAD(&net_head);
+
+ rc = lnet_parse_ip2nets(&nets, ip2nets);
+ if (rc < 0)
+ return rc;
+
+ rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
+ if (rc < 0)
+ return rc;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ while (!list_empty(&net_head)) {
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+ rc = lnet_add_net_common(net, tun);
+ if (rc < 0)
+ goto out;
+ }
+
+out:
mutex_unlock(&the_lnet.ln_api_mutex);
+
while (!list_empty(&net_head)) {
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
}
return rc;
}
-int
-lnet_dyn_del_ni(__u32 net)
+int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf)
{
- lnet_ni_t *ni;
- struct lnet_ping_info *pinfo;
+ struct lnet_net *net;
+ struct lnet_ni *ni;
+ struct lnet_ioctl_config_lnd_tunables *tun = NULL;
+ int rc, i;
+ __u32 net_id;
+
+ /* get the tunables if they are available */
+ if (conf->lic_cfg_hdr.ioc_len >=
+ sizeof(*conf) + sizeof(*tun))
+ tun = (struct lnet_ioctl_config_lnd_tunables *)
+ conf->lic_bulk;
+
+ /* handle legacy ip2nets from DLC */
+ if (conf->lic_legacy_ip2nets[0] != '\0')
+ return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
+ tun);
+
+ net_id = LNET_NIDNET(conf->lic_nid);
+
+ net = lnet_net_alloc(net_id, NULL);
+ if (!net)
+ return -ENOMEM;
+
+ for (i = 0; i < conf->lic_ncpts; i++) {
+ if (conf->lic_cpts[i] >= LNET_CPT_NUMBER)
+ return -EINVAL;
+ }
+
+ ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
+ conf->lic_ni_intf[0]);
+ if (!ni)
+ return -ENOMEM;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+
+ rc = lnet_add_net_common(net, tun);
+
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+}
+
+int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
+{
+ struct lnet_net *net;
+ struct lnet_ni *ni;
+ __u32 net_id = LNET_NIDNET(conf->lic_nid);
+ lnet_ping_info_t *pinfo;
lnet_handle_md_t md_handle;
int rc;
+ int net_count;
+ __u32 addr;
/* don't allow userspace to shutdown the LOLND */
- if (LNET_NETTYP(net) == LOLND)
+ if (LNET_NETTYP(net_id) == LOLND)
return -EINVAL;
mutex_lock(&the_lnet.ln_api_mutex);
+
+ lnet_net_lock(0);
+
+ net = lnet_get_net_locked(net_id);
+ if (!net) {
+ CERROR("net %s not found\n",
+ libcfs_net2str(net_id));
+ rc = -ENOENT;
+ goto net_unlock;
+ }
+
+ addr = LNET_NIDADDR(conf->lic_nid);
+ if (addr == 0) {
+ /* remove the entire net */
+ net_count = lnet_get_net_ni_count_locked(net);
+
+ lnet_net_unlock(0);
+
+ /* create and link a new ping info, before removing the old one */
+ rc = lnet_ping_info_setup(&pinfo, &md_handle,
+ lnet_get_ni_count() - net_count,
+ false);
+ if (rc != 0)
+ goto out;
+
+ lnet_shutdown_lndnet(net);
+
+ if (lnet_count_acceptor_nets() == 0)
+ lnet_acceptor_stop();
+
+ lnet_ping_target_update(pinfo, md_handle);
+
+ goto out;
+ }
+
+ ni = lnet_nid2ni_locked(conf->lic_nid, 0);
+ if (!ni) {
+ CERROR("nid %s not found \n",
+ libcfs_nid2str(conf->lic_nid));
+ rc = -ENOENT;
+ goto net_unlock;
+ }
+
+ net_count = lnet_get_net_ni_count_locked(net);
+
+ lnet_net_unlock(0);
+
/* create and link a new ping info, before removing the old one */
rc = lnet_ping_info_setup(&pinfo, &md_handle,
lnet_get_ni_count() - 1, false);
if (rc != 0)
goto out;
- ni = lnet_net2ni(net);
- if (ni == NULL) {
- rc = -EINVAL;
+ lnet_shutdown_lndni(ni);
+
+ if (lnet_count_acceptor_nets() == 0)
+ lnet_acceptor_stop();
+
+ lnet_ping_target_update(pinfo, md_handle);
+
+ /* check if the net is empty and remove it if it is */
+ if (net_count == 1)
+ lnet_shutdown_lndnet(net);
+
+ goto out;
+
+net_unlock:
+ lnet_net_unlock(0);
+out:
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+}
+
+/*
+ * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
+ * They are only expected to be called for unique networks.
+ * That can be as a result of older DLC library
+ * calls. Multi-Rail DLC and beyond no longer uses these APIs.
+ */
+int
+lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
+{
+ struct lnet_net *net;
+ struct list_head net_head;
+ int rc;
+ struct lnet_ioctl_config_lnd_tunables tun;
+ char *nets = conf->cfg_config_u.cfg_net.net_intf;
+
+ INIT_LIST_HEAD(&net_head);
+
+ /* Create a net/ni structures for the network string */
+ rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
+ if (rc <= 0)
+ return rc == 0 ? -EINVAL : rc;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+
+ if (rc > 1) {
+ rc = -EINVAL; /* only add one network per call */
goto failed;
}
- /* decrement the reference counter taken by lnet_net2ni() */
- lnet_ni_decref_locked(ni, 0);
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
- lnet_shutdown_lndni(ni);
+ LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
+
+ memset(&tun, 0, sizeof(tun));
+
+ tun.lt_cmn.lct_peer_timeout =
+ conf->cfg_config_u.cfg_net.net_peer_timeout;
+ tun.lt_cmn.lct_peer_tx_credits =
+ conf->cfg_config_u.cfg_net.net_peer_tx_credits;
+ tun.lt_cmn.lct_peer_rtr_credits =
+ conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+ tun.lt_cmn.lct_max_tx_credits =
+ conf->cfg_config_u.cfg_net.net_max_tx_credits;
+
+ rc = lnet_add_net_common(net, &tun);
+ if (rc != 0)
+ goto failed;
- if (lnet_count_acceptor_nis() == 0)
+ return 0;
+
+failed:
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ while (!list_empty(&net_head)) {
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
+ }
+ return rc;
+}
+
+int
+lnet_dyn_del_net(__u32 net_id)
+{
+ struct lnet_net *net;
+ struct lnet_ping_info *pinfo;
+ lnet_handle_md_t md_handle;
+ int rc;
+ int net_ni_count;
+
+ /* don't allow userspace to shutdown the LOLND */
+ if (LNET_NETTYP(net_id) == LOLND)
+ return -EINVAL;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+
+ lnet_net_lock(0);
+
+ net = lnet_get_net_locked(net_id);
+ if (net == NULL) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ net_ni_count = lnet_get_net_ni_count_locked(net);
+
+ lnet_net_unlock(0);
+
+ /* create and link a new ping info, before removing the old one */
+ rc = lnet_ping_info_setup(&pinfo, &md_handle,
+ lnet_get_ni_count() - net_ni_count, false);
+ if (rc != 0)
+ goto out;
+
+ lnet_shutdown_lndnet(net);
+
+ if (lnet_count_acceptor_nets() == 0)
lnet_acceptor_stop();
lnet_ping_target_update(pinfo, md_handle);
- goto out;
-failed:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
+
out:
mutex_unlock(&the_lnet.ln_api_mutex);
return rc;
}
+void lnet_incr_dlc_seq(void)
+{
+ atomic_inc(&lnet_dlc_seq_no);
+}
+
+__u32 lnet_get_dlc_seq_locked(void)
+{
+ return atomic_read(&lnet_dlc_seq_no);
+}
+
/**
* LNet ioctl handler.
*
if (config->cfg_hdr.ioc_len < sizeof(*config))
return -EINVAL;
- return lnet_get_route(config->cfg_count,
- &config->cfg_net,
- &config->cfg_config_u.cfg_route.rtr_hop,
- &config->cfg_nid,
- &config->cfg_config_u.cfg_route.rtr_flags,
- &config->cfg_config_u.cfg_route.
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_get_route(config->cfg_count,
+ &config->cfg_net,
+ &config->cfg_config_u.cfg_route.rtr_hop,
+ &config->cfg_nid,
+ &config->cfg_config_u.cfg_route.rtr_flags,
+ &config->cfg_config_u.cfg_route.
rtr_priority);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
+
+ case IOC_LIBCFS_GET_LOCAL_NI: {
+ struct lnet_ioctl_config_ni *cfg_ni;
+ struct lnet_ioctl_config_lnd_tunables *tun = NULL;
+ struct lnet_ioctl_element_stats *stats;
+ __u32 tun_size;
+
+ cfg_ni = arg;
+ /* get the tunables if they are available */
+ if (cfg_ni->lic_cfg_hdr.ioc_len <
+ sizeof(*cfg_ni) + sizeof(*stats)+ sizeof(*tun))
+ return -EINVAL;
+
+ stats = (struct lnet_ioctl_element_stats *)
+ cfg_ni->lic_bulk;
+ tun = (struct lnet_ioctl_config_lnd_tunables *)
+ (cfg_ni->lic_bulk + sizeof(*stats));
+
+ tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
+ sizeof(*stats);
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
+ }
case IOC_LIBCFS_GET_NET: {
size_t total = sizeof(*config) +
if (config->cfg_hdr.ioc_len < total)
return -EINVAL;
- return lnet_get_net_config(config);
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_get_net_config(config);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
}
case IOC_LIBCFS_GET_LNET_STATS:
if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
return -EINVAL;
+ mutex_lock(&the_lnet.ln_api_mutex);
lnet_counters_get(&lnet_stats->st_cntrs);
+ mutex_unlock(&the_lnet.ln_api_mutex);
return 0;
}
mutex_unlock(&the_lnet.ln_api_mutex);
return rc;
+ case IOC_LIBCFS_SET_NUMA_RANGE: {
+ struct lnet_ioctl_numa_range *numa;
+ numa = arg;
+ if (numa->nr_hdr.ioc_len != sizeof(*numa))
+ return -EINVAL;
+ mutex_lock(&the_lnet.ln_api_mutex);
+ lnet_numa_range = numa->nr_range;
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return 0;
+ }
+
+ case IOC_LIBCFS_GET_NUMA_RANGE: {
+ struct lnet_ioctl_numa_range *numa;
+ numa = arg;
+ if (numa->nr_hdr.ioc_len != sizeof(*numa))
+ return -EINVAL;
+ numa->nr_range = lnet_numa_range;
+ return 0;
+ }
+
case IOC_LIBCFS_GET_BUF: {
struct lnet_ioctl_pool_cfg *pool_cfg;
size_t total = sizeof(*config) + sizeof(*pool_cfg);
return -EINVAL;
pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
- return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
+ }
+
+ case IOC_LIBCFS_ADD_PEER_NI: {
+ struct lnet_ioctl_peer_cfg *cfg = arg;
+
+ if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
+ return -EINVAL;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_add_peer_ni_to_peer(cfg->prcfg_prim_nid,
+ cfg->prcfg_cfg_nid,
+ cfg->prcfg_mr);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
+ }
+
+ case IOC_LIBCFS_DEL_PEER_NI: {
+ struct lnet_ioctl_peer_cfg *cfg = arg;
+
+ if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
+ return -EINVAL;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_del_peer_ni_from_peer(cfg->prcfg_prim_nid,
+ cfg->prcfg_cfg_nid);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
}
case IOC_LIBCFS_GET_PEER_INFO: {
if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
return -EINVAL;
- return lnet_get_peer_info(
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_get_peer_ni_info(
peer_info->pr_count,
&peer_info->pr_nid,
peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
&peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
+ &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_PEER_NI: {
+ struct lnet_ioctl_peer_cfg *cfg = arg;
+ struct lnet_peer_ni_credit_info *lpni_cri;
+ struct lnet_ioctl_element_stats *lpni_stats;
+ size_t total = sizeof(*cfg) + sizeof(*lpni_cri) +
+ sizeof(*lpni_stats);
+
+ if (cfg->prcfg_hdr.ioc_len < total)
+ return -EINVAL;
+
+ lpni_cri = (struct lnet_peer_ni_credit_info*) cfg->prcfg_bulk;
+ lpni_stats = (struct lnet_ioctl_element_stats *)
+ (cfg->prcfg_bulk + sizeof(*lpni_cri));
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_prim_nid,
+ &cfg->prcfg_cfg_nid, &cfg->prcfg_mr,
+ lpni_cri, lpni_stats);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return rc;
}
case IOC_LIBCFS_NOTIFY_ROUTER: {
data->ioc_count = rc;
return 0;
}
+
+ case IOC_LIBCFS_DBG: {
+ struct lnet_ioctl_dbg *dbg = arg;
+ struct lnet_dbg_task_info *dbg_info;
+ size_t total = sizeof(*dbg) + sizeof(*dbg_info);
+
+ if (dbg->dbg_hdr.ioc_len < total)
+ return -EINVAL;
+
+ dbg_info = (struct lnet_dbg_task_info*) dbg->dbg_bulk;
+
+ return lnet_handle_dbg_task(dbg, dbg_info);
+ }
+
default:
- ni = lnet_net2ni(data->ioc_net);
+ ni = lnet_net2ni_addref(data->ioc_net);
if (ni == NULL)
return -EINVAL;
- if (ni->ni_lnd->lnd_ctl == NULL)
+ if (ni->ni_net->net_lnd->lnd_ctl == NULL)
rc = -EINVAL;
else
- rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+ rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
lnet_ni_decref(ni);
return rc;
LNetGetId(unsigned int index, lnet_process_id_t *id)
{
struct lnet_ni *ni;
- struct list_head *tmp;
+ struct lnet_net *net;
int cpt;
int rc = -ENOENT;
cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (index-- != 0)
- continue;
-
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (index-- != 0)
+ continue;
- id->nid = ni->ni_nid;
- id->pid = the_lnet.ln_pid;
- rc = 0;
- break;
+ id->nid = ni->ni_nid;
+ id->pid = the_lnet.ln_pid;
+ rc = 0;
+ break;
+ }
}
lnet_net_unlock(cpt);
#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */
#define LNET_SINGLE_TEXTBUF_NOB (4<<10)
+#define SPACESTR " \t\v\r\n"
+#define DELIMITERS ":()[]"
+
static void
-lnet_syntax(char *name, char *str, int offset, int width)
+lnet_syntax(const char *name, const char *str, int offset, int width)
{
static char dots[LNET_SINGLE_TEXTBUF_NOB];
static char dashes[LNET_SINGLE_TEXTBUF_NOB];
}
}
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
+bool
+lnet_net_unique(__u32 net_id, struct list_head *netlist,
+ struct lnet_net **net)
+{
+ struct lnet_net *net_l;
+
+ if (!netlist)
+ return true;
+
+ list_for_each_entry(net_l, netlist, net_list) {
+ if (net_l->net_id == net_id) {
+ if (net != NULL)
+ *net = net_l;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* check that the NI is unique within the list of NIs already added to
+ * a network */
+bool
+lnet_ni_unique_net(struct list_head *nilist, char *iface)
{
struct list_head *tmp;
- lnet_ni_t *ni;
+ struct lnet_ni *ni;
list_for_each(tmp, nilist) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ ni = list_entry(tmp, struct lnet_ni, ni_netlist);
- if (LNET_NIDNET(ni->ni_nid) == net)
- return 0;
+ if (ni->ni_interfaces[0] != NULL &&
+ strncmp(ni->ni_interfaces[0], iface, strlen(iface)) == 0)
+ return false;
}
- return 1;
+ return true;
+}
+
+/* check that the NI is unique to the interfaces within the same NI.
+ * This is only a consideration if use_tcp_bonding is set */
+static bool
+lnet_ni_unique_ni(char *iface_list[LNET_MAX_INTERFACES], char *iface)
+{
+ int i;
+ for (i = 0; i < LNET_MAX_INTERFACES; i++) {
+ if (iface_list[i] != NULL &&
+ strncmp(iface_list[i], iface, strlen(iface)) == 0)
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+in_array(__u32 *array, __u32 size, __u32 value)
+{
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (array[i] == value)
+ return false;
+ }
+
+ return true;
+}
+
+static int
+lnet_net_append_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+ __u32 *added_cpts = NULL;
+ int i, j = 0, rc = 0;
+
+ /*
+ * no need to go further since a subset of the NIs already exists on
+ * all CPTs
+ */
+ if (net->net_ncpts == LNET_CPT_NUMBER)
+ return 0;
+
+ if (cpts == NULL) {
+ /* there is an NI which will exist on all CPTs */
+ if (net->net_cpts != NULL)
+ LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+ net->net_ncpts);
+ net->net_cpts = NULL;
+ net->net_ncpts = LNET_CPT_NUMBER;
+ return 0;
+ }
+
+ if (net->net_cpts == NULL) {
+ LIBCFS_ALLOC(net->net_cpts, sizeof(*net->net_cpts) * ncpts);
+ if (net->net_cpts == NULL)
+ return -ENOMEM;
+ memcpy(net->net_cpts, cpts, ncpts);
+ net->net_ncpts = ncpts;
+ return 0;
+ }
+
+ LIBCFS_ALLOC(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER);
+ if (added_cpts == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ncpts; i++) {
+ if (!in_array(net->net_cpts, net->net_ncpts, cpts[i])) {
+ added_cpts[j] = cpts[i];
+ j++;
+ }
+ }
+
+ /* append the new cpts if any to the list of cpts in the net */
+ if (j > 0) {
+ __u32 *array = NULL, *loc;
+ __u32 total_entries = j + net->net_ncpts;
+
+ LIBCFS_ALLOC(array, sizeof(*net->net_cpts) * total_entries);
+ if (array == NULL) {
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ memcpy(array, net->net_cpts, net->net_ncpts);
+ loc = array + net->net_ncpts;
+ memcpy(loc, added_cpts, j);
+
+ LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+ net->net_ncpts);
+ net->net_ncpts = total_entries;
+ net->net_cpts = array;
+ }
+
+failed:
+ LIBCFS_FREE(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER);
+
+ return rc;
+}
+
+static void
+lnet_net_remove_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+ struct lnet_ni *ni;
+ int rc;
+
+ /*
+ * Operation Assumption:
+ * This function is called after an NI has been removed from
+ * its parent net.
+ *
+ * if we're removing an NI which exists on all CPTs then
+ * we have to check if any of the other NIs on this net also
+ * exists on all CPTs. If none, then we need to build our Net CPT
+ * list based on the remaining NIs.
+ *
+ * If the NI being removed exists on a subset of the CPTs then we
+ * also rebuild the Net CPT list based on the remaining NIs, which
+ * should result in the expected Net CPT list.
+ */
+
+ /*
+ * sometimes this function can be called due to some failure
+ * creating an NI, before any of the cpts are allocated, so check
+ * for that case and don't do anything
+ */
+ if (ncpts == 0)
+ return;
+
+ if (ncpts == LNET_CPT_NUMBER) {
+ /*
+ * first iteration through the NI list in the net to see
+ * if any of the NIs exist on all the CPTs. If one is
+ * found then our job is done.
+ */
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (ni->ni_ncpts == LNET_CPT_NUMBER)
+ return;
+ }
+ }
+
+ /*
+ * Rebuild the Net CPT list again, thereby including only the
+ * CPTs which the remaining NIs are associated with.
+ */
+ if (net->net_cpts != NULL) {
+ LIBCFS_FREE(net->net_cpts,
+ sizeof(*net->net_cpts) * net->net_ncpts);
+ net->net_cpts = NULL;
+ }
+
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts,
+ net);
+ if (rc != 0) {
+ CERROR("Out of Memory\n");
+ /*
+ * do our best to keep on going. Delete
+ * the net cpts and set it to NULL. This
+ * way we can keep on going but less
+ * efficiently, since memory accesses might be
+ * across CPT lines.
+ */
+ if (net->net_cpts != NULL) {
+ LIBCFS_FREE(net->net_cpts,
+ sizeof(*net->net_cpts) *
+ net->net_ncpts);
+ net->net_cpts = NULL;
+ net->net_ncpts = LNET_CPT_NUMBER;
+ }
+ return;
+ }
+ }
}
void
{
int i;
+ lnet_net_remove_cpts(ni->ni_cpts, ni->ni_ncpts, ni->ni_net);
+
if (ni->ni_refs != NULL)
cfs_percpt_free(ni->ni_refs);
if (ni->ni_cpts != NULL)
cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
- if (ni->ni_lnd_tunables != NULL)
- LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
-
for (i = 0; i < LNET_MAX_INTERFACES &&
ni->ni_interfaces[i] != NULL; i++) {
LIBCFS_FREE(ni->ni_interfaces[i],
LIBCFS_FREE(ni, sizeof(*ni));
}
-lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
+/*
+ * Free a struct lnet_net: releases every NI still linked on the
+ * net_ni_added and net_ni_list lists, the net_cpts array, and finally
+ * the net itself. The zombie list must already be empty (asserted).
+ */
+void
+lnet_net_free(struct lnet_net *net)
+{
+ struct list_head *tmp, *tmp2;
+ struct lnet_ni *ni;
+
+ LASSERT(list_empty(&net->net_ni_zombie));
+
+ /*
+ * delete any nis that haven't been added yet. This could happen
+ * if there is a failure on net startup
+ */
+ list_for_each_safe(tmp, tmp2, &net->net_ni_added) {
+ ni = list_entry(tmp, struct lnet_ni, ni_netlist);
+ list_del_init(&ni->ni_netlist);
+ lnet_ni_free(ni);
+ }
+
+ /* delete any nis which have been started. */
+ list_for_each_safe(tmp, tmp2, &net->net_ni_list) {
+ ni = list_entry(tmp, struct lnet_ni, ni_netlist);
+ list_del_init(&ni->ni_netlist);
+ lnet_ni_free(ni);
+ }
+
+ if (net->net_cpts != NULL)
+ LIBCFS_FREE(net->net_cpts,
+ sizeof(*net->net_cpts) * net->net_ncpts);
+
+ LIBCFS_FREE(net, sizeof(*net));
+}
+
+/*
+ * Allocate and initialize a struct lnet_net for @net_id and, if
+ * @net_list is non-NULL, append it to that list. Returns NULL on
+ * duplicate net or allocation failure.
+ */
+struct lnet_net *
+lnet_net_alloc(__u32 net_id, struct list_head *net_list)
+{
+ struct lnet_net *net;
+
+ if (!lnet_net_unique(net_id, net_list, NULL)) {
+ CERROR("Duplicate net %s. Ignore\n",
+ libcfs_net2str(net_id));
+ return NULL;
+ }
+
+ LIBCFS_ALLOC(net, sizeof(*net));
+ if (net == NULL) {
+ CERROR("Out of memory creating network %s\n",
+ libcfs_net2str(net_id));
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&net->net_list);
+ INIT_LIST_HEAD(&net->net_ni_list);
+ INIT_LIST_HEAD(&net->net_ni_added);
+ INIT_LIST_HEAD(&net->net_ni_zombie);
+
+ net->net_id = net_id;
+ net->net_state = LNET_NET_STATE_INIT;
+
+ /* initialize global parameters to undefined (-1 means "not set") */
+ net->net_tunables.lct_peer_timeout = -1;
+ net->net_tunables.lct_max_tx_credits = -1;
+ net->net_tunables.lct_peer_tx_credits = -1;
+ net->net_tunables.lct_peer_rtr_credits = -1;
+
+ if (net_list)
+ list_add_tail(&net->net_list, net_list);
+
+ return net;
+}
+
+/*
+ * Append interface name @iface to the first free slot of
+ * ni->ni_interfaces[], copying the string into freshly allocated
+ * memory. Returns 0 on success, -EINVAL on duplicate name or a full
+ * table, -ENOMEM on allocation failure (or NULL @ni).
+ */
+static int
+lnet_ni_add_interface(struct lnet_ni *ni, char *iface)
+{
+ int niface = 0;
+
+ if (ni == NULL)
+ return -ENOMEM;
+
+ if (!lnet_ni_unique_ni(ni->ni_interfaces, iface))
+ return -EINVAL;
+
+ /* Allocate a separate piece of memory and copy
+ * into it the string, so we don't have
+ * a dependency on the tokens string. This way we
+ * can free the tokens at the end of the function.
+ * The newly allocated ni_interfaces[] can be
+ * freed when freeing the NI */
+ while (niface < LNET_MAX_INTERFACES &&
+ ni->ni_interfaces[niface] != NULL)
+ niface++;
+
+ if (niface >= LNET_MAX_INTERFACES) {
+ LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+ "for net %s\n",
+ libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+ return -EINVAL;
+ }
+
+ LIBCFS_ALLOC(ni->ni_interfaces[niface],
+ strlen(iface) + 1);
+
+ if (ni->ni_interfaces[niface] == NULL) {
+ CERROR("Can't allocate net interface name\n");
+ return -ENOMEM;
+ }
+
+ /* size is strlen+1 so the NUL terminator is copied too */
+ strncpy(ni->ni_interfaces[niface], iface,
+ strlen(iface) + 1);
+
+ return 0;
+}
+
+static struct lnet_ni *
+lnet_ni_alloc_common(struct lnet_net *net, char *iface)
{
struct lnet_tx_queue *tq;
struct lnet_ni *ni;
- int rc;
int i;
- if (!lnet_net_unique(net, nilist)) {
- LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
- libcfs_net2str(net));
- return NULL;
- }
+ if (iface != NULL)
+ /* make sure that this NI is unique in the net it's
+ * being added to */
+ if (!lnet_ni_unique_net(&net->net_ni_added, iface))
+ return NULL;
LIBCFS_ALLOC(ni, sizeof(*ni));
if (ni == NULL) {
- CERROR("Out of memory creating network %s\n",
- libcfs_net2str(net));
+ CERROR("Out of memory creating network interface %s%s\n",
+ libcfs_net2str(net->net_id),
+ (iface != NULL) ? iface : "");
return NULL;
}
spin_lock_init(&ni->ni_lock);
INIT_LIST_HEAD(&ni->ni_cptlist);
+ INIT_LIST_HEAD(&ni->ni_netlist);
ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
sizeof(*ni->ni_refs[0]));
if (ni->ni_refs == NULL)
cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
INIT_LIST_HEAD(&tq->tq_delayed);
- if (el == NULL) {
+ ni->ni_net = net;
+ /* LND will fill in the address part of the NID */
+ ni->ni_nid = LNET_MKNID(net->net_id, 0);
+
+ /* Store net namespace in which current ni is being created */
+ if (current->nsproxy->net_ns != NULL)
+ ni->ni_net_ns = get_net(current->nsproxy->net_ns);
+ else
+ ni->ni_net_ns = NULL;
+
+ ni->ni_last_alive = ktime_get_real_seconds();
+ ni->ni_state = LNET_NI_STATE_INIT;
+ list_add_tail(&ni->ni_netlist, &net->net_ni_added);
+
+ /*
+ * if an interface name is provided then make sure to add in that
+ * interface name in NI
+ */
+ if (iface)
+ if (lnet_ni_add_interface(ni, iface) != 0)
+ goto failed;
+
+ return ni;
+failed:
+ lnet_ni_free(ni);
+ return NULL;
+}
+
+/* allocate and add to the provided network */
+struct lnet_ni *
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
+{
+ struct lnet_ni *ni;
+ int rc;
+
+ ni = lnet_ni_alloc_common(net, iface);
+ if (!ni)
+ return NULL;
+
+ if (!el) {
ni->ni_cpts = NULL;
ni->ni_ncpts = LNET_CPT_NUMBER;
} else {
rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
if (rc <= 0) {
- CERROR("Failed to set CPTs for NI %s: %d\n",
- libcfs_net2str(net), rc);
+ CERROR("Failed to set CPTs for NI %s(%s): %d\n",
+ libcfs_net2str(net->net_id),
+ (iface != NULL) ? iface : "", rc);
goto failed;
}
ni->ni_ncpts = rc;
}
- /* LND will fill in the address part of the NID */
- ni->ni_nid = LNET_MKNID(net, 0);
+ rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
+ if (rc != 0)
+ goto failed;
- /* Store net namespace in which current ni is being created */
- if (current->nsproxy->net_ns != NULL)
- ni->ni_net_ns = get_net(current->nsproxy->net_ns);
- else
- ni->ni_net_ns = NULL;
+ return ni;
+failed:
+ lnet_ni_free(ni);
+ return NULL;
+}
+
+/*
+ * Allocate an NI on @net bound to an explicit CPT array. An @ncpts of
+ * zero means "all CPTs" (ni_cpts left NULL, ni_ncpts = LNET_CPT_NUMBER);
+ * otherwise @cpts is copied into a newly allocated ni_cpts array. The
+ * net's own CPT list is extended to cover the NI's CPTs.
+ * Returns the new NI, or NULL on failure (the partial NI is freed).
+ */
+struct lnet_ni *
+lnet_ni_alloc_w_cpt_array(struct lnet_net *net, __u32 *cpts, __u32 ncpts,
+ char *iface)
+{
+ struct lnet_ni *ni;
+ int rc;
+
+ ni = lnet_ni_alloc_common(net, iface);
+ if (!ni)
+ return NULL;
+
+ if (ncpts == 0) {
+ ni->ni_cpts = NULL;
+ ni->ni_ncpts = LNET_CPT_NUMBER;
+ } else {
+ size_t array_size = ncpts * sizeof(ni->ni_cpts[0]);
+ LIBCFS_ALLOC(ni->ni_cpts, array_size);
+ if (ni->ni_cpts == NULL)
+ goto failed;
+ memcpy(ni->ni_cpts, cpts, array_size);
+ ni->ni_ncpts = ncpts;
+ }
+
+ rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
+ if (rc != 0)
+ goto failed;
- ni->ni_last_alive = ktime_get_real_seconds();
- list_add_tail(&ni->ni_list, nilist);
return ni;
- failed:
+failed:
lnet_ni_free(ni);
return NULL;
}
+/*
+ * Parse the networks string and create the matching set of NIs on the
+ * nilist.
+ */
int
-lnet_parse_networks(struct list_head *nilist, char *networks)
+lnet_parse_networks(struct list_head *netlist, char *networks,
+ bool use_tcp_bonding)
{
- struct cfs_expr_list *el = NULL;
+ struct cfs_expr_list *net_el = NULL;
+ struct cfs_expr_list *ni_el = NULL;
int tokensize;
char *tokens;
char *str;
- char *tmp;
- struct lnet_ni *ni;
- __u32 net;
+ struct lnet_net *net;
+ struct lnet_ni *ni = NULL;
+ __u32 net_id;
int nnets = 0;
- struct list_head *temp_node;
if (networks == NULL) {
CERROR("networks string is undefined\n");
}
memcpy(tokens, networks, tokensize);
- str = tmp = tokens;
-
- while (str != NULL && *str != 0) {
- char *comma = strchr(str, ',');
- char *bracket = strchr(str, '(');
- char *square = strchr(str, '[');
- char *iface;
- int niface;
- int rc;
-
- /* NB we don't check interface conflicts here; it's the LNDs
- * responsibility (if it cares at all) */
-
- if (square != NULL && (comma == NULL || square < comma)) {
- /* i.e: o2ib0(ib0)[1,2], number between square
- * brackets are CPTs this NI needs to be bond */
- if (bracket != NULL && bracket > square) {
- tmp = square;
+ str = tokens;
+
+ /*
+ * Main parser loop.
+ *
+ * NB we don't check interface conflicts here; it's the LNDs
+ * responsibility (if it cares at all)
+ */
+ do {
+ char *nistr;
+ char *elstr;
+ char *name;
+ int rc;
+
+ /*
+ * Parse a network string into its components.
+ *
+ * <name>{"("...")"}{"["<el>"]"}
+ */
+
+ /* Network name (mandatory) */
+ while (isspace(*str))
+ *str++ = '\0';
+ if (!*str)
+ break;
+ name = str;
+ str += strcspn(str, SPACESTR ":()[],");
+ while (isspace(*str))
+ *str++ = '\0';
+
+ /* Interface list (optional) */
+ if (*str == '(') {
+ *str++ = '\0';
+ nistr = str;
+ str += strcspn(str, ")");
+ if (*str != ')') {
+ str = nistr;
goto failed_syntax;
}
+ do {
+ *str++ = '\0';
+ } while (isspace(*str));
+ } else {
+ nistr = NULL;
+ }
- tmp = strchr(square, ']');
- if (tmp == NULL) {
- tmp = square;
+ /* CPT expression (optional) */
+ if (*str == '[') {
+ elstr = str;
+ str += strcspn(str, "]");
+ if (*str != ']') {
+ str = elstr;
goto failed_syntax;
}
-
- rc = cfs_expr_list_parse(square, tmp - square + 1,
- 0, LNET_CPT_NUMBER - 1, &el);
+ rc = cfs_expr_list_parse(elstr, str - elstr + 1,
+ 0, LNET_CPT_NUMBER - 1,
+ &net_el);
if (rc != 0) {
- tmp = square;
+ str = elstr;
goto failed_syntax;
}
-
- while (square <= tmp)
- *square++ = ' ';
+ *elstr = '\0';
+ do {
+ *str++ = '\0';
+ } while (isspace(*str));
}
- if (bracket == NULL ||
- (comma != NULL && comma < bracket)) {
-
- /* no interface list specified */
-
- if (comma != NULL)
- *comma++ = 0;
- net = libcfs_str2net(cfs_trimwhite(str));
-
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
- " type\n");
- tmp = str;
- goto failed_syntax;
- }
+ /* Bad delimiters */
+ if (*str && (strchr(DELIMITERS, *str) != NULL))
+ goto failed_syntax;
- if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
- lnet_ni_alloc(net, el, nilist) == NULL)
- goto failed;
+ /* go to the next net if it exists */
+ str += strcspn(str, ",");
+ if (*str == ',')
+ *str++ = '\0';
+
+ /*
+ * At this point the name is properly terminated.
+ */
+ net_id = libcfs_str2net(name);
+ if (net_id == LNET_NIDNET(LNET_NID_ANY)) {
+ LCONSOLE_ERROR_MSG(0x113,
+ "Unrecognised network type\n");
+ str = name;
+ goto failed_syntax;
+ }
- if (el != NULL) {
- cfs_expr_list_free(el);
- el = NULL;
+ if (LNET_NETTYP(net_id) == LOLND) {
+ /* Loopback is implicit, and there can be only one. */
+ if (net_el) {
+ cfs_expr_list_free(net_el);
+ net_el = NULL;
}
-
- str = comma;
+ /* Should we error out instead? */
continue;
}
- *bracket = 0;
- net = libcfs_str2net(cfs_trimwhite(str));
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- tmp = str;
- goto failed_syntax;
- }
+ /*
+ * All network parameters are now known.
+ */
+ nnets++;
- ni = lnet_ni_alloc(net, el, nilist);
- if (ni == NULL)
+ /* always allocate a net, since we will eventually add an
+ * interface to it, or we will fail, in which case we'll
+ * just delete it */
+ net = lnet_net_alloc(net_id, netlist);
+ if (IS_ERR_OR_NULL(net))
goto failed;
- if (el != NULL) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- niface = 0;
- iface = bracket + 1;
+ if (!nistr ||
+ (use_tcp_bonding && LNET_NETTYP(net_id) == SOCKLND)) {
+ /*
+ * No interface list was specified, allocate a
+ * ni using the defaults.
+ */
+ ni = lnet_ni_alloc(net, net_el, NULL);
+ if (IS_ERR_OR_NULL(ni))
+ goto failed;
- bracket = strchr(iface, ')');
- if (bracket == NULL) {
- tmp = iface;
- goto failed_syntax;
+ if (!nistr) {
+ if (net_el) {
+ cfs_expr_list_free(net_el);
+ net_el = NULL;
+ }
+ continue;
+ }
}
- *bracket = 0;
do {
- comma = strchr(iface, ',');
- if (comma != NULL)
- *comma++ = 0;
+ elstr = NULL;
+
+ /* Interface name (mandatory) */
+ while (isspace(*nistr))
+ *nistr++ = '\0';
+ name = nistr;
+ nistr += strcspn(nistr, SPACESTR "[],");
+ while (isspace(*nistr))
+ *nistr++ = '\0';
+
+ /* CPT expression (optional) */
+ if (*nistr == '[') {
+ elstr = nistr;
+ nistr += strcspn(nistr, "]");
+ if (*nistr != ']') {
+ str = elstr;
+ goto failed_syntax;
+ }
+ rc = cfs_expr_list_parse(elstr,
+ nistr - elstr + 1,
+ 0, LNET_CPT_NUMBER - 1,
+ &ni_el);
+ if (rc != 0) {
+ str = elstr;
+ goto failed_syntax;
+ }
+ *elstr = '\0';
+ do {
+ *nistr++ = '\0';
+ } while (isspace(*nistr));
+ } else {
+ ni_el = net_el;
+ }
- iface = cfs_trimwhite(iface);
- if (*iface == 0) {
- tmp = iface;
+ /*
+ * End of single interface specification,
+ * advance to the start of the next one, if
+ * any.
+ */
+ if (*nistr == ',') {
+ do {
+ *nistr++ = '\0';
+ } while (isspace(*nistr));
+ if (!*nistr) {
+ str = nistr;
+ goto failed_syntax;
+ }
+ } else if (*nistr) {
+ str = nistr;
goto failed_syntax;
}
- if (niface == LNET_MAX_INTERFACES) {
- LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
- "for net %s\n",
- libcfs_net2str(net));
- goto failed;
+ /*
+ * At this point the name is properly terminated.
+ */
+ if (!*name) {
+ str = name;
+ goto failed_syntax;
}
- /* Allocate a separate piece of memory and copy
- * into it the string, so we don't have
- * a depencency on the tokens string. This way we
- * can free the tokens at the end of the function.
- * The newly allocated ni_interfaces[] can be
- * freed when freeing the NI */
- LIBCFS_ALLOC(ni->ni_interfaces[niface],
- strlen(iface) + 1);
- if (ni->ni_interfaces[niface] == NULL) {
- CERROR("Can't allocate net interface name\n");
- goto failed;
+ if (use_tcp_bonding &&
+ LNET_NETTYP(net->net_id) == SOCKLND) {
+ rc = lnet_ni_add_interface(ni, name);
+ if (rc != 0)
+ goto failed;
+ } else {
+ ni = lnet_ni_alloc(net, ni_el, name);
+ if (IS_ERR_OR_NULL(ni))
+ goto failed;
}
- strncpy(ni->ni_interfaces[niface], iface,
- strlen(iface));
- niface++;
- iface = comma;
- } while (iface != NULL);
-
- str = bracket + 1;
- comma = strchr(bracket + 1, ',');
- if (comma != NULL) {
- *comma = 0;
- str = cfs_trimwhite(str);
- if (*str != 0) {
- tmp = str;
- goto failed_syntax;
+
+ if (ni_el) {
+ if (ni_el != net_el) {
+ cfs_expr_list_free(ni_el);
+ ni_el = NULL;
+ }
}
- str = comma + 1;
- continue;
- }
+ } while (*nistr);
- str = cfs_trimwhite(str);
- if (*str != 0) {
- tmp = str;
- goto failed_syntax;
+ if (net_el) {
+ cfs_expr_list_free(net_el);
+ net_el = NULL;
}
- }
-
- list_for_each(temp_node, nilist)
- nnets++;
+ } while (*str);
LIBCFS_FREE(tokens, tokensize);
return nnets;
failed_syntax:
- lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
+ lnet_syntax("networks", networks, (int)(str - tokens), strlen(str));
failed:
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, lnet_ni_t, ni_list);
+ /* free the net list and all the nis on each net */
+ while (!list_empty(netlist)) {
+ net = list_entry(netlist->next, struct lnet_net, net_list);
- list_del(&ni->ni_list);
- lnet_ni_free(ni);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
}
- if (el != NULL)
- cfs_expr_list_free(el);
+ if (ni_el && ni_el != net_el)
+ cfs_expr_list_free(ni_el);
+ if (net_el)
+ cfs_expr_list_free(net_el);
LIBCFS_FREE(tokens, tokensize);
lnet_md_free(md);
}
+/*
+ * Return the CPT matching the NUMA node of @md's first buffer page,
+ * via cfs_cpt_of_node(). If the MD carries a valid bulk handle
+ * (LNET_MD_BULK_HANDLE), the lookup is done on the bulk MD instead.
+ * Returns CFS_CPT_ANY when the MD is NULL or no page can be resolved.
+ */
+int
+lnet_cpt_of_md(lnet_libmd_t *md)
+{
+ int cpt = CFS_CPT_ANY;
+
+ if (!md)
+ return CFS_CPT_ANY;
+
+ if ((md->md_options & LNET_MD_BULK_HANDLE) != 0 &&
+ !LNetHandleIsInvalid(md->md_bulk_handle)) {
+ md = lnet_handle2md(&md->md_bulk_handle);
+
+ if (!md)
+ return CFS_CPT_ANY;
+ }
+
+ if ((md->md_options & LNET_MD_KIOV) != 0) {
+ if (md->md_iov.kiov[0].kiov_page != NULL)
+ cpt = cfs_cpt_of_node(lnet_cpt_table(),
+ page_to_nid(md->md_iov.kiov[0].kiov_page));
+ } else if (md->md_iov.iov[0].iov_base != NULL) {
+ cpt = cfs_cpt_of_node(lnet_cpt_table(),
+ page_to_nid(virt_to_page(md->md_iov.iov[0].iov_base)));
+ }
+
+ return cpt;
+}
+
static int
lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
{
lmd->md_threshold = umd->threshold;
lmd->md_refcount = 0;
lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
+ lmd->md_bulk_handle = umd->bulk_handle;
if ((umd->options & LNET_MD_IOVEC) != 0) {
iov = msg->msg_iov;
kiov = msg->msg_kiov;
- LASSERT(niov > 0);
- LASSERT((iov == NULL) != (kiov == NULL));
+ LASSERT (niov > 0);
+ LASSERT ((iov == NULL) != (kiov == NULL));
}
}
- rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed,
- niov, iov, kiov, offset, mlen, rlen);
+ rc = (ni->ni_net->net_lnd->lnd_recv)(ni, private, msg, delayed,
+ niov, iov, kiov, offset, mlen,
+ rlen);
if (rc < 0)
lnet_finalize(ni, msg, rc);
}
if (len != 0)
lnet_setpayloadbuffer(msg);
- memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr));
- msg->msg_hdr.type = cpu_to_le32(type);
- msg->msg_hdr.dest_nid = cpu_to_le64(target.nid);
- msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
+ memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr));
+ msg->msg_hdr.type = cpu_to_le32(type);
+ msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
/* src_nid will be set later */
- msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
+ msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
msg->msg_hdr.payload_length = cpu_to_le32(len);
}
LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
(msg->msg_txcredit && msg->msg_peertxcredit));
- rc = (ni->ni_lnd->lnd_send)(ni, priv, msg);
+ rc = (ni->ni_net->net_lnd->lnd_send)(ni, priv, msg);
if (rc < 0)
lnet_finalize(ni, msg, rc);
}
LASSERT(!msg->msg_sending);
LASSERT(msg->msg_receiving);
LASSERT(!msg->msg_rx_ready_delay);
- LASSERT(ni->ni_lnd->lnd_eager_recv != NULL);
+ LASSERT(ni->ni_net->net_lnd->lnd_eager_recv != NULL);
msg->msg_rx_ready_delay = 1;
- rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
- &msg->msg_private);
+ rc = (ni->ni_net->net_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
+ &msg->msg_private);
if (rc != 0) {
CERROR("recv from %s / send to %s aborted: "
"eager_recv failed %d\n",
- libcfs_nid2str(msg->msg_rxpeer->lp_nid),
+ libcfs_nid2str(msg->msg_rxpeer->lpni_nid),
libcfs_id2str(msg->msg_target), rc);
LASSERT(rc < 0); /* required by my callers */
}
return rc;
}
-/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
+/*
+ * This function can be called from two paths:
+ * 1. when sending a message
+ * 2. when decommiting a message (lnet_msg_decommit_tx())
+ * In both these cases the peer_ni should have it's reference count
+ * acquired by the caller and therefore it is safe to drop the spin
+ * lock before calling lnd_query()
+ */
static void
-lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
+lnet_ni_query_locked(lnet_ni_t *ni, struct lnet_peer_ni *lp)
{
cfs_time_t last_alive = 0;
+ int cpt = lnet_cpt_of_nid_locked(lp->lpni_nid, ni);
LASSERT(lnet_peer_aliveness_enabled(lp));
- LASSERT(ni->ni_lnd->lnd_query != NULL);
+ LASSERT(ni->ni_net->net_lnd->lnd_query != NULL);
- lnet_net_unlock(lp->lp_cpt);
- (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
- lnet_net_lock(lp->lp_cpt);
+ lnet_net_unlock(cpt);
+ (ni->ni_net->net_lnd->lnd_query)(ni, lp->lpni_nid, &last_alive);
+ lnet_net_lock(cpt);
- lp->lp_last_query = cfs_time_current();
+ lp->lpni_last_query = cfs_time_current();
if (last_alive != 0) /* NI has updated timestamp */
- lp->lp_last_alive = last_alive;
+ lp->lpni_last_alive = last_alive;
}
/* NB: always called with lnet_net_lock held */
static inline int
-lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
+lnet_peer_is_alive (struct lnet_peer_ni *lp, cfs_time_t now)
{
- int alive;
+ int alive;
cfs_time_t deadline;
- LASSERT(lnet_peer_aliveness_enabled(lp));
+ LASSERT (lnet_peer_aliveness_enabled(lp));
- /* Trust lnet_notify() if it has more recent aliveness news, but
+ /*
+ * Trust lnet_notify() if it has more recent aliveness news, but
* ignore the initial assumed death (see lnet_peers_start_down()).
*/
- if (!lp->lp_alive && lp->lp_alive_count > 0 &&
- cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+ spin_lock(&lp->lpni_lock);
+ if (!lp->lpni_alive && lp->lpni_alive_count > 0 &&
+ cfs_time_aftereq(lp->lpni_timestamp, lp->lpni_last_alive)) {
+ spin_unlock(&lp->lpni_lock);
return 0;
+ }
- deadline = cfs_time_add(lp->lp_last_alive,
- cfs_time_seconds(lp->lp_ni->ni_peertimeout));
+ deadline =
+ cfs_time_add(lp->lpni_last_alive,
+ cfs_time_seconds(lp->lpni_net->net_tunables.
+ lct_peer_timeout));
alive = cfs_time_after(deadline, now);
- /* Update obsolete lp_alive except for routers assumed to be dead
+ /*
+ * Update obsolete lp_alive except for routers assumed to be dead
* initially, because router checker would update aliveness in this
- * case, and moreover lp_last_alive at peer creation is assumed.
+ * case, and moreover lpni_last_alive at peer creation is assumed.
*/
- if (alive && !lp->lp_alive &&
- !(lnet_isrouter(lp) && lp->lp_alive_count == 0))
- lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+ if (alive && !lp->lpni_alive &&
+ !(lnet_isrouter(lp) && lp->lpni_alive_count == 0)) {
+ spin_unlock(&lp->lpni_lock);
+ lnet_notify_locked(lp, 0, 1, lp->lpni_last_alive);
+ } else {
+ spin_unlock(&lp->lpni_lock);
+ }
return alive;
}
/* NB: returns 1 when alive, 0 when dead, negative when error;
* may drop the lnet_net_lock */
static int
-lnet_peer_alive_locked (lnet_peer_t *lp)
+lnet_peer_alive_locked (struct lnet_ni *ni, struct lnet_peer_ni *lp)
{
cfs_time_t now = cfs_time_current();
if (lnet_peer_is_alive(lp, now))
return 1;
- /* Peer appears dead, but we should avoid frequent NI queries (at
- * most once per lnet_queryinterval seconds). */
- if (lp->lp_last_query != 0) {
+ /*
+ * Peer appears dead, but we should avoid frequent NI queries (at
+ * most once per lnet_queryinterval seconds).
+ */
+ if (lp->lpni_last_query != 0) {
static const int lnet_queryinterval = 1;
cfs_time_t next_query =
- cfs_time_add(lp->lp_last_query,
+ cfs_time_add(lp->lpni_last_query,
cfs_time_seconds(lnet_queryinterval));
if (cfs_time_before(now, next_query)) {
- if (lp->lp_alive)
+ if (lp->lpni_alive)
CWARN("Unexpected aliveness of peer %s: "
"%d < %d (%d/%d)\n",
- libcfs_nid2str(lp->lp_nid),
+ libcfs_nid2str(lp->lpni_nid),
(int)now, (int)next_query,
lnet_queryinterval,
- lp->lp_ni->ni_peertimeout);
+ lp->lpni_net->net_tunables.lct_peer_timeout);
return 0;
}
}
/* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
+ lnet_ni_query_locked(ni, lp);
if (lnet_peer_is_alive(lp, now))
return 1;
- lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+ lnet_notify_locked(lp, 0, 0, lp->lpni_last_alive);
return 0;
}
static int
lnet_post_send_locked(lnet_msg_t *msg, int do_send)
{
- lnet_peer_t *lp = msg->msg_txpeer;
- lnet_ni_t *ni = lp->lp_ni;
+ struct lnet_peer_ni *lp = msg->msg_txpeer;
+ struct lnet_ni *ni = msg->msg_txni;
int cpt = msg->msg_tx_cpt;
struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
/* NB 'lp' is always the next hop */
if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
- lnet_peer_alive_locked(lp) == 0) {
+ lnet_peer_alive_locked(ni, lp) == 0) {
the_lnet.ln_counters[cpt]->drop_count++;
the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
lnet_net_unlock(cpt);
+ if (msg->msg_txpeer)
+ atomic_inc(&msg->msg_txpeer->lpni_stats.drop_count);
+ if (msg->msg_txni)
+ atomic_inc(&msg->msg_txni->ni_stats.drop_count);
CNETERR("Dropping message for %s: peer not alive\n",
libcfs_id2str(msg->msg_target));
}
if (!msg->msg_peertxcredit) {
- LASSERT((lp->lp_txcredits < 0) ==
- !list_empty(&lp->lp_txq));
+ spin_lock(&lp->lpni_lock);
+ LASSERT((lp->lpni_txcredits < 0) ==
+ !list_empty(&lp->lpni_txq));
msg->msg_peertxcredit = 1;
- lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t);
- lp->lp_txcredits--;
+ lp->lpni_txqnob += msg->msg_len + sizeof(lnet_hdr_t);
+ lp->lpni_txcredits--;
- if (lp->lp_txcredits < lp->lp_mintxcredits)
- lp->lp_mintxcredits = lp->lp_txcredits;
+ if (lp->lpni_txcredits < lp->lpni_mintxcredits)
+ lp->lpni_mintxcredits = lp->lpni_txcredits;
- if (lp->lp_txcredits < 0) {
+ if (lp->lpni_txcredits < 0) {
msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_txq);
+ list_add_tail(&msg->msg_list, &lp->lpni_txq);
+ spin_unlock(&lp->lpni_lock);
return LNET_CREDIT_WAIT;
}
+ spin_unlock(&lp->lpni_lock);
}
if (!msg->msg_txcredit) {
msg->msg_txcredit = 1;
tq->tq_credits--;
+ atomic_dec(&ni->ni_tx_credits);
if (tq->tq_credits < tq->tq_credits_min)
tq->tq_credits_min = tq->tq_credits;
* sets do_recv FALSE and I don't do the unlock/send/lock bit.
* I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if
* received or OK to receive */
- lnet_peer_t *lp = msg->msg_rxpeer;
+ struct lnet_peer_ni *lp = msg->msg_rxpeer;
lnet_rtrbufpool_t *rbp;
- lnet_rtrbuf_t *rb;
+ lnet_rtrbuf_t *rb;
- LASSERT(msg->msg_iov == NULL);
- LASSERT(msg->msg_kiov == NULL);
- LASSERT(msg->msg_niov == 0);
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
+ LASSERT (msg->msg_iov == NULL);
+ LASSERT (msg->msg_kiov == NULL);
+ LASSERT (msg->msg_niov == 0);
+ LASSERT (msg->msg_routing);
+ LASSERT (msg->msg_receiving);
+ LASSERT (!msg->msg_sending);
/* non-lnet_parse callers only receive delayed messages */
LASSERT(!do_recv || msg->msg_rx_delayed);
if (!msg->msg_peerrtrcredit) {
- LASSERT((lp->lp_rtrcredits < 0) ==
- !list_empty(&lp->lp_rtrq));
+ spin_lock(&lp->lpni_lock);
+ LASSERT((lp->lpni_rtrcredits < 0) ==
+ !list_empty(&lp->lpni_rtrq));
msg->msg_peerrtrcredit = 1;
- lp->lp_rtrcredits--;
- if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
- lp->lp_minrtrcredits = lp->lp_rtrcredits;
+ lp->lpni_rtrcredits--;
+ if (lp->lpni_rtrcredits < lp->lpni_minrtrcredits)
+ lp->lpni_minrtrcredits = lp->lpni_rtrcredits;
- if (lp->lp_rtrcredits < 0) {
+ if (lp->lpni_rtrcredits < 0) {
/* must have checked eager_recv before here */
LASSERT(msg->msg_rx_ready_delay);
msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_rtrq);
+ list_add_tail(&msg->msg_list, &lp->lpni_rtrq);
+ spin_unlock(&lp->lpni_lock);
return LNET_CREDIT_WAIT;
}
+ spin_unlock(&lp->lpni_lock);
}
rbp = lnet_msg2bufpool(msg);
int cpt = msg->msg_rx_cpt;
lnet_net_unlock(cpt);
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+ lnet_ni_recv(msg->msg_rxni, msg->msg_private, msg, 1,
0, msg->msg_len, msg->msg_len);
lnet_net_lock(cpt);
}
void
lnet_return_tx_credits_locked(lnet_msg_t *msg)
{
- lnet_peer_t *txpeer = msg->msg_txpeer;
- lnet_msg_t *msg2;
+ struct lnet_peer_ni *txpeer = msg->msg_txpeer;
+ struct lnet_ni *txni = msg->msg_txni;
+ lnet_msg_t *msg2;
if (msg->msg_txcredit) {
- struct lnet_ni *ni = txpeer->lp_ni;
+ struct lnet_ni *ni = msg->msg_txni;
struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
/* give back NI txcredits */
!list_empty(&tq->tq_delayed));
tq->tq_credits++;
+ atomic_inc(&ni->ni_tx_credits);
if (tq->tq_credits <= 0) {
msg2 = list_entry(tq->tq_delayed.next,
lnet_msg_t, msg_list);
list_del(&msg2->msg_list);
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
+ LASSERT(msg2->msg_txni == ni);
LASSERT(msg2->msg_tx_delayed);
+ LASSERT(msg2->msg_tx_cpt == msg->msg_tx_cpt);
(void) lnet_post_send_locked(msg2, 1);
}
/* give back peer txcredits */
msg->msg_peertxcredit = 0;
- LASSERT((txpeer->lp_txcredits < 0) ==
- !list_empty(&txpeer->lp_txq));
+ spin_lock(&txpeer->lpni_lock);
+ LASSERT((txpeer->lpni_txcredits < 0) ==
+ !list_empty(&txpeer->lpni_txq));
- txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t);
- LASSERT(txpeer->lp_txqnob >= 0);
+ txpeer->lpni_txqnob -= msg->msg_len + sizeof(lnet_hdr_t);
+ LASSERT(txpeer->lpni_txqnob >= 0);
- txpeer->lp_txcredits++;
- if (txpeer->lp_txcredits <= 0) {
- msg2 = list_entry(txpeer->lp_txq.next,
- lnet_msg_t, msg_list);
+ txpeer->lpni_txcredits++;
+ if (txpeer->lpni_txcredits <= 0) {
+ msg2 = list_entry(txpeer->lpni_txq.next,
+ lnet_msg_t, msg_list);
list_del(&msg2->msg_list);
+ spin_unlock(&txpeer->lpni_lock);
LASSERT(msg2->msg_txpeer == txpeer);
LASSERT(msg2->msg_tx_delayed);
- (void) lnet_post_send_locked(msg2, 1);
+ if (msg2->msg_tx_cpt != msg->msg_tx_cpt) {
+ lnet_net_unlock(msg->msg_tx_cpt);
+ lnet_net_lock(msg2->msg_tx_cpt);
+ }
+ (void) lnet_post_send_locked(msg2, 1);
+ if (msg2->msg_tx_cpt != msg->msg_tx_cpt) {
+ lnet_net_unlock(msg2->msg_tx_cpt);
+ lnet_net_lock(msg->msg_tx_cpt);
+ }
+ } else {
+ spin_unlock(&txpeer->lpni_lock);
}
+ }
+
+ if (txni != NULL) {
+ msg->msg_txni = NULL;
+ lnet_ni_decref_locked(txni, msg->msg_tx_cpt);
}
if (txpeer != NULL) {
+ /*
+ * TODO:
+ * Once the patch for the health comes in we need to set
+ * the health of the peer ni to bad when we fail to send
+ * a message.
+ * int status = msg->msg_ev.status;
+ * if (status != 0)
+ * lnet_set_peer_ni_health_locked(txpeer, false)
+ */
msg->msg_txpeer = NULL;
- lnet_peer_decref_locked(txpeer);
+ lnet_peer_ni_decref_locked(txpeer);
}
}
void
lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
{
- lnet_msg_t *msg;
- lnet_msg_t *tmp;
- struct list_head drop;
-
- INIT_LIST_HEAD(&drop);
-
- list_splice_init(list, &drop);
+ lnet_msg_t *msg;
+ lnet_msg_t *tmp;
lnet_net_unlock(cpt);
- list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
- lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
+ list_for_each_entry_safe(msg, tmp, list, msg_list) {
+ lnet_ni_recv(msg->msg_rxni, msg->msg_private, NULL,
0, 0, 0, msg->msg_hdr.payload_length);
list_del_init(&msg->msg_list);
lnet_finalize(NULL, msg, -ECANCELED);
void
lnet_return_rx_credits_locked(lnet_msg_t *msg)
{
- lnet_peer_t *rxpeer = msg->msg_rxpeer;
- lnet_msg_t *msg2;
+ struct lnet_peer_ni *rxpeer = msg->msg_rxpeer;
+ struct lnet_ni *rxni = msg->msg_rxni;
+ lnet_msg_t *msg2;
if (msg->msg_rtrcredit) {
/* give back global router credits */
/* give back peer router credits */
msg->msg_peerrtrcredit = 0;
- LASSERT((rxpeer->lp_rtrcredits < 0) ==
- !list_empty(&rxpeer->lp_rtrq));
+ spin_lock(&rxpeer->lpni_lock);
+ LASSERT((rxpeer->lpni_rtrcredits < 0) ==
+ !list_empty(&rxpeer->lpni_rtrq));
- rxpeer->lp_rtrcredits++;
+ rxpeer->lpni_rtrcredits++;
/* drop all messages which are queued to be routed on that
* peer. */
if (!the_lnet.ln_routing) {
- lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq,
- msg->msg_rx_cpt);
- } else if (rxpeer->lp_rtrcredits <= 0) {
- msg2 = list_entry(rxpeer->lp_rtrq.next,
+ struct list_head drop;
+ INIT_LIST_HEAD(&drop);
+ list_splice_init(&rxpeer->lpni_rtrq, &drop);
+ spin_unlock(&rxpeer->lpni_lock);
+ lnet_drop_routed_msgs_locked(&drop, msg->msg_rx_cpt);
+ } else if (rxpeer->lpni_rtrcredits <= 0) {
+ msg2 = list_entry(rxpeer->lpni_rtrq.next,
lnet_msg_t, msg_list);
list_del(&msg2->msg_list);
-
+ spin_unlock(&rxpeer->lpni_lock);
(void) lnet_post_routed_recv_locked(msg2, 1);
+ } else {
+ spin_unlock(&rxpeer->lpni_lock);
}
}
+ if (rxni != NULL) {
+ msg->msg_rxni = NULL;
+ lnet_ni_decref_locked(rxni, msg->msg_rx_cpt);
+ }
if (rxpeer != NULL) {
msg->msg_rxpeer = NULL;
- lnet_peer_decref_locked(rxpeer);
+ lnet_peer_ni_decref_locked(rxpeer);
}
}
static int
+lnet_compare_peers(struct lnet_peer_ni *p1, struct lnet_peer_ni *p2)
+{
+ /*
+ * Rank two peer NIs for transmit preference: less queued data
+ * (lpni_txqnob) wins first, then more available tx credits.
+ * Returns 1 if p1 is preferable, -1 if p2 is, 0 if tied.
+ */
+ if (p1->lpni_txqnob < p2->lpni_txqnob)
+ return 1;
+
+ if (p1->lpni_txqnob > p2->lpni_txqnob)
+ return -1;
+
+ if (p1->lpni_txcredits > p2->lpni_txcredits)
+ return 1;
+
+ if (p1->lpni_txcredits < p2->lpni_txcredits)
+ return -1;
+
+ return 0;
+}
+
+static int
lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2)
{
- lnet_peer_t *p1 = r1->lr_gateway;
- lnet_peer_t *p2 = r2->lr_gateway;
+ struct lnet_peer_ni *p1 = r1->lr_gateway;
+ struct lnet_peer_ni *p2 = r2->lr_gateway;
int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops;
int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops;
+ int rc;
if (r1->lr_priority < r2->lr_priority)
return 1;
if (r1->lr_priority > r2->lr_priority)
- return -ERANGE;
+ return -1;
if (r1_hops < r2_hops)
return 1;
if (r1_hops > r2_hops)
- return -ERANGE;
+ return -1;
- if (p1->lp_txqnob < p2->lp_txqnob)
- return 1;
-
- if (p1->lp_txqnob > p2->lp_txqnob)
- return -ERANGE;
-
- if (p1->lp_txcredits > p2->lp_txcredits)
- return 1;
-
- if (p1->lp_txcredits < p2->lp_txcredits)
- return -ERANGE;
+ rc = lnet_compare_peers(p1, p2);
+ if (rc)
+ return rc;
if (r1->lr_seq - r2->lr_seq <= 0)
return 1;
- return -ERANGE;
+ return -1;
}
-static lnet_peer_t *
-lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
+/*
+ * Pick the gateway peer_ni to route \a target through.  If \a rtr_nid
+ * matches a live route's gateway it is returned directly (caller
+ * pre-determined the router); otherwise the live routes to the
+ * target's remote net are compared and the best gateway is chosen.
+ * Returns NULL if no remote net / no live route exists.
+ * NOTE(review): the route-comparison step between the candidate loop
+ * iterations appears context-elided in this hunk view — confirm
+ * against the full file before relying on the loop body shown here.
+ */
+static struct lnet_peer_ni *
+lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target,
+ lnet_nid_t rtr_nid)
{
lnet_remotenet_t *rnet;
lnet_route_t *route;
lnet_route_t *best_route;
lnet_route_t *last_route;
- struct lnet_peer *lp_best;
- struct lnet_peer *lp;
+ struct lnet_peer_ni *lpni_best;
+ struct lnet_peer_ni *lp;
int rc;
/* If @rtr_nid is not LNET_NID_ANY, return the gateway with
* rtr_nid nid, otherwise find the best gateway I can use */
- rnet = lnet_find_net_locked(LNET_NIDNET(target));
+ rnet = lnet_find_rnet_locked(LNET_NIDNET(target));
if (rnet == NULL)
return NULL;
- lp_best = NULL;
+ lpni_best = NULL;
best_route = last_route = NULL;
list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
lp = route->lr_gateway;
if (!lnet_is_route_alive(route))
continue;
- if (ni != NULL && lp->lp_ni != ni)
+ if (net != NULL && lp->lpni_net != net)
continue;
- if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
+ if (lp->lpni_nid == rtr_nid) /* it's pre-determined router */
return lp;
- if (lp_best == NULL) {
+ if (lpni_best == NULL) {
best_route = last_route = route;
- lp_best = lp;
+ lpni_best = lp;
continue;
}
continue;
best_route = route;
- lp_best = lp;
+ lpni_best = lp;
}
/* set sequence number on the best router to the latest sequence + 1
* harmless and functional */
if (best_route != NULL)
best_route->lr_seq = last_route->lr_seq + 1;
- return lp_best;
+ return lpni_best;
}
-int
-lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
+/*
+ * Select the best local NI on \a local_net to send from.  Selection
+ * criteria, in priority order: (1) NUMA distance from \a md_cpt to the
+ * NI's device CPT (distances below lnet_numa_range are equivalent),
+ * (2) available NI tx credits, (3) round-robin via ni_seq.  \a cur_ni
+ * (may be NULL) seeds the comparison so callers can fold the best NI
+ * across several nets incrementally.  Unhealthy NIs are skipped.
+ */
+static struct lnet_ni *
+lnet_get_best_ni(struct lnet_net *local_net, struct lnet_ni *cur_ni,
+ int md_cpt)
{
- lnet_nid_t dst_nid = msg->msg_target.nid;
- struct lnet_ni *src_ni;
- struct lnet_ni *local_ni;
- struct lnet_peer *lp;
- int cpt;
- int cpt2;
- int rc;
+ struct lnet_ni *ni = NULL, *best_ni = cur_ni;
+ unsigned int shortest_distance;
+ int best_credits;
- /* NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
- * but we might want to use pre-determined router for ACK/REPLY
- * in the future */
- /* NB: ni != NULL == interface pre-determined (ACK/REPLY) */
- LASSERT(msg->msg_txpeer == NULL);
- LASSERT(!msg->msg_sending);
- LASSERT(!msg->msg_target_is_router);
- LASSERT(!msg->msg_receiving);
+ if (best_ni == NULL) {
+ shortest_distance = UINT_MAX;
+ best_credits = INT_MIN;
+ } else {
+ shortest_distance = cfs_cpt_distance(lnet_cpt_table(), md_cpt,
+ best_ni->ni_dev_cpt);
+ best_credits = atomic_read(&best_ni->ni_tx_credits);
+ }
- msg->msg_sending = 1;
+ while ((ni = lnet_get_next_ni_locked(local_net, ni))) {
+ unsigned int distance;
+ int ni_credits;
- LASSERT(!msg->msg_tx_committed);
- cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
- again:
- lnet_net_lock(cpt);
+ if (!lnet_is_ni_healthy_locked(ni))
+ continue;
+
+ ni_credits = atomic_read(&ni->ni_tx_credits);
+
+ /*
+ * calculate the distance from the CPT on which
+ * the message memory is allocated to the CPT of
+ * the NI's physical device
+ */
+ distance = cfs_cpt_distance(lnet_cpt_table(),
+ md_cpt,
+ ni->ni_dev_cpt);
+
+ /*
+ * All distances smaller than the NUMA range
+ * are treated equally.
+ */
+ if (distance < lnet_numa_range)
+ distance = lnet_numa_range;
+
+ /*
+ * Select on shorter distance, then available
+ * credits, then round-robin.
+ */
+ if (distance > shortest_distance) {
+ continue;
+ } else if (distance < shortest_distance) {
+ shortest_distance = distance;
+ } else if (ni_credits < best_credits) {
+ continue;
+ } else if (ni_credits == best_credits) {
+ if (best_ni && (best_ni)->ni_seq <= ni->ni_seq)
+ continue;
+ }
+ best_ni = ni;
+ best_credits = ni_credits;
+ }
+
+ return best_ni;
+}
+
+/*
+ * Core Multi-Rail send-path selection.  Picks the local NI
+ * (msg_txni) and the destination or gateway peer_ni (msg_txpeer) for
+ * \a msg, commits the message on the proper CPT and posts the send.
+ * Returns LNET_CREDIT_OK / LNET_CREDIT_WAIT from
+ * lnet_post_send_locked(), or a negative errno on failure.
+ */
+static int
+lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
+ struct lnet_msg *msg, lnet_nid_t rtr_nid)
+{
+ struct lnet_ni *best_ni;
+ struct lnet_peer_ni *best_lpni;
+ struct lnet_peer_ni *best_gw;
+ struct lnet_peer_ni *lpni;
+ struct lnet_peer_ni *final_dst;
+ struct lnet_peer *peer;
+ struct lnet_peer_net *peer_net;
+ struct lnet_net *local_net;
+ __u32 seq;
+ int cpt, cpt2, rc;
+ bool routing;
+ bool routing2;
+ bool ni_is_pref;
+ bool preferred;
+ bool local_found;
+ int best_lpni_credits;
+ int md_cpt;
+
+ /*
+ * get an initial CPT to use for locking. The idea here is not to
+ * serialize the calls to select_pathway, so that as many
+ * operations can run concurrently as possible. To do that we use
+ * the CPT where this call is being executed. Later on when we
+ * determine the CPT to use in lnet_message_commit, we switch the
+ * lock and check if there was any configuration change. If none,
+ * then we proceed, if there is, then we restart the operation.
+ */
+ cpt = lnet_net_lock_current();
+
+ md_cpt = lnet_cpt_of_md(msg->msg_md);
+ if (md_cpt == CFS_CPT_ANY)
+ md_cpt = cpt;
+
+again:
+ best_ni = NULL;
+ best_lpni = NULL;
+ best_gw = NULL;
+ final_dst = NULL;
+ local_net = NULL;
+ routing = false;
+ routing2 = false;
+ local_found = false;
+
+ seq = lnet_get_dlc_seq_locked();
if (the_lnet.ln_shutdown) {
lnet_net_unlock(cpt);
return -ESHUTDOWN;
}
- if (src_nid == LNET_NID_ANY) {
- src_ni = NULL;
- } else {
- src_ni = lnet_nid2ni_locked(src_nid, cpt);
- if (src_ni == NULL) {
+ peer = lnet_find_or_create_peer_locked(dst_nid, cpt);
+ if (IS_ERR(peer)) {
+ lnet_net_unlock(cpt);
+ return PTR_ERR(peer);
+ }
+
+ /* If peer is not healthy then can not send anything to it */
+ if (!lnet_is_peer_healthy_locked(peer)) {
+ lnet_net_unlock(cpt);
+ return -EHOSTUNREACH;
+ }
+
+ if (!peer->lp_multi_rail && lnet_get_num_peer_nis(peer) > 1) {
+ /* FIX: this error path returned with lnet_net_lock(cpt)
+ * still held, leaking the lock; drop it first like every
+ * other error return in this function. */
+ lnet_net_unlock(cpt);
+ CERROR("peer %s is declared to be non MR capable, "
+ "yet configured with more than one NID\n",
+ libcfs_nid2str(dst_nid));
+ return -EINVAL;
+ }
+
+ /*
+ * STEP 1: first jab at determining best_ni
+ * if src_nid is explicitly specified, then best_ni is already
+ * pre-determiend for us. Otherwise we need to select the best
+ * one to use later on
+ */
+ if (src_nid != LNET_NID_ANY) {
+ best_ni = lnet_nid2ni_locked(src_nid, cpt);
+ if (!best_ni) {
lnet_net_unlock(cpt);
LCONSOLE_WARN("Can't send to %s: src %s is not a "
"local nid\n", libcfs_nid2str(dst_nid),
libcfs_nid2str(src_nid));
return -EINVAL;
}
- LASSERT(!msg->msg_routing);
}
- /* Is this for someone on a local network? */
- local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
+ if (msg->msg_type == LNET_MSG_REPLY ||
+ msg->msg_type == LNET_MSG_ACK ||
+ !peer->lp_multi_rail ||
+ best_ni) {
+ /*
+ * for replies we want to respond on the same peer_ni we
+ * received the message on if possible. If not, then pick
+ * a peer_ni to send to
+ *
+ * if the peer is non-multi-rail then you want to send to
+ * the dst_nid provided as well.
+ *
+ * If the best_ni has already been determined, IE the
+ * src_nid has been specified, then use the
+ * destination_nid provided as well, since we're
+ * continuing a series of related messages for the same
+ * RPC.
+ *
+ * It is expected to find the lpni using dst_nid, since we
+ * created it earlier.
+ */
+ best_lpni = lnet_find_peer_ni_locked(dst_nid);
+ if (best_lpni)
+ lnet_peer_ni_decref_locked(best_lpni);
+
+ if (best_lpni && !lnet_get_net_locked(LNET_NIDNET(dst_nid))) {
+ /*
+ * this lpni is not on a local network so we need
+ * to route this reply.
+ */
+ best_gw = lnet_find_route_locked(NULL,
+ best_lpni->lpni_nid,
+ rtr_nid);
+ if (best_gw) {
+ /*
+ * RULE: Each node considers only the next-hop
+ *
+ * We're going to route the message, so change the peer to
+ * the router.
+ */
+ LASSERT(best_gw->lpni_peer_net);
+ LASSERT(best_gw->lpni_peer_net->lpn_peer);
+ peer = best_gw->lpni_peer_net->lpn_peer;
+
+ /*
+ * if the router is not multi-rail then use the best_gw
+ * found to send the message to
+ */
+ if (!peer->lp_multi_rail)
+ best_lpni = best_gw;
+ else
+ best_lpni = NULL;
- if (local_ni != NULL) {
- if (src_ni == NULL) {
- src_ni = local_ni;
- src_nid = src_ni->ni_nid;
- } else if (src_ni == local_ni) {
- lnet_ni_decref_locked(local_ni, cpt);
- } else {
- lnet_ni_decref_locked(local_ni, cpt);
- lnet_ni_decref_locked(src_ni, cpt);
+ routing = true;
+ } else {
+ best_lpni = NULL;
+ }
+ } else if (!best_lpni) {
lnet_net_unlock(cpt);
- LCONSOLE_WARN("No route to %s via from %s\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
+ CERROR("unable to send msg_type %d to "
+ "originating %s. Destination NID not in DB\n",
+ msg->msg_type, libcfs_nid2str(dst_nid));
return -EINVAL;
}
+ }
+
+ /*
+ * if the peer is not MR capable, then we should always send to it
+ * using the first NI in the NET we determined.
+ */
+ if (!peer->lp_multi_rail) {
+ if (!best_lpni) {
+ lnet_net_unlock(cpt);
+ CERROR("no route to %s\n",
+ libcfs_nid2str(dst_nid));
+ return -EHOSTUNREACH;
+ }
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
+ /* best ni could be set because src_nid was provided */
+ if (!best_ni) {
+ best_ni = lnet_net2ni_locked(best_lpni->lpni_net->net_id, cpt);
+ if (!best_ni) {
+ lnet_net_unlock(cpt);
+ CERROR("no path to %s from net %s\n",
+ libcfs_nid2str(best_lpni->lpni_nid),
+ libcfs_net2str(best_lpni->lpni_net->net_id));
+ return -EHOSTUNREACH;
+ }
+ }
+ }
+ if (best_ni == the_lnet.ln_loni) {
+ /* No send credit hassles with LOLND */
+ lnet_ni_addref_locked(best_ni, cpt);
+ msg->msg_hdr.dest_nid = cpu_to_le64(best_ni->ni_nid);
if (!msg->msg_routing)
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+ msg->msg_hdr.src_nid = cpu_to_le64(best_ni->ni_nid);
+ msg->msg_target.nid = best_ni->ni_nid;
+ lnet_msg_commit(msg, cpt);
+ msg->msg_txni = best_ni;
+ lnet_net_unlock(cpt);
- if (src_ni == the_lnet.ln_loni) {
- /* No send credit hassles with LOLND */
- lnet_net_unlock(cpt);
- lnet_ni_send(src_ni, msg);
+ return LNET_CREDIT_OK;
+ }
- lnet_net_lock(cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- return 0;
- }
+ /*
+ * if we already found a best_ni because src_nid is specified and
+ * best_lpni because we are replying to a message then just send
+ * the message
+ */
+ if (best_ni && best_lpni)
+ goto send;
- rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
- /* lp has ref on src_ni; lose mine */
- lnet_ni_decref_locked(src_ni, cpt);
- if (rc != 0) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Error %d finding peer %s\n", rc,
- libcfs_nid2str(dst_nid));
- /* ENOMEM or shutting down */
- return rc;
+ /*
+ * If we already found a best_ni because src_nid is specified then
+ * pick the peer then send the message
+ */
+ if (best_ni)
+ goto pick_peer;
+
+ /*
+ * pick the best_ni by going through all the possible networks of
+ * that peer and see which local NI is best suited to talk to that
+ * peer.
+ *
+ * Locally connected networks will always be preferred over
+ * a routed network. If there are only routed paths to the peer,
+ * then the best route is chosen. If all routes are equal then
+ * they are used in round robin.
+ */
+ list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
+ if (!lnet_is_peer_net_healthy_locked(peer_net))
+ continue;
+
+ local_net = lnet_get_net_locked(peer_net->lpn_net_id);
+ if (!local_net && !routing && !local_found) {
+ struct lnet_peer_ni *net_gw;
+
+ lpni = list_entry(peer_net->lpn_peer_nis.next,
+ struct lnet_peer_ni,
+ lpni_on_peer_net_list);
+
+ net_gw = lnet_find_route_locked(NULL,
+ lpni->lpni_nid,
+ rtr_nid);
+ if (!net_gw)
+ continue;
+
+ if (best_gw) {
+ /*
+ * lnet_find_route_locked() call
+ * will return the best_Gw on the
+ * lpni->lpni_nid network.
+ * However, best_gw and net_gw can
+ * be on different networks.
+ * Therefore need to compare them
+ * to pick the better of either.
+ */
+ if (lnet_compare_peers(best_gw, net_gw) > 0)
+ continue;
+ if (best_gw->lpni_gw_seq <= net_gw->lpni_gw_seq)
+ continue;
+ }
+ best_gw = net_gw;
+ final_dst = lpni;
+
+ routing2 = true;
+ } else {
+ best_gw = NULL;
+ final_dst = NULL;
+ routing2 = false;
+ local_found = true;
}
- LASSERT(lp->lp_ni == src_ni);
- } else {
- /* sending to a remote network */
- lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
- if (lp == NULL) {
- if (src_ni != NULL)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("No route to %s via %s "
- "(all routers down)\n",
- libcfs_id2str(msg->msg_target),
- libcfs_nid2str(src_nid));
+ /*
+ * a gw on this network is found, but there could be
+ * other better gateways on other networks. So don't pick
+ * the best_ni until we determine the best_gw.
+ */
+ if (best_gw)
+ continue;
+
+ /* if no local_net found continue */
+ if (!local_net)
+ continue;
+
+ /*
+ * Iterate through the NIs in this local Net and select
+ * the NI to send from. The selection is determined by
+ * these 3 criterion in the following priority:
+ * 1. NUMA
+ * 2. NI available credits
+ * 3. Round Robin
+ */
+ best_ni = lnet_get_best_ni(local_net, best_ni, md_cpt);
+ }
+
+ if (!best_ni && !best_gw) {
+ lnet_net_unlock(cpt);
+ LCONSOLE_WARN("No local ni found to send from to %s\n",
+ libcfs_nid2str(dst_nid));
+ return -EINVAL;
+ }
+
+ if (!best_ni) {
+ best_ni = lnet_get_best_ni(best_gw->lpni_net, best_ni, md_cpt);
+ LASSERT(best_gw && best_ni);
+
+ /*
+ * We're going to route the message, so change the peer to
+ * the router.
+ */
+ LASSERT(best_gw->lpni_peer_net);
+ LASSERT(best_gw->lpni_peer_net->lpn_peer);
+ best_gw->lpni_gw_seq++;
+ peer = best_gw->lpni_peer_net->lpn_peer;
+ }
+
+ /*
+ * Now that we selected the NI to use increment its sequence
+ * number so the Round Robin algorithm will detect that it has
+ * been used and pick the next NI.
+ */
+ best_ni->ni_seq++;
+
+pick_peer:
+ /*
+ * At this point the best_ni is on a local network on which
+ * the peer has a peer_ni as well
+ */
+ peer_net = lnet_peer_get_net_locked(peer,
+ best_ni->ni_net->net_id);
+ /*
+ * peer_net is not available or the src_nid is explicitly defined
+ * and the peer_net for that src_nid is unhealthy. find a route to
+ * the destination nid.
+ */
+ if (!peer_net ||
+ (src_nid != LNET_NID_ANY &&
+ !lnet_is_peer_net_healthy_locked(peer_net))) {
+ best_gw = lnet_find_route_locked(best_ni->ni_net,
+ dst_nid,
+ rtr_nid);
+ /*
+ * if no route is found for that network then
+ * move onto the next peer_ni in the peer
+ */
+ if (!best_gw) {
+ lnet_net_unlock(cpt);
+ LCONSOLE_WARN("No route to peer from %s\n",
+ libcfs_nid2str(best_ni->ni_nid));
return -EHOSTUNREACH;
}
- /* rtr_nid is LNET_NID_ANY or NID of pre-determined router,
- * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't
- * pre-determined router, this can happen if router table
- * was changed when we release the lock */
- if (rtr_nid != lp->lp_nid) {
- cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
- if (cpt2 != cpt) {
- if (src_ni != NULL)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
+ CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
+ libcfs_nid2str(dst_nid),
+ libcfs_nid2str(best_gw->lpni_nid),
+ lnet_msgtyp2str(msg->msg_type), msg->msg_len);
- rtr_nid = lp->lp_nid;
- cpt = cpt2;
- goto again;
+ routing2 = true;
+ /*
+ * RULE: Each node considers only the next-hop
+ *
+ * We're going to route the message, so change the peer to
+ * the router.
+ */
+ LASSERT(best_gw->lpni_peer_net);
+ LASSERT(best_gw->lpni_peer_net->lpn_peer);
+ peer = best_gw->lpni_peer_net->lpn_peer;
+ } else if (!lnet_is_peer_net_healthy_locked(peer_net)) {
+ /*
+ * this peer_net is unhealthy but we still have an opportunity
+ * to find another peer_net that we can use
+ */
+ __u32 net_id = peer_net->lpn_net_id;
+ LCONSOLE_WARN("peer net %s unhealthy\n",
+ libcfs_net2str(net_id));
+ goto again;
+ }
+
+ /*
+ * Look at the peer NIs for the destination peer that connect
+ * to the chosen net. If a peer_ni is preferred when using the
+ * best_ni to communicate, we use that one. If there is no
+ * preferred peer_ni, or there are multiple preferred peer_ni,
+ * the available transmit credits are used. If the transmit
+ * credits are equal, we round-robin over the peer_ni.
+ */
+ lpni = NULL;
+ best_lpni_credits = INT_MIN;
+ preferred = false;
+ best_lpni = NULL;
+ while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net, lpni))) {
+ /*
+ * if this peer ni is not healthy just skip it, no point in
+ * examining it further
+ */
+ if (!lnet_is_peer_ni_healthy_locked(lpni))
+ continue;
+ ni_is_pref = lnet_peer_is_ni_pref_locked(lpni, best_ni);
+
+ /* if this is a preferred peer use it */
+ if (!preferred && ni_is_pref) {
+ preferred = true;
+ } else if (preferred && !ni_is_pref) {
+ /*
+ * this is not the preferred peer so let's ignore
+ * it.
+ */
+ continue;
+ } else if (lpni->lpni_txcredits < best_lpni_credits) {
+ /*
+ * We already have a peer that has more credits
+ * available than this one. No need to consider
+ * this peer further.
+ */
+ continue;
+ } else if (lpni->lpni_txcredits == best_lpni_credits) {
+ /*
+ * The best peer found so far and the current peer
+ * have the same number of available credits let's
+ * make sure to select between them using Round
+ * Robin
+ */
+ if (best_lpni) {
+ if (best_lpni->lpni_seq <= lpni->lpni_seq)
+ continue;
}
}
- CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
- libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
- lnet_msgtyp2str(msg->msg_type), msg->msg_len);
+ best_lpni = lpni;
+ best_lpni_credits = lpni->lpni_txcredits;
+ }
- if (src_ni == NULL) {
- src_ni = lp->lp_ni;
- src_nid = src_ni->ni_nid;
- } else {
- LASSERT(src_ni == lp->lp_ni);
- lnet_ni_decref_locked(src_ni, cpt);
- }
+ /* if we still can't find a peer ni then we can't reach it */
+ if (!best_lpni) {
+ __u32 net_id = (peer_net) ? peer_net->lpn_net_id :
+ LNET_NIDNET(dst_nid);
+ lnet_net_unlock(cpt);
+ LCONSOLE_WARN("no peer_ni found on peer net %s\n",
+ libcfs_net2str(net_id));
+ return -EHOSTUNREACH;
+ }
- lnet_peer_addref_locked(lp);
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
+send:
+ routing = routing || routing2;
- if (!msg->msg_routing) {
- /* I'm the source and now I know which NI to send on */
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+ /*
+ * Increment sequence number of the peer selected so that we
+ * pick the next one in Round Robin.
+ */
+ best_lpni->lpni_seq++;
+
+ /*
+ * grab a reference on the peer_ni so it sticks around even if
+ * we need to drop and relock the lnet_net_lock below.
+ */
+ lnet_peer_ni_addref_locked(best_lpni);
+
+ /*
+ * Use lnet_cpt_of_nid() to determine the CPT used to commit the
+ * message. This ensures that we get a CPT that is correct for
+ * the NI when the NI has been restricted to a subset of all CPTs.
+ * If the selected CPT differs from the one currently locked, we
+ * must unlock and relock the lnet_net_lock(), and then check whether
+ * the configuration has changed. We don't have a hold on the best_ni
+ * yet, and it may have vanished.
+ */
+ cpt2 = lnet_cpt_of_nid_locked(best_lpni->lpni_nid, best_ni);
+ if (cpt != cpt2) {
+ lnet_net_unlock(cpt);
+ cpt = cpt2;
+ lnet_net_lock(cpt);
+ if (seq != lnet_get_dlc_seq_locked()) {
+ lnet_peer_ni_decref_locked(best_lpni);
+ goto again;
}
+ }
+
+ /*
+ * store the best_lpni in the message right away to avoid having
+ * to do the same operation under different conditions
+ */
+ msg->msg_txpeer = best_lpni;
+ msg->msg_txni = best_ni;
+
+ /*
+ * grab a reference for the best_ni since now it's in use in this
+ * send. the reference will need to be dropped when the message is
+ * finished in lnet_finalize()
+ */
+ lnet_ni_addref_locked(msg->msg_txni, cpt);
+
+ /*
+ * Always set the target.nid to the best peer picked. Either the
+ * nid will be one of the preconfigured NIDs, or the same NID as
+ * what was originally set in the target or it will be the NID of
+ * a router if this message should be routed
+ */
+ msg->msg_target.nid = msg->msg_txpeer->lpni_nid;
+ /*
+ * lnet_msg_commit assigns the correct cpt to the message, which
+ * is used to decrement the correct refcount on the ni when it's
+ * time to return the credits
+ */
+ lnet_msg_commit(msg, cpt);
+
+ /*
+ * If we are routing the message then we don't need to overwrite
+ * the src_nid since it would've been set at the origin. Otherwise
+ * we are the originator so we need to set it.
+ */
+ if (!msg->msg_routing)
+ msg->msg_hdr.src_nid = cpu_to_le64(msg->msg_txni->ni_nid);
+
+ if (routing) {
msg->msg_target_is_router = 1;
- msg->msg_target.nid = lp->lp_nid;
msg->msg_target.pid = LNET_PID_LUSTRE;
+ /*
+ * since we're routing we want to ensure that the
+ * msg_hdr.dest_nid is set to the final destination. When
+ * the router receives this message it knows how to route
+ * it.
+ */
+ msg->msg_hdr.dest_nid =
+ cpu_to_le64(final_dst ? final_dst->lpni_nid : dst_nid);
+ } else {
+ /*
+ * if we're not routing set the dest_nid to the best peer
+ * ni that we picked earlier in the algorithm.
+ */
+ msg->msg_hdr.dest_nid = cpu_to_le64(msg->msg_txpeer->lpni_nid);
}
- /* 'lp' is our best choice of peer */
+ rc = lnet_post_send_locked(msg, 0);
- LASSERT(!msg->msg_peertxcredit);
- LASSERT(!msg->msg_txcredit);
- LASSERT(msg->msg_txpeer == NULL);
+ lnet_net_unlock(cpt);
- msg->msg_txpeer = lp; /* msg takes my ref on lp */
+ return rc;
+}
- rc = lnet_post_send_locked(msg, 0);
- lnet_net_unlock(cpt);
+/*
+ * Send \a msg to \a msg->msg_target, using \a src_nid if specified.
+ * Path selection (local NI, peer NI, routing) is delegated to
+ * lnet_select_pathway().  Returns 0 on success (message sent or
+ * queued for credits), negative errno on failure.
+ */
+int
+lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
+{
+ lnet_nid_t dst_nid = msg->msg_target.nid;
+ int rc;
+ /*
+ * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
+ * but we might want to use pre-determined router for ACK/REPLY
+ * in the future
+ */
+ /* NB: ni != NULL == interface pre-determined (ACK/REPLY) */
+ /* FIX: drop space before '(' - kernel style, and consistent with
+ * the LASSERT(!msg->msg_tx_committed) below */
+ LASSERT(msg->msg_txpeer == NULL);
+ LASSERT(!msg->msg_sending);
+ LASSERT(!msg->msg_target_is_router);
+ LASSERT(!msg->msg_receiving);
+
+ msg->msg_sending = 1;
+
+ LASSERT(!msg->msg_tx_committed);
+
+ rc = lnet_select_pathway(src_nid, dst_nid, msg, rtr_nid);
if (rc < 0)
return rc;
if (rc == LNET_CREDIT_OK)
- lnet_ni_send(src_ni, msg);
+ lnet_ni_send(msg->msg_txni, msg);
- return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
+ /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
+ return 0;
}
void
hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index);
hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
- info.mi_id.nid = hdr->src_nid;
+ /* Primary peer NID. */
+ info.mi_id.nid = msg->msg_initiator;
info.mi_id.pid = hdr->src_pid;
info.mi_opc = LNET_MD_OP_PUT;
info.mi_portal = hdr->msg.put.ptl_index;
info.mi_rlength = hdr->payload_length;
info.mi_roffset = hdr->msg.put.offset;
info.mi_mbits = hdr->msg.put.match_bits;
+ info.mi_cpt = lnet_cpt_of_nid(msg->msg_rxpeer->lpni_nid, ni);
- msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL;
+ msg->msg_rx_ready_delay = ni->ni_net->net_lnd->lnd_eager_recv == NULL;
ready_delay = msg->msg_rx_ready_delay;
again:
{
struct lnet_match_info info;
lnet_hdr_t *hdr = &msg->msg_hdr;
+ lnet_process_id_t source_id;
struct lnet_handle_wire reply_wmd;
int rc;
hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
- info.mi_id.nid = hdr->src_nid;
+ source_id.nid = hdr->src_nid;
+ source_id.pid = hdr->src_pid;
+ /* Primary peer NID */
+ info.mi_id.nid = msg->msg_initiator;
info.mi_id.pid = hdr->src_pid;
info.mi_opc = LNET_MD_OP_GET;
info.mi_portal = hdr->msg.get.ptl_index;
info.mi_rlength = hdr->msg.get.sink_length;
info.mi_roffset = hdr->msg.get.src_offset;
info.mi_mbits = hdr->msg.get.match_bits;
+ info.mi_cpt = lnet_cpt_of_nid(msg->msg_rxpeer->lpni_nid, ni);
rc = lnet_ptl_match_md(&info, msg);
if (rc == LNET_MATCHMD_DROP) {
reply_wmd = hdr->msg.get.return_wmd;
- lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
+ lnet_prep_send(msg, LNET_MSG_REPLY, source_id,
msg->msg_offset, msg->msg_wanted);
msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
if (!the_lnet.ln_routing)
return -ECANCELED;
- if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
+ if (msg->msg_rxpeer->lpni_rtrcredits <= 0 ||
lnet_msg2bufpool(msg)->rbp_credits <= 0) {
- if (ni->ni_lnd->lnd_eager_recv == NULL) {
+ if (ni->ni_net->net_lnd->lnd_eager_recv == NULL) {
msg->msg_rx_ready_delay = 1;
} else {
lnet_net_unlock(msg->msg_rx_cpt);
lnet_pid_t dest_pid;
lnet_nid_t dest_nid;
lnet_nid_t src_nid;
- __u32 payload_length;
- __u32 type;
+ struct lnet_peer_ni *lpni;
+ __u32 payload_length;
+ __u32 type;
LASSERT (!in_interrupt ());
payload_length = le32_to_cpu(hdr->payload_length);
for_me = (ni->ni_nid == dest_nid);
- cpt = lnet_cpt_of_nid(from_nid);
+ cpt = lnet_cpt_of_nid(from_nid, ni);
switch (type) {
case LNET_MSG_ACK:
msg->msg_hdr.dest_pid = dest_pid;
msg->msg_hdr.payload_length = payload_length;
}
+ /* Multi-Rail: Primary NID of source. */
+ msg->msg_initiator = lnet_peer_primary_nid(src_nid);
lnet_net_lock(cpt);
- rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
- if (rc != 0) {
+ lpni = lnet_nid2peerni_locked(from_nid, cpt);
+ if (IS_ERR(lpni)) {
lnet_net_unlock(cpt);
CERROR("%s, src %s: Dropping %s "
- "(error %d looking up sender)\n",
+ "(error %ld looking up sender)\n",
libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), rc);
+ lnet_msgtyp2str(type), PTR_ERR(lpni));
lnet_msg_free(msg);
- if (rc == -ESHUTDOWN)
+ /* FIX: rc is stale here - the lookup error now comes back
+ * via ERR_PTR(), so the shutdown case must test
+ * PTR_ERR(lpni), not the unrelated rc */
+ if (PTR_ERR(lpni) == -ESHUTDOWN)
/* We are shutting down. Don't do anything more */
return 0;
goto drop;
}
+ msg->msg_rxpeer = lpni;
+ msg->msg_rxni = ni;
+ lnet_ni_addref_locked(ni, cpt);
if (lnet_isrouter(msg->msg_rxpeer)) {
lnet_peer_set_alive(msg->msg_rxpeer);
* called lnet_drop_message(), so I just hang onto msg as well
* until that's done */
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
- msg->msg_rxpeer->lp_cpt,
+ lnet_drop_message(msg->msg_rxni, msg->msg_rx_cpt,
msg->msg_private, msg->msg_len);
/*
* NB: message will not generate event because w/o attached MD,
* but we still should give error code so lnet_msg_decommit()
* can skip counters operations and other checks.
*/
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+ lnet_finalize(msg->msg_rxni, msg, -ENOENT);
}
}
LASSERT(msg->msg_rx_delayed);
LASSERT(msg->msg_md != NULL);
LASSERT(msg->msg_rxpeer != NULL);
+ LASSERT(msg->msg_rxni != NULL);
LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
msg->msg_hdr.msg.put.offset,
msg->msg_hdr.payload_length);
- lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+ lnet_recv_put(msg->msg_rxni, msg);
}
}
libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
/* setup information for lnet_build_msg_event */
+ msg->msg_initiator = lnet_peer_primary_nid(peer_id.nid);
+ /* Cheaper: msg->msg_initiator = getmsg->msg_txpeer->lp_nid; */
msg->msg_from = peer_id.nid;
msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
msg->msg_hdr.src_nid = peer_id.nid;
lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
lnet_res_unlock(cpt);
- cpt = lnet_cpt_of_nid(peer_id.nid);
+ cpt = lnet_cpt_of_nid(peer_id.nid, ni);
lnet_net_lock(cpt);
lnet_msg_commit(msg, cpt);
return msg;
drop:
- cpt = lnet_cpt_of_nid(peer_id.nid);
+ cpt = lnet_cpt_of_nid(peer_id.nid, ni);
lnet_net_lock(cpt);
the_lnet.ln_counters[cpt]->drop_count++;
LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
{
struct list_head *e;
- struct lnet_ni *ni;
+ struct lnet_ni *ni = NULL;
lnet_remotenet_t *rnet;
__u32 dstnet = LNET_NIDNET(dstnid);
int hops;
cpt = lnet_net_lock_current();
- list_for_each(e, &the_lnet.ln_nis) {
- ni = list_entry(e, lnet_ni_t, ni_list);
-
+ while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
if (ni->ni_nid == dstnid) {
if (srcnidp != NULL)
*srcnidp = dstnid;
LASSERT(shortest != NULL);
hops = shortest_hops;
- if (srcnidp != NULL)
- *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+ if (srcnidp != NULL) {
+ ni = lnet_get_next_ni_locked(
+ shortest->lr_gateway->lpni_net,
+ NULL);
+ *srcnidp = ni->ni_nid;
+ }
if (orderp != NULL)
*orderp = order;
lnet_net_unlock(cpt);
ev->target.pid = le32_to_cpu(hdr->dest_pid);
ev->initiator.nid = LNET_NID_ANY;
ev->initiator.pid = the_lnet.ln_pid;
+ ev->source.nid = LNET_NID_ANY;
+ ev->source.pid = the_lnet.ln_pid;
ev->sender = LNET_NID_ANY;
} else {
ev->target.pid = hdr->dest_pid;
ev->target.nid = hdr->dest_nid;
ev->initiator.pid = hdr->src_pid;
- ev->initiator.nid = hdr->src_nid;
- ev->rlength = hdr->payload_length;
+ /* Multi-Rail: resolve src_nid to "primary" peer NID */
+ ev->initiator.nid = msg->msg_initiator;
+ /* Multi-Rail: track source NID. */
+ ev->source.pid = hdr->src_pid;
+ ev->source.nid = hdr->src_nid;
+ ev->rlength = hdr->payload_length;
ev->sender = msg->msg_from;
ev->mlength = msg->msg_wanted;
ev->offset = msg->msg_offset;
}
counters->send_count++;
+ if (msg->msg_txpeer)
+ atomic_inc(&msg->msg_txpeer->lpni_stats.send_count);
+ if (msg->msg_txni)
+ atomic_inc(&msg->msg_txni->ni_stats.send_count);
out:
lnet_return_tx_credits_locked(msg);
msg->msg_tx_committed = 0;
}
counters->recv_count++;
+ if (msg->msg_rxpeer)
+ atomic_inc(&msg->msg_rxpeer->lpni_stats.recv_count);
+ if (msg->msg_rxni)
+ atomic_inc(&msg->msg_rxni->ni_stats.recv_count);
if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
counters->recv_length += msg->msg_wanted;
ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
- lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
+ lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.source, 0, 0);
msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
/* if it's a unique portal, return match-table hashed by NID */
return lnet_ptl_is_unique(ptl) ?
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+ ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL;
}
struct lnet_match_table *
rotor = ptl->ptl_rotor++; /* get round-robin factor */
if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
- cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+ cpt = info->mi_cpt;
else
cpt = rotor % LNET_CPT_NUMBER;
LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
hdr = &msg->msg_hdr;
- info.mi_id.nid = hdr->src_nid;
+ /* Multi-Rail: Primary peer NID */
+ info.mi_id.nid = msg->msg_initiator;
info.mi_id.pid = hdr->src_pid;
info.mi_opc = LNET_MD_OP_PUT;
info.mi_portal = hdr->msg.put.ptl_index;
/* grab all messages which are on the NI passed in */
list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
msg_list) {
- if (msg->msg_rxpeer->lp_ni == ni)
+ if (msg->msg_txni == ni || msg->msg_rxni == ni)
list_move(&msg->msg_list, &zombies);
}
} else {
static int
lolnd_startup (lnet_ni_t *ni)
{
- LASSERT (ni->ni_lnd == &the_lolnd);
+ LASSERT (ni->ni_net->net_lnd == &the_lolnd);
LASSERT (!lolnd_instanced);
lolnd_instanced = 1;
}
+/*
+ * IOC_LIBCFS_ADD_NET handler: dynamically add a net described by
+ * \a hdr under lnet_config_mutex.  Fails with -EINVAL unless LNet
+ * brought itself up (ln_niinit_self).
+ */
static int
-lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
+lnet_dyn_configure_net(struct libcfs_ioctl_hdr *hdr)
{
struct lnet_ioctl_config_data *conf =
(struct lnet_ioctl_config_data *)hdr;
return -EINVAL;
mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
+ if (the_lnet.ln_niinit_self)
+ rc = lnet_dyn_add_net(conf);
+ else
rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
-out_unlock:
mutex_unlock(&lnet_config_mutex);
return rc;
}
+/*
+ * IOC_LIBCFS_DEL_NET handler: dynamically remove net cfg_net under
+ * lnet_config_mutex; -EINVAL unless LNet brought itself up.
+ */
static int
-lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr)
+lnet_dyn_unconfigure_net(struct libcfs_ioctl_hdr *hdr)
{
struct lnet_ioctl_config_data *conf =
(struct lnet_ioctl_config_data *) hdr;
return -EINVAL;
mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
+ if (the_lnet.ln_niinit_self)
+ rc = lnet_dyn_del_net(conf->cfg_net);
+ else
+ rc = -EINVAL;
+ mutex_unlock(&lnet_config_mutex);
+
+ return rc;
+}
+
+/*
+ * IOC_LIBCFS_ADD_LOCAL_NI handler: add a single local NI from the
+ * ioctl config under lnet_config_mutex; -EINVAL if the header is too
+ * short or LNet did not bring itself up (ln_niinit_self).
+ */
+static int
+lnet_dyn_configure_ni(struct libcfs_ioctl_hdr *hdr)
+{
+ struct lnet_ioctl_config_ni *conf =
+ (struct lnet_ioctl_config_ni *)hdr;
+ int rc;
+
+ if (conf->lic_cfg_hdr.ioc_len < sizeof(*conf))
+ return -EINVAL;
+
+ mutex_lock(&lnet_config_mutex);
+ if (the_lnet.ln_niinit_self)
+ rc = lnet_dyn_add_ni(conf);
+ else
+ rc = -EINVAL;
+ mutex_unlock(&lnet_config_mutex);
+
+ return rc;
+}
+
+/*
+ * IOC_LIBCFS_DEL_LOCAL_NI handler: remove a single local NI named in
+ * the ioctl config, mirroring lnet_dyn_configure_ni() above.
+ */
+static int
+lnet_dyn_unconfigure_ni(struct libcfs_ioctl_hdr *hdr)
+{
+ struct lnet_ioctl_config_ni *conf =
+ (struct lnet_ioctl_config_ni *) hdr;
+ int rc;
+
+ if (conf->lic_cfg_hdr.ioc_len < sizeof(*conf))
+ return -EINVAL;
+
+ mutex_lock(&lnet_config_mutex);
+ if (the_lnet.ln_niinit_self)
+ rc = lnet_dyn_del_ni(conf);
+ else
rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_del_ni(conf->cfg_net);
-out_unlock:
mutex_unlock(&lnet_config_mutex);
return rc;
return lnet_unconfigure();
case IOC_LIBCFS_ADD_NET:
- return lnet_dyn_configure(hdr);
+ return lnet_dyn_configure_net(hdr);
case IOC_LIBCFS_DEL_NET:
- return lnet_dyn_unconfigure(hdr);
+ return lnet_dyn_unconfigure_net(hdr);
+
+ case IOC_LIBCFS_ADD_LOCAL_NI:
+ return lnet_dyn_configure_ni(hdr);
+
+ case IOC_LIBCFS_DEL_LOCAL_NI:
+ return lnet_dyn_unconfigure_ni(hdr);
default:
/* Passing LNET_PID_ANY only gives me a ref if the net is up
msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
LASSERT(msg->msg_rxpeer != NULL);
+ LASSERT(msg->msg_rxni != NULL);
- ni = msg->msg_rxpeer->lp_ni;
+ ni = msg->msg_rxni;
cpt = msg->msg_rx_cpt;
list_del_init(&msg->msg_list);
#include <lnet/lib-lnet.h>
#include <lnet/lib-dlc.h>
+/*
+ * Drop @lpni from the global ln_remote_peer_ni_list and release the
+ * reference that list membership held.  No-op if the peer_ni is not
+ * on the list.
+ */
+static void
+lnet_peer_remove_from_remote_list(struct lnet_peer_ni *lpni)
+{
+ if (!list_empty(&lpni->lpni_on_remote_peer_ni_list)) {
+ list_del_init(&lpni->lpni_on_remote_peer_ni_list);
+ lnet_peer_ni_decref_locked(lpni);
+ }
+}
+
+/*
+ * A local net was just added: walk the remote peer_ni list and attach
+ * any peer_ni belonging to @net's network to it, initializing its
+ * credits from the net's tunables, then take it off the remote list.
+ */
+void
+lnet_peer_net_added(struct lnet_net *net)
+{
+ struct lnet_peer_ni *lpni, *tmp;
+
+ list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list,
+ lpni_on_remote_peer_ni_list) {
+
+ if (LNET_NIDNET(lpni->lpni_nid) == net->net_id) {
+ lpni->lpni_net = net;
+
+ /* credits now come from the newly added net */
+ spin_lock(&lpni->lpni_lock);
+ lpni->lpni_txcredits =
+ lpni->lpni_net->net_tunables.lct_peer_tx_credits;
+ lpni->lpni_mintxcredits = lpni->lpni_txcredits;
+ lpni->lpni_rtrcredits =
+ lnet_peer_buffer_credits(lpni->lpni_net);
+ lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
+ spin_unlock(&lpni->lpni_lock);
+
+ lnet_peer_remove_from_remote_list(lpni);
+ }
+ }
+}
+
+/*
+ * Free the per-CPT peer hash tables.  All hash chains and zombie
+ * lists must already be empty (asserted below).
+ */
+static void
+lnet_peer_tables_destroy(void)
+{
+ struct lnet_peer_table *ptable;
+ struct list_head *hash;
+ int i;
+ int j;
+
+ if (!the_lnet.ln_peer_tables)
+ return;
+
+ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+ hash = ptable->pt_hash;
+ if (!hash) /* not initialized */
+ break;
+
+ LASSERT(list_empty(&ptable->pt_zombie_list));
+
+ ptable->pt_hash = NULL;
+ for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+ LASSERT(list_empty(&hash[j]));
+
+ LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
+ }
+
+ cfs_percpt_free(the_lnet.ln_peer_tables);
+ the_lnet.ln_peer_tables = NULL;
+}
+
int
lnet_peer_tables_create(void)
{
}
cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- INIT_LIST_HEAD(&ptable->pt_deathrow);
-
LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
LNET_PEER_HASH_SIZE * sizeof(*hash));
if (hash == NULL) {
return -ENOMEM;
}
+ spin_lock_init(&ptable->pt_zombie_lock);
+ INIT_LIST_HEAD(&ptable->pt_zombie_list);
+
for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
INIT_LIST_HEAD(&hash[j]);
ptable->pt_hash = hash; /* sign of initialization */
return 0;
}
-void
-lnet_peer_tables_destroy(void)
+/*
+ * Allocate and initialize a peer_ni for @nid on the CPT derived from
+ * the NID hash.  If the NID's network is local, credits come from the
+ * net's tunables; otherwise the peer_ni is parked (with a ref) on the
+ * global remote peer_ni list until the net appears (see
+ * lnet_peer_net_added()).  Returns NULL on allocation failure.
+ */
+static struct lnet_peer_ni *
+lnet_peer_ni_alloc(lnet_nid_t nid)
{
- struct lnet_peer_table *ptable;
- struct list_head *hash;
- int i;
- int j;
+ struct lnet_peer_ni *lpni;
+ struct lnet_net *net;
+ int cpt;
+
+ cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+
+ LIBCFS_CPT_ALLOC(lpni, lnet_cpt_table(), cpt, sizeof(*lpni));
+ if (!lpni)
+ return NULL;
+
+ INIT_LIST_HEAD(&lpni->lpni_txq);
+ INIT_LIST_HEAD(&lpni->lpni_rtrq);
+ INIT_LIST_HEAD(&lpni->lpni_routes);
+ INIT_LIST_HEAD(&lpni->lpni_hashlist);
+ INIT_LIST_HEAD(&lpni->lpni_on_peer_net_list);
+ INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
+
+ spin_lock_init(&lpni->lpni_lock);
+
+ lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! */
+ lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */
+ lpni->lpni_ping_feats = LNET_PING_FEAT_INVAL;
+ lpni->lpni_nid = nid;
+ lpni->lpni_cpt = cpt;
+ lnet_set_peer_ni_health_locked(lpni, true);
+
+ net = lnet_get_net_locked(LNET_NIDNET(nid));
+ lpni->lpni_net = net;
+ if (net) {
+ lpni->lpni_txcredits = net->net_tunables.lct_peer_tx_credits;
+ lpni->lpni_mintxcredits = lpni->lpni_txcredits;
+ lpni->lpni_rtrcredits = lnet_peer_buffer_credits(net);
+ lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
+ } else {
+ /*
+ * This peer_ni is not on a local network, so we
+ * cannot add the credits here. In case the net is
+ * added later, add the peer_ni to the remote peer ni
+ * list so it can be easily found and revisited.
+ */
+ /* FIXME: per-net implementation instead? */
+ atomic_inc(&lpni->lpni_refcount);
+ list_add_tail(&lpni->lpni_on_remote_peer_ni_list,
+ &the_lnet.ln_remote_peer_ni_list);
+ }
+
+ /* TODO: update flags */
+
+ return lpni;
+}
+
+/*
+ * Allocate a peer_net container for @net_id.  It is not attached to
+ * any peer yet; returns NULL on allocation failure.
+ */
+static struct lnet_peer_net *
+lnet_peer_net_alloc(__u32 net_id)
+{
+ struct lnet_peer_net *lpn;
+
+ LIBCFS_CPT_ALLOC(lpn, lnet_cpt_table(), CFS_CPT_ANY, sizeof(*lpn));
+ if (!lpn)
+ return NULL;
- if (the_lnet.ln_peer_tables == NULL)
+ INIT_LIST_HEAD(&lpn->lpn_on_peer_list);
+ INIT_LIST_HEAD(&lpn->lpn_peer_nis);
+ lpn->lpn_net_id = net_id;
+
+ return lpn;
+}
+
+/*
+ * Allocate a peer whose primary NID is @nid.  Lists start empty;
+ * returns NULL on allocation failure.
+ */
+static struct lnet_peer *
+lnet_peer_alloc(lnet_nid_t nid)
+{
+ struct lnet_peer *lp;
+
+ LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), CFS_CPT_ANY, sizeof(*lp));
+ if (!lp)
+ return NULL;
+
+ INIT_LIST_HEAD(&lp->lp_on_lnet_peer_list);
+ INIT_LIST_HEAD(&lp->lp_peer_nets);
+ lp->lp_primary_nid = nid;
+
+ /* TODO: update flags */
+
+ return lp;
+}
+
+
+static void
+lnet_try_destroy_peer_hierarchy_locked(struct lnet_peer_ni *lpni)
+{
+ struct lnet_peer_net *peer_net;
+ struct lnet_peer *peer;
+
+ /* TODO: could the below situation happen? accessing an already
+ * destroyed peer? */
+ if (lpni->lpni_peer_net == NULL ||
+ lpni->lpni_peer_net->lpn_peer == NULL)
return;
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- hash = ptable->pt_hash;
- if (hash == NULL) /* not intialized */
- break;
+ peer_net = lpni->lpni_peer_net;
+ peer = lpni->lpni_peer_net->lpn_peer;
- LASSERT(list_empty(&ptable->pt_deathrow));
+ list_del_init(&lpni->lpni_on_peer_net_list);
+ lpni->lpni_peer_net = NULL;
- ptable->pt_hash = NULL;
- for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
- LASSERT(list_empty(&hash[j]));
+ /* if peer_net is empty, then remove it from the peer */
+ if (list_empty(&peer_net->lpn_peer_nis)) {
+ list_del_init(&peer_net->lpn_on_peer_list);
+ peer_net->lpn_peer = NULL;
+ LIBCFS_FREE(peer_net, sizeof(*peer_net));
- LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
+ /* if the peer is empty then remove it from the
+ * the_lnet.ln_peers */
+ if (list_empty(&peer->lp_peer_nets)) {
+ list_del_init(&peer->lp_on_lnet_peer_list);
+ LIBCFS_FREE(peer, sizeof(*peer));
+ }
}
+}
- cfs_percpt_free(the_lnet.ln_peer_tables);
- the_lnet.ln_peer_tables = NULL;
+/*
+ * Unlink @lpni from lookup structures and move it to its table's
+ * zombie list until the refcount drains.  Fails with -EBUSY if the
+ * peer_ni is still referenced as a router gateway.
+ *
+ * called with lnet_net_lock LNET_LOCK_EX held
+ */
+static int
+lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni)
+{
+ struct lnet_peer_table *ptable = NULL;
+
+ /* don't remove a peer_ni if it's also a gateway */
+ if (lpni->lpni_rtr_refcount > 0) {
+ CERROR("Peer NI %s is a gateway. Can not delete it\n",
+ libcfs_nid2str(lpni->lpni_nid));
+ return -EBUSY;
+ }
+
+ lnet_peer_remove_from_remote_list(lpni);
+
+ /* remove peer ni from the hash list. */
+ list_del_init(&lpni->lpni_hashlist);
+
+ /* decrement the ref count on the peer table */
+ ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
+ LASSERT(atomic_read(&ptable->pt_number) > 0);
+ atomic_dec(&ptable->pt_number);
+
+ /*
+ * The peer_ni can no longer be found with a lookup. But there
+ * can be current users, so keep track of it on the zombie
+ * list until the reference count has gone to zero.
+ *
+ * The last reference may be lost in a place where the
+ * lnet_net_lock locks only a single cpt, and that cpt may not
+ * be lpni->lpni_cpt. So the zombie list of this peer_table
+ * has its own lock.
+ */
+ spin_lock(&ptable->pt_zombie_lock);
+ list_add(&lpni->lpni_hashlist, &ptable->pt_zombie_list);
+ ptable->pt_zombies++;
+ spin_unlock(&ptable->pt_zombie_lock);
+
+ /* no need to keep this peer on the hierarchy anymore */
+ lnet_try_destroy_peer_hierarchy_locked(lpni);
+
+ /* decrement reference on peer */
+ lnet_peer_ni_decref_locked(lpni);
+
+ return 0;
+}
+
+/*
+ * Final peer-module cleanup: delete any peer_nis still on the remote
+ * list, then destroy the peer tables, all under
+ * lnet_net_lock(LNET_LOCK_EX).
+ */
+void lnet_peer_uninit(void)
+{
+ struct lnet_peer_ni *lpni, *tmp;
+
+ lnet_net_lock(LNET_LOCK_EX);
+
+ /* remove all peer_nis from the remote peer and the hash list */
+ list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list,
+ lpni_on_remote_peer_ni_list)
+ lnet_peer_ni_del_locked(lpni);
+
+ lnet_peer_tables_destroy();
+
+ lnet_net_unlock(LNET_LOCK_EX);
+}
+
+/*
+ * Delete every peer_ni belonging to @peer; the hierarchy teardown in
+ * lnet_peer_ni_del_locked() frees the peer itself once empty.
+ * Returns the last non-zero per-NI status, or 0 on full success.
+ */
+static int
+lnet_peer_del_locked(struct lnet_peer *peer)
+{
+ struct lnet_peer_ni *lpni = NULL, *lpni2;
+ int rc = 0, rc2 = 0;
+
+ lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
+ while (lpni != NULL) {
+ /* fetch the successor before deleting the current entry */
+ lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
+ rc = lnet_peer_ni_del_locked(lpni);
+ if (rc != 0)
+ rc2 = rc;
+ lpni = lpni2;
+ }
+
+ return rc2;
}
static void
-lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable)
+lnet_peer_table_cleanup_locked(struct lnet_net *net,
+ struct lnet_peer_table *ptable)
{
- int i;
- lnet_peer_t *lp;
- lnet_peer_t *tmp;
+ int i;
+ struct lnet_peer_ni *next;
+ struct lnet_peer_ni *lpni;
+ struct lnet_peer *peer;
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
- list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni != NULL && ni != lp->lp_ni)
+ list_for_each_entry_safe(lpni, next, &ptable->pt_hash[i],
+ lpni_hashlist) {
+ if (net != NULL && net != lpni->lpni_net)
continue;
- list_del_init(&lp->lp_hashlist);
- /* Lose hash table's ref */
- ptable->pt_zombies++;
- lnet_peer_decref_locked(lp);
+
+ peer = lpni->lpni_peer_net->lpn_peer;
+ if (peer->lp_primary_nid != lpni->lpni_nid) {
+ lnet_peer_ni_del_locked(lpni);
+ continue;
+ }
+ /*
+ * Removing the primary NID implies removing
+ * the entire peer. Advance next beyond any
+ * peer_ni that belongs to the same peer.
+ */
+ list_for_each_entry_from(next, &ptable->pt_hash[i],
+ lpni_hashlist) {
+ if (next->lpni_peer_net->lpn_peer != peer)
+ break;
+ }
+ lnet_peer_del_locked(peer);
}
}
}
static void
-lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
- int cpt_locked)
+lnet_peer_ni_finalize_wait(struct lnet_peer_table *ptable)
{
- int i;
+ int i = 3;
- for (i = 3; ptable->pt_zombies != 0; i++) {
- lnet_net_unlock(cpt_locked);
+ spin_lock(&ptable->pt_zombie_lock);
+ while (ptable->pt_zombies) {
+ spin_unlock(&ptable->pt_zombie_lock);
if (IS_PO2(i)) {
CDEBUG(D_WARNING,
}
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(cfs_time_seconds(1) >> 1);
- lnet_net_lock(cpt_locked);
+ spin_lock(&ptable->pt_zombie_lock);
}
+ spin_unlock(&ptable->pt_zombie_lock);
}
static void
-lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable,
- int cpt_locked)
+lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
+ struct lnet_peer_table *ptable)
{
- lnet_peer_t *lp;
- lnet_peer_t *tmp;
- lnet_nid_t lp_nid;
- int i;
+ struct lnet_peer_ni *lp;
+ struct lnet_peer_ni *tmp;
+ lnet_nid_t lpni_nid;
+ int i;
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni != lp->lp_ni)
+ lpni_hashlist) {
+ if (net != lp->lpni_net)
continue;
- if (lp->lp_rtr_refcount == 0)
+ if (lp->lpni_rtr_refcount == 0)
continue;
- lp_nid = lp->lp_nid;
+ lpni_nid = lp->lpni_nid;
- lnet_net_unlock(cpt_locked);
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
- lnet_net_lock(cpt_locked);
+ lnet_net_unlock(LNET_LOCK_EX);
+ lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lpni_nid);
+ lnet_net_lock(LNET_LOCK_EX);
}
}
}
void
-lnet_peer_tables_cleanup(lnet_ni_t *ni)
+lnet_peer_tables_cleanup(struct lnet_net *net)
{
- int i;
- struct lnet_peer_table *ptable;
- lnet_peer_t *lp;
- struct list_head deathrow;
-
- INIT_LIST_HEAD(&deathrow);
+ int i;
+ struct lnet_peer_table *ptable;
- LASSERT(the_lnet.ln_shutdown || ni != NULL);
+ LASSERT(the_lnet.ln_shutdown || net != NULL);
/* If just deleting the peers for a NI, get rid of any routes these
* peers are gateways for. */
cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_del_rtrs_locked(ni, ptable, i);
- lnet_net_unlock(i);
+ lnet_net_lock(LNET_LOCK_EX);
+ lnet_peer_table_del_rtrs_locked(net, ptable);
+ lnet_net_unlock(LNET_LOCK_EX);
}
- /* Start the process of moving the applicable peers to
- * deathrow. */
+ /* Start the cleanup process */
cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_cleanup_locked(ni, ptable);
- lnet_net_unlock(i);
+ lnet_net_lock(LNET_LOCK_EX);
+ lnet_peer_table_cleanup_locked(net, ptable);
+ lnet_net_unlock(LNET_LOCK_EX);
}
- /* Cleanup all entries on deathrow. */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_deathrow_wait_locked(ptable, i);
- list_splice_init(&ptable->pt_deathrow, &deathrow);
- lnet_net_unlock(i);
- }
+ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables)
+ lnet_peer_ni_finalize_wait(ptable);
+}
- while (!list_empty(&deathrow)) {
- lp = list_entry(deathrow.next, lnet_peer_t, lp_hashlist);
- list_del(&lp->lp_hashlist);
- LIBCFS_FREE(lp, sizeof(*lp));
+/*
+ * Hash-chain lookup of a peer_ni by NID within one peer table.
+ * Takes a reference on the entry before returning it; NULL if not
+ * found.  Must not be called during shutdown (asserted).
+ */
+static struct lnet_peer_ni *
+lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
+{
+ struct list_head *peers;
+ struct lnet_peer_ni *lp;
+
+ LASSERT(!the_lnet.ln_shutdown);
+
+ peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
+ list_for_each_entry(lp, peers, lpni_hashlist) {
+ if (lp->lpni_nid == nid) {
+ lnet_peer_ni_addref_locked(lp);
+ return lp;
+ }
}
+
+ return NULL;
}
-void
-lnet_destroy_peer_locked(lnet_peer_t *lp)
+struct lnet_peer_ni *
+lnet_find_peer_ni_locked(lnet_nid_t nid)
{
+ struct lnet_peer_ni *lpni;
struct lnet_peer_table *ptable;
+ int cpt;
- LASSERT(lp->lp_refcount == 0);
- LASSERT(lp->lp_rtr_refcount == 0);
- LASSERT(list_empty(&lp->lp_txq));
- LASSERT(list_empty(&lp->lp_hashlist));
- LASSERT(lp->lp_txqnob == 0);
-
- ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
- LASSERT(ptable->pt_number > 0);
- ptable->pt_number--;
+ cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
- lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
- lp->lp_ni = NULL;
+ ptable = the_lnet.ln_peer_tables[cpt];
+ lpni = lnet_get_peer_ni_locked(ptable, nid);
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- LASSERT(ptable->pt_zombies > 0);
- ptable->pt_zombies--;
+ return lpni;
}
-lnet_peer_t *
-lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
+struct lnet_peer *
+lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt)
{
- struct list_head *peers;
- lnet_peer_t *lp;
+ struct lnet_peer_ni *lpni;
+ struct lnet_peer *lp;
+
+ lpni = lnet_find_peer_ni_locked(dst_nid);
+ if (!lpni) {
+ lpni = lnet_nid2peerni_locked(dst_nid, cpt);
+ if (IS_ERR(lpni))
+ return ERR_CAST(lpni);
+ }
- LASSERT(!the_lnet.ln_shutdown);
+ lp = lpni->lpni_peer_net->lpn_peer;
+ lnet_peer_ni_decref_locked(lpni);
- peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
- list_for_each_entry(lp, peers, lp_hashlist) {
- if (lp->lp_nid == nid) {
- lnet_peer_addref_locked(lp);
- return lp;
+ return lp;
+}
+
+/*
+ * Return the @idx'th peer_ni in a flat walk over all peers, their
+ * nets, and their NIs, also outputting the enclosing peer_net and
+ * peer through @lpn/@lp.  NULL when @idx is past the end.
+ */
+struct lnet_peer_ni *
+lnet_get_peer_ni_idx_locked(int idx, struct lnet_peer_net **lpn,
+ struct lnet_peer **lp)
+{
+ struct lnet_peer_ni *lpni;
+
+ list_for_each_entry((*lp), &the_lnet.ln_peers, lp_on_lnet_peer_list) {
+ list_for_each_entry((*lpn), &((*lp)->lp_peer_nets), lpn_on_peer_list) {
+ list_for_each_entry(lpni, &((*lpn)->lpn_peer_nis),
+ lpni_on_peer_net_list)
+ if (idx-- == 0)
+ return lpni;
}
}
return NULL;
}
-int
-lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
+struct lnet_peer_ni *
+lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
+ struct lnet_peer_net *peer_net,
+ struct lnet_peer_ni *prev)
{
- struct lnet_peer_table *ptable;
- lnet_peer_t *lp = NULL;
- lnet_peer_t *lp2;
- int cpt2;
- int rc = 0;
+ struct lnet_peer_ni *lpni;
+ struct lnet_peer_net *net = peer_net;
+
+ if (!prev) {
+ if (!net)
+ net = list_entry(peer->lp_peer_nets.next,
+ struct lnet_peer_net,
+ lpn_on_peer_list);
+ lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
+ lpni_on_peer_net_list);
+
+ return lpni;
+ }
- *lpp = NULL;
- if (the_lnet.ln_shutdown) /* it's shutting down */
- return -ESHUTDOWN;
+ if (prev->lpni_on_peer_net_list.next ==
+ &prev->lpni_peer_net->lpn_peer_nis) {
+ /*
+ * if you reached the end of the peer ni list and the peer
+ * net is specified then there are no more peer nis in that
+ * net.
+ */
+ if (net)
+ return NULL;
+
+ /*
+ * we reached the end of this net ni list. move to the
+ * next net
+ */
+ if (prev->lpni_peer_net->lpn_on_peer_list.next ==
+ &peer->lp_peer_nets)
+ /* no more nets and no more NIs. */
+ return NULL;
+
+ /* get the next net */
+ net = list_entry(prev->lpni_peer_net->lpn_on_peer_list.next,
+ struct lnet_peer_net,
+ lpn_on_peer_list);
+ /* get the ni on it */
+ lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
+ lpni_on_peer_net_list);
+
+ return lpni;
+ }
- /* cpt can be LNET_LOCK_EX if it's called from router functions */
- cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
+ /* there are more nis left */
+ lpni = list_entry(prev->lpni_on_peer_net_list.next,
+ struct lnet_peer_ni, lpni_on_peer_net_list);
- ptable = the_lnet.ln_peer_tables[cpt2];
- lp = lnet_find_peer_locked(ptable, nid);
- if (lp != NULL) {
- *lpp = lp;
- return 0;
+ return lpni;
+}
+
+/*
+ * True if local NI @ni's NID appears in @lpni's preferred-NID array.
+ */
+bool
+lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni, struct lnet_ni *ni)
+{
+ int i;
+
+ for (i = 0; i < lpni->lpni_pref_nnids; i++) {
+ if (lpni->lpni_pref_nids[i] == ni->ni_nid)
+ return true;
}
+ return false;
+}
- if (!list_empty(&ptable->pt_deathrow)) {
- lp = list_entry(ptable->pt_deathrow.next,
- lnet_peer_t, lp_hashlist);
- list_del(&lp->lp_hashlist);
+/*
+ * Map @nid to the primary NID of its peer.  Returns @nid unchanged
+ * when no peer_ni is found; never creates one.
+ */
+lnet_nid_t
+lnet_peer_primary_nid(lnet_nid_t nid)
+{
+ struct lnet_peer_ni *lpni;
+ lnet_nid_t primary_nid = nid;
+ int cpt;
+
+ cpt = lnet_net_lock_current();
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni) {
+ primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
+ lnet_peer_ni_decref_locked(lpni);
}
+ lnet_net_unlock(cpt);
+
+ return primary_nid;
+}
+
+/*
+ * Exported variant of lnet_peer_primary_nid(): creates the peer_ni if
+ * needed (lnet_nid2peerni_locked) and returns the primary NID of its
+ * peer; returns @nid itself when lookup/creation fails.
+ */
+lnet_nid_t
+LNetPrimaryNID(lnet_nid_t nid)
+{
+ struct lnet_peer_ni *lpni;
+ lnet_nid_t primary_nid = nid;
+ int rc = 0;
+ int cpt;
+
+ cpt = lnet_net_lock_current();
+ lpni = lnet_nid2peerni_locked(nid, cpt);
+ if (IS_ERR(lpni)) {
+ rc = PTR_ERR(lpni);
+ goto out_unlock;
+ }
+ primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
+ lnet_peer_ni_decref_locked(lpni);
+out_unlock:
+ lnet_net_unlock(cpt);
+
+ CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nid2str(nid),
+ libcfs_nid2str(primary_nid), rc);
+ return primary_nid;
+}
+EXPORT_SYMBOL(LNetPrimaryNID);
+
+/*
+ * Find the peer_net for @net_id among @peer's nets; NULL if absent.
+ */
+struct lnet_peer_net *
+lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id)
+{
+ struct lnet_peer_net *peer_net;
+ list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
+ if (peer_net->lpn_net_id == net_id)
+ return peer_net;
+ }
+ return NULL;
+}
+
+static int
+lnet_peer_setup_hierarchy(struct lnet_peer *lp, struct lnet_peer_ni *lpni,
+ lnet_nid_t nid)
+{
+ struct lnet_peer_net *lpn = NULL;
+ struct lnet_peer_table *ptable;
+ __u32 net_id = LNET_NIDNET(nid);
/*
- * take extra refcount in case another thread has shutdown LNet
- * and destroyed locks and peer-table before I finish the allocation
+ * Create the peer_ni, peer_net, and peer if they don't exist
+ * yet.
*/
- ptable->pt_number++;
- lnet_net_unlock(cpt);
+ if (lp) {
+ lpn = lnet_peer_get_net_locked(lp, net_id);
+ } else {
+ lp = lnet_peer_alloc(nid);
+ if (!lp)
+ goto out_enomem;
+ }
- if (lp != NULL)
- memset(lp, 0, sizeof(*lp));
- else
- LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));
-
- if (lp == NULL) {
- rc = -ENOMEM;
- lnet_net_lock(cpt);
- goto out;
- }
-
- INIT_LIST_HEAD(&lp->lp_txq);
- INIT_LIST_HEAD(&lp->lp_rtrq);
- INIT_LIST_HEAD(&lp->lp_routes);
-
- lp->lp_notify = 0;
- lp->lp_notifylnd = 0;
- lp->lp_notifying = 0;
- lp->lp_alive_count = 0;
- lp->lp_timestamp = 0;
- lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
- lp->lp_last_alive = cfs_time_current(); /* assumes alive */
- lp->lp_last_query = 0; /* haven't asked NI yet */
- lp->lp_ping_timestamp = 0;
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
- lp->lp_nid = nid;
- lp->lp_cpt = cpt2;
- lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
- lp->lp_rtr_refcount = 0;
+ if (!lpn) {
+ lpn = lnet_peer_net_alloc(net_id);
+ if (!lpn)
+ goto out_maybe_free_lp;
+ }
- lnet_net_lock(cpt);
+ if (!lpni) {
+ lpni = lnet_peer_ni_alloc(nid);
+ if (!lpni)
+ goto out_maybe_free_lpn;
+ }
- if (the_lnet.ln_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
+ /* Install the new peer_ni */
+ lnet_net_lock(LNET_LOCK_EX);
+ /* Add peer_ni to global peer table hash, if necessary. */
+ if (list_empty(&lpni->lpni_hashlist)) {
+ ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
+ list_add_tail(&lpni->lpni_hashlist,
+ &ptable->pt_hash[lnet_nid2peerhash(nid)]);
+ ptable->pt_version++;
+ atomic_inc(&ptable->pt_number);
+ atomic_inc(&lpni->lpni_refcount);
+ }
+
+ /* Detach the peer_ni from an existing peer, if necessary. */
+ if (lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer != lp)
+ lnet_try_destroy_peer_hierarchy_locked(lpni);
+
+ /* Add peer_ni to peer_net */
+ lpni->lpni_peer_net = lpn;
+ list_add_tail(&lpni->lpni_on_peer_net_list, &lpn->lpn_peer_nis);
+
+ /* Add peer_net to peer */
+ if (!lpn->lpn_peer) {
+ lpn->lpn_peer = lp;
+ list_add_tail(&lpn->lpn_on_peer_list, &lp->lp_peer_nets);
+ }
+
+ /* Add peer to global peer list */
+ if (list_empty(&lp->lp_on_lnet_peer_list))
+ list_add_tail(&lp->lp_on_lnet_peer_list, &the_lnet.ln_peers);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ return 0;
+
+out_maybe_free_lpn:
+ if (list_empty(&lpn->lpn_on_peer_list))
+ LIBCFS_FREE(lpn, sizeof(*lpn));
+out_maybe_free_lp:
+ if (list_empty(&lp->lp_on_lnet_peer_list))
+ LIBCFS_FREE(lp, sizeof(*lp));
+out_enomem:
+ return -ENOMEM;
+}
+
+/*
+ * Add/confirm @nid as the primary NID of an MR-capable peer, creating
+ * the peer hierarchy if it does not exist yet.  Returns -EEXIST if
+ * @nid already belongs to a peer with a different primary NID.
+ */
+static int
+lnet_add_prim_lpni(lnet_nid_t nid)
+{
+ int rc = 0; /* must be initialized: the found-lpni path returns it */
+ struct lnet_peer *peer;
+ struct lnet_peer_ni *lpni;
+
+ LASSERT(nid != LNET_NID_ANY);
+
+ /*
+ * lookup the NID and its peer
+ * if the peer doesn't exist, create it.
+ * if this is a non-MR peer then change its state to MR and exit.
+ * if this is an MR peer and it's a primary NI: NO-OP.
+ * if this is an MR peer and it's not a primary NI. Operation not
+ * allowed.
+ *
+ * The adding and deleting of peer nis is being serialized through
+ * the api_mutex. So we can look up peers with the mutex locked
+ * safely. Only when we need to change the ptable, do we need to
+ * exclusively lock the lnet_net_lock()
+ */
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (!lpni) {
+ rc = lnet_peer_setup_hierarchy(NULL, NULL, nid);
+ if (rc != 0)
+ return rc;
+ lpni = lnet_find_peer_ni_locked(nid);
}
- lp2 = lnet_find_peer_locked(ptable, nid);
- if (lp2 != NULL) {
- *lpp = lp2;
- goto out;
+ LASSERT(lpni);
+
+ lnet_peer_ni_decref_locked(lpni);
+
+ peer = lpni->lpni_peer_net->lpn_peer;
+
+ /*
+ * If we found a lpni with the same nid as the NID we're trying to
+ * create, then we're trying to create an already existing lpni
+ * that belongs to a different peer
+ */
+ if (peer->lp_primary_nid != nid)
+ return -EEXIST;
+
+ /*
+ * if we found an lpni that is not a multi-rail, which could occur
+ * if lpni is already created as a non-mr lpni or we just created
+ * it, then make sure you indicate that this lpni is a primary mr
+ * capable peer.
+ *
+ * TODO: update flags if necessary
+ */
+ if (!peer->lp_multi_rail && peer->lp_primary_nid == nid)
+ peer->lp_multi_rail = true;
+
+ return rc;
+}
+
+/*
+ * Attach NID @nid to the existing peer whose primary NID is
+ * @prim_nid.  The primary must already exist (-ENOENT otherwise);
+ * -EEXIST if @nid is owned by a different MR peer.
+ */
+static int
+lnet_add_peer_ni_to_prim_lpni(lnet_nid_t prim_nid, lnet_nid_t nid)
+{
+ struct lnet_peer *peer, *primary_peer;
+ struct lnet_peer_ni *lpni = NULL, *klpni = NULL;
+
+ LASSERT(prim_nid != LNET_NID_ANY && nid != LNET_NID_ANY);
+
+ /*
+ * key nid must be created by this point. If not then this
+ * operation is not permitted
+ */
+ klpni = lnet_find_peer_ni_locked(prim_nid);
+ if (!klpni)
+ return -ENOENT;
+
+ lnet_peer_ni_decref_locked(klpni);
+
+ primary_peer = klpni->lpni_peer_net->lpn_peer;
+
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni) {
+ lnet_peer_ni_decref_locked(lpni);
+
+ peer = lpni->lpni_peer_net->lpn_peer;
+ /*
+ * lpni already exists in the system but it belongs to
+ * a different peer. We can't re-add it
+ */
+ if (peer->lp_primary_nid != prim_nid && peer->lp_multi_rail) {
+ CERROR("Cannot add NID %s owned by peer %s to peer %s\n",
+ libcfs_nid2str(lpni->lpni_nid),
+ libcfs_nid2str(peer->lp_primary_nid),
+ libcfs_nid2str(prim_nid));
+ return -EEXIST;
+ } else if (peer->lp_primary_nid == prim_nid) {
+ /*
+ * found a peer_ni that is already part of the
+ * peer. This is a no-op operation.
+ */
+ return 0;
+ }
+
+ /*
+ * TODO: else if (peer->lp_primary_nid != prim_nid &&
+ * !peer->lp_multi_rail)
+ * peer is not an MR peer and it will be moved in the next
+ * step to klpni, so update its flags accordingly.
+ * lnet_move_peer_ni()
+ */
+
+ /*
+ * TODO: call lnet_update_peer() from here to update the
+ * flags. This is the case when the lpni you're trying to
+ * add is already part of the peer. This could've been
+ * added by the DD previously, so go ahead and do any
+ * updates to the state if necessary
+ */
+
}
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
- if (lp->lp_ni == NULL) {
- rc = -EHOSTUNREACH;
- goto out;
+ /*
+ * When we get here we either have found an existing lpni, which
+ * we can switch to the new peer. Or we need to create one and
+ * add it to the new peer
+ */
+ return lnet_peer_setup_hierarchy(primary_peer, lpni, nid);
+}
+
+/*
+ * lpni creation initiated due to traffic either sending or receiving.
+ */
+static int
+lnet_peer_ni_traffic_add(lnet_nid_t nid)
+{
+ struct lnet_peer_ni *lpni;
+ int rc = 0;
+
+ if (nid == LNET_NID_ANY)
+ return -EINVAL;
+
+ /* lnet_net_lock is not needed here because ln_api_lock is held */
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni) {
+ /*
+ * TODO: lnet_update_primary_nid() but not all of it
+ * only indicate if we're converting this to MR capable
+ * Can happen due to DD
+ */
+ lnet_peer_ni_decref_locked(lpni);
+ } else {
+ rc = lnet_peer_setup_hierarchy(NULL, NULL, nid);
}
- lp->lp_txcredits =
- lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_rtrcredits =
- lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+ return rc;
- list_add_tail(&lp->lp_hashlist,
- &ptable->pt_hash[lnet_nid2peerhash(nid)]);
- ptable->pt_version++;
- *lpp = lp;
+}
+
+/*
+ * Create a stand-alone non-MR peer for @nid; refuses with -EEXIST if
+ * the NID is already known.
+ */
+static int
+lnet_peer_ni_add_non_mr(lnet_nid_t nid)
+{
+ struct lnet_peer_ni *lpni;
+
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni) {
+ CERROR("Cannot add %s as non-mr when it already exists\n",
+ libcfs_nid2str(nid));
+ lnet_peer_ni_decref_locked(lpni);
+ return -EEXIST;
+ }
+
+ return lnet_peer_setup_hierarchy(NULL, NULL, nid);
+}
+
+/*
+ * This API handles the following combinations:
+ * Create a primary NI if only the prim_nid is provided
+ * Create or add an lpni to a primary NI. Primary NI must've already
+ * been created
+ * Create a non-MR peer.
+ */
+int
+lnet_add_peer_ni_to_peer(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr)
+{
+ /*
+ * Caller trying to setup an MR like peer hierarchy but
+ * specifying it to be non-MR. This is not allowed.
+ */
+ if (prim_nid != LNET_NID_ANY &&
+ nid != LNET_NID_ANY && !mr)
+ return -EPERM;
+
+ /* Add the primary NID of a peer */
+ if (prim_nid != LNET_NID_ANY &&
+ nid == LNET_NID_ANY && mr)
+ return lnet_add_prim_lpni(prim_nid);
+
+ /* Add a NID to an existing peer */
+ if (prim_nid != LNET_NID_ANY &&
+ nid != LNET_NID_ANY && mr)
+ return lnet_add_peer_ni_to_prim_lpni(prim_nid, nid);
+
+ /* Add a non-MR peer NI */
+ if (((prim_nid != LNET_NID_ANY &&
+ nid == LNET_NID_ANY) ||
+ (prim_nid == LNET_NID_ANY &&
+ nid != LNET_NID_ANY)) && !mr)
+ return lnet_peer_ni_add_non_mr(prim_nid != LNET_NID_ANY ?
+ prim_nid : nid);
+
+ /*
+ * NOTE(review): unmatched combinations (e.g. both NIDs ANY) fall
+ * through and succeed silently — confirm this is intentional.
+ */
return 0;
-out:
- if (lp != NULL)
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- ptable->pt_number--;
+}
+
+/*
+ * Delete NID @nid from the peer whose primary NID is @prim_nid.  With
+ * nid == LNET_NID_ANY, or when @nid is the primary itself, the whole
+ * peer is deleted.  -EINVAL if the NID is unknown.
+ */
+int
+lnet_del_peer_ni_from_peer(lnet_nid_t prim_nid, lnet_nid_t nid)
+{
+ lnet_nid_t local_nid;
+ struct lnet_peer *peer;
+ struct lnet_peer_ni *lpni;
+ int rc;
+
+ if (prim_nid == LNET_NID_ANY)
+ return -EINVAL;
+
+ local_nid = (nid != LNET_NID_ANY) ? nid : prim_nid;
+
+ lpni = lnet_find_peer_ni_locked(local_nid);
+ if (!lpni)
+ return -EINVAL;
+ lnet_peer_ni_decref_locked(lpni);
+
+ peer = lpni->lpni_peer_net->lpn_peer;
+ LASSERT(peer != NULL);
+
+ if (peer->lp_primary_nid == lpni->lpni_nid) {
+ /*
+ * deleting the primary ni is equivalent to deleting the
+ * entire peer
+ */
+ lnet_net_lock(LNET_LOCK_EX);
+ rc = lnet_peer_del_locked(peer);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ return rc;
+ }
+
+ lnet_net_lock(LNET_LOCK_EX);
+ rc = lnet_peer_ni_del_locked(lpni);
+ lnet_net_unlock(LNET_LOCK_EX);
+
return rc;
}
+/*
+ * Final destruction of a zombie peer_ni: the refcount must already be
+ * zero (asserted).  Removes it from its table's zombie list under the
+ * zombie lock and frees it.
+ */
void
+lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
+{
+ struct lnet_peer_table *ptable;
+
+ LASSERT(atomic_read(&lpni->lpni_refcount) == 0);
+ LASSERT(lpni->lpni_rtr_refcount == 0);
+ LASSERT(list_empty(&lpni->lpni_txq));
+ LASSERT(lpni->lpni_txqnob == 0);
+
+ lpni->lpni_net = NULL;
+
+ /* remove the peer ni from the zombie list */
+ ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
+ spin_lock(&ptable->pt_zombie_lock);
+ list_del_init(&lpni->lpni_hashlist);
+ ptable->pt_zombies--;
+ spin_unlock(&ptable->pt_zombie_lock);
+
+ LIBCFS_FREE(lpni, sizeof(*lpni));
+}
+
+/*
+ * Look up or create a peer_ni for @nid while the caller holds
+ * lnet_net_lock(@cpt); the lock is dropped and retaken around
+ * creation.  Unlike lnet_nid2peerni_locked() this does not take
+ * ln_api_mutex — NOTE(review): presumably the caller already
+ * serializes creation; confirm at call sites.  Returns a referenced
+ * peer_ni or an ERR_PTR.
+ */
+struct lnet_peer_ni *
+lnet_nid2peerni_ex(lnet_nid_t nid, int cpt)
+{
+ struct lnet_peer_ni *lpni = NULL;
+ int rc;
+
+ if (the_lnet.ln_shutdown) /* it's shutting down */
+ return ERR_PTR(-ESHUTDOWN);
+
+ /*
+ * find if a peer_ni already exists.
+ * If so then just return that.
+ */
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni)
+ return lpni;
+
+ lnet_net_unlock(cpt);
+
+ rc = lnet_peer_ni_traffic_add(nid);
+ if (rc) {
+ lpni = ERR_PTR(rc);
+ goto out_net_relock;
+ }
+
+ lpni = lnet_find_peer_ni_locked(nid);
+ LASSERT(lpni);
+
+out_net_relock:
+ lnet_net_lock(cpt);
+
+ return lpni;
+}
+
+/*
+ * Look up a peer_ni for @nid, creating it on the slow path under
+ * ln_api_mutex (lnet_net_lock(@cpt) is dropped meanwhile).  Returns a
+ * referenced peer_ni, or ERR_PTR(-ESHUTDOWN) during shutdown.
+ */
+struct lnet_peer_ni *
+lnet_nid2peerni_locked(lnet_nid_t nid, int cpt)
+{
+ struct lnet_peer_ni *lpni = NULL;
+ int rc;
+
+ if (the_lnet.ln_shutdown) /* it's shutting down */
+ return ERR_PTR(-ESHUTDOWN);
+
+ /*
+ * find if a peer_ni already exists.
+ * If so then just return that.
+ */
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni)
+ return lpni;
+
+ /*
+ * Slow path:
+ * use the lnet_api_mutex to serialize the creation of the peer_ni
+ * and the creation/deletion of the local ni/net. When a local ni is
+ * created, if there exists a set of peer_nis on that network,
+ * they need to be traversed and updated. When a local NI is
+ * deleted, which could result in a network being deleted, then
+ * all peer nis on that network need to be removed as well.
+ *
+ * Creation through traffic should also be serialized with
+ * creation through DLC.
+ */
+ lnet_net_unlock(cpt);
+ mutex_lock(&the_lnet.ln_api_mutex);
+ /*
+ * Shutdown is only set under the ln_api_lock, so a single
+ * check here is sufficient.
+ */
+ if (the_lnet.ln_shutdown) {
+ lpni = ERR_PTR(-ESHUTDOWN);
+ goto out_mutex_unlock;
+ }
+
+ rc = lnet_peer_ni_traffic_add(nid);
+ if (rc) {
+ lpni = ERR_PTR(rc);
+ goto out_mutex_unlock;
+ }
+
+ lpni = lnet_find_peer_ni_locked(nid);
+ LASSERT(lpni);
+
+out_mutex_unlock:
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ lnet_net_lock(cpt);
+
+ return lpni;
+}
+
+void
lnet_debug_peer(lnet_nid_t nid)
{
- char *aliveness = "NA";
- lnet_peer_t *lp;
- int rc;
- int cpt;
+ char *aliveness = "NA";
+ struct lnet_peer_ni *lp;
+ int cpt;
- cpt = lnet_cpt_of_nid(nid);
+ cpt = lnet_cpt_of_nid(nid, NULL);
lnet_net_lock(cpt);
- rc = lnet_nid2peer_locked(&lp, nid, cpt);
- if (rc != 0) {
+ lp = lnet_nid2peerni_locked(nid, cpt);
+ if (IS_ERR(lp)) {
lnet_net_unlock(cpt);
CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
return;
}
if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
- aliveness = lp->lp_alive ? "up" : "down";
+ aliveness = lp->lpni_alive ? "up" : "down";
CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- aliveness, lp->lp_ni->ni_peertxcredits,
- lp->lp_rtrcredits, lp->lp_minrtrcredits,
- lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
+ libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount),
+ aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits,
+ lp->lpni_rtrcredits, lp->lpni_minrtrcredits,
+ lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob);
- lnet_peer_decref_locked(lp);
+ lnet_peer_ni_decref_locked(lp);
lnet_net_unlock(cpt);
}
-int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
- char aliveness[LNET_MAX_STR_LEN],
- __u32 *cpt_iter, __u32 *refcount,
- __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
- __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
- __u32 *peer_tx_qnob)
+int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
+ char aliveness[LNET_MAX_STR_LEN],
+ __u32 *cpt_iter, __u32 *refcount,
+ __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
+ __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
+ __u32 *peer_tx_qnob)
{
- struct lnet_peer_table *peer_table;
- lnet_peer_t *lp;
- int j;
- int lncpt;
- bool found = false;
+ struct lnet_peer_table *peer_table;
+ struct lnet_peer_ni *lp;
+ int j;
+ int lncpt;
+ bool found = false;
/* get the number of CPTs */
lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
struct list_head *peers = &peer_table->pt_hash[j];
- list_for_each_entry(lp, peers, lp_hashlist) {
+ list_for_each_entry(lp, peers, lpni_hashlist) {
if (peer_index-- > 0)
continue;
if (lnet_isrouter(lp) ||
lnet_peer_aliveness_enabled(lp))
snprintf(aliveness, LNET_MAX_STR_LEN,
- lp->lp_alive ? "up" : "down");
+ lp->lpni_alive ? "up" : "down");
- *nid = lp->lp_nid;
- *refcount = lp->lp_refcount;
- *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
- *peer_tx_credits = lp->lp_txcredits;
- *peer_rtr_credits = lp->lp_rtrcredits;
- *peer_min_rtr_credits = lp->lp_mintxcredits;
- *peer_tx_qnob = lp->lp_txqnob;
+ *nid = lp->lpni_nid;
+ *refcount = atomic_read(&lp->lpni_refcount);
+ *ni_peer_tx_credits =
+ lp->lpni_net->net_tunables.lct_peer_tx_credits;
+ *peer_tx_credits = lp->lpni_txcredits;
+ *peer_rtr_credits = lp->lpni_rtrcredits;
+ *peer_min_rtr_credits = lp->lpni_mintxcredits;
+ *peer_tx_qnob = lp->lpni_txqnob;
found = true;
}
return found ? 0 : -ENOENT;
}
+
+int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
+ bool *mr, struct lnet_peer_ni_credit_info *peer_ni_info,
+ struct lnet_ioctl_element_stats *peer_ni_stats)
+{
+ struct lnet_peer_ni *lpni = NULL;
+ struct lnet_peer_net *lpn = NULL;
+ struct lnet_peer *lp = NULL;
+
+ lpni = lnet_get_peer_ni_idx_locked(idx, &lpn, &lp);
+
+ if (!lpni)
+ return -ENOENT;
+
+ *primary_nid = lp->lp_primary_nid;
+ *mr = lp->lp_multi_rail;
+ *nid = lpni->lpni_nid;
+ snprintf(peer_ni_info->cr_aliveness, LNET_MAX_STR_LEN, "NA");
+ if (lnet_isrouter(lpni) ||
+ lnet_peer_aliveness_enabled(lpni))
+ snprintf(peer_ni_info->cr_aliveness, LNET_MAX_STR_LEN,
+ lpni->lpni_alive ? "up" : "down");
+
+ peer_ni_info->cr_refcount = atomic_read(&lpni->lpni_refcount);
+ peer_ni_info->cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ?
+ lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
+ peer_ni_info->cr_peer_tx_credits = lpni->lpni_txcredits;
+ peer_ni_info->cr_peer_rtr_credits = lpni->lpni_rtrcredits;
+ peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits;
+ peer_ni_info->cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
+ peer_ni_info->cr_peer_tx_qnob = lpni->lpni_txqnob;
+
+ peer_ni_stats->send_count = atomic_read(&lpni->lpni_stats.send_count);
+ peer_ni_stats->recv_count = atomic_read(&lpni->lpni_stats.recv_count);
+ peer_ni_stats->drop_count = atomic_read(&lpni->lpni_stats.drop_count);
+
+ return 0;
+}
MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
int
-lnet_peer_buffer_credits(lnet_ni_t *ni)
+lnet_peer_buffer_credits(struct lnet_net *net)
{
/* NI option overrides LNet default */
- if (ni->ni_peerrtrcredits > 0)
- return ni->ni_peerrtrcredits;
+ if (net->net_tunables.lct_peer_rtr_credits > 0)
+ return net->net_tunables.lct_peer_rtr_credits;
if (peer_buffer_credits > 0)
return peer_buffer_credits;
/* As an approximation, allow this peer the same number of router
* buffers as it is allowed outstanding sends */
- return ni->ni_peertxcredits;
+ return net->net_tunables.lct_peer_tx_credits;
}
/* forward ref's */
}
void
-lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when)
+lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive,
+ cfs_time_t when)
{
- if (cfs_time_before(when, lp->lp_timestamp)) { /* out of date information */
+ if (cfs_time_before(when, lp->lpni_timestamp)) { /* out of date information */
CDEBUG(D_NET, "Out of date\n");
return;
}
- lp->lp_timestamp = when; /* update timestamp */
- lp->lp_ping_deadline = 0; /* disable ping timeout */
+ /*
+ * This function can be called with different cpt locks being
+ * held. lpni_alive_count modification needs to be properly protected.
+ * Significant reads to lpni_alive_count are also protected with
+ * the same lock
+ */
+ spin_lock(&lp->lpni_lock);
- if (lp->lp_alive_count != 0 && /* got old news */
- (!lp->lp_alive) == (!alive)) { /* new date for old news */
+ lp->lpni_timestamp = when; /* update timestamp */
+ lp->lpni_ping_deadline = 0; /* disable ping timeout */
+
+ if (lp->lpni_alive_count != 0 && /* got old news */
+ (!lp->lpni_alive) == (!alive)) { /* new date for old news */
+ spin_unlock(&lp->lpni_lock);
CDEBUG(D_NET, "Old news\n");
return;
}
/* Flag that notification is outstanding */
- lp->lp_alive_count++;
- lp->lp_alive = !(!alive); /* 1 bit! */
- lp->lp_notify = 1;
- lp->lp_notifylnd |= notifylnd;
- if (lp->lp_alive)
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
+ lp->lpni_alive_count++;
+ lp->lpni_alive = (alive) ? 1 : 0;
+ lp->lpni_notify = 1;
+ lp->lpni_notifylnd = notifylnd;
+ if (lp->lpni_alive)
+ lp->lpni_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
+
+ spin_unlock(&lp->lpni_lock);
- CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
+ CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lpni_nid), alive);
}
+/*
+ * This function will always be called with lp->lpni_cpt lock held.
+ */
static void
-lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp)
+lnet_ni_notify_locked(lnet_ni_t *ni, struct lnet_peer_ni *lp)
{
- int alive;
- int notifylnd;
+ int alive;
+ int notifylnd;
/* Notify only in 1 thread at any time to ensure ordered notification.
* NB individual events can be missed; the only guarantee is that you
* always get the most recent news */
- if (lp->lp_notifying || ni == NULL)
+ spin_lock(&lp->lpni_lock);
+
+ if (lp->lpni_notifying || ni == NULL) {
+ spin_unlock(&lp->lpni_lock);
return;
+ }
- lp->lp_notifying = 1;
+ lp->lpni_notifying = 1;
- while (lp->lp_notify) {
- alive = lp->lp_alive;
- notifylnd = lp->lp_notifylnd;
+ /*
+ * lp->lpni_notify needs to be protected because it can be set in
+ * lnet_notify_locked().
+ */
+ while (lp->lpni_notify) {
+ alive = lp->lpni_alive;
+ notifylnd = lp->lpni_notifylnd;
- lp->lp_notifylnd = 0;
- lp->lp_notify = 0;
+ lp->lpni_notifylnd = 0;
+ lp->lpni_notify = 0;
- if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
- lnet_net_unlock(lp->lp_cpt);
+ if (notifylnd && ni->ni_net->net_lnd->lnd_notify != NULL) {
+ spin_unlock(&lp->lpni_lock);
+ lnet_net_unlock(lp->lpni_cpt);
/* A new notification could happen now; I'll handle it
* when control returns to me */
- (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
+ (ni->ni_net->net_lnd->lnd_notify)(ni, lp->lpni_nid,
+ alive);
- lnet_net_lock(lp->lp_cpt);
+ lnet_net_lock(lp->lpni_cpt);
+ spin_lock(&lp->lpni_lock);
}
}
- lp->lp_notifying = 0;
+ lp->lpni_notifying = 0;
+ spin_unlock(&lp->lpni_lock);
}
-
static void
-lnet_rtr_addref_locked(lnet_peer_t *lp)
+lnet_rtr_addref_locked(struct lnet_peer_ni *lp)
{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount >= 0);
+ LASSERT(atomic_read(&lp->lpni_refcount) > 0);
+ LASSERT(lp->lpni_rtr_refcount >= 0);
/* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount++;
- if (lp->lp_rtr_refcount == 1) {
+ lp->lpni_rtr_refcount++;
+ if (lp->lpni_rtr_refcount == 1) {
struct list_head *pos;
/* a simple insertion sort */
list_for_each_prev(pos, &the_lnet.ln_routers) {
- lnet_peer_t *rtr = list_entry(pos, lnet_peer_t,
- lp_rtr_list);
+ struct lnet_peer_ni *rtr =
+ list_entry(pos, struct lnet_peer_ni,
+ lpni_rtr_list);
- if (rtr->lp_nid < lp->lp_nid)
+ if (rtr->lpni_nid < lp->lpni_nid)
break;
}
- list_add(&lp->lp_rtr_list, pos);
+ list_add(&lp->lpni_rtr_list, pos);
/* addref for the_lnet.ln_routers */
- lnet_peer_addref_locked(lp);
+ lnet_peer_ni_addref_locked(lp);
the_lnet.ln_routers_version++;
}
}
static void
-lnet_rtr_decref_locked(lnet_peer_t *lp)
+lnet_rtr_decref_locked(struct lnet_peer_ni *lp)
{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount > 0);
+ LASSERT(atomic_read(&lp->lpni_refcount) > 0);
+ LASSERT(lp->lpni_rtr_refcount > 0);
/* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount--;
- if (lp->lp_rtr_refcount == 0) {
- LASSERT(list_empty(&lp->lp_routes));
+ lp->lpni_rtr_refcount--;
+ if (lp->lpni_rtr_refcount == 0) {
+ LASSERT(list_empty(&lp->lpni_routes));
- if (lp->lp_rcd != NULL) {
- list_add(&lp->lp_rcd->rcd_list,
+ if (lp->lpni_rcd != NULL) {
+ list_add(&lp->lpni_rcd->rcd_list,
&the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
+ lp->lpni_rcd = NULL;
}
- list_del(&lp->lp_rtr_list);
+ list_del(&lp->lpni_rtr_list);
/* decref for the_lnet.ln_routers */
- lnet_peer_decref_locked(lp);
+ lnet_peer_ni_decref_locked(lp);
the_lnet.ln_routers_version++;
}
}
lnet_remotenet_t *
-lnet_find_net_locked (__u32 net)
+lnet_find_rnet_locked(__u32 net)
{
lnet_remotenet_t *rnet;
struct list_head *tmp;
__u32 lnd_type;
__u32 seed[2];
struct timespec64 ts;
- lnet_ni_t *ni;
- struct list_head *tmp;
+ lnet_ni_t *ni = NULL;
if (seeded)
return;
/* Nodes with small feet have little entropy
* the NID for this node gives the most entropy in the low bits */
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
if (lnd_type != LOLND)
offset--;
}
list_add(&route->lr_list, e);
- list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
+ list_add(&route->lr_gwlist, &route->lr_gateway->lpni_routes);
the_lnet.ln_remote_nets_version++;
lnet_rtr_addref_locked(route->lr_gateway);
lnet_remotenet_t *rnet2;
lnet_route_t *route;
lnet_ni_t *ni;
+ struct lnet_peer_ni *lpni;
int add_route;
int rc;
lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
- if (rc != 0) {
+ lpni = lnet_nid2peerni_ex(gateway, LNET_LOCK_EX);
+ if (IS_ERR(lpni)) {
lnet_net_unlock(LNET_LOCK_EX);
LIBCFS_FREE(route, sizeof(*route));
LIBCFS_FREE(rnet, sizeof(*rnet));
+ rc = PTR_ERR(lpni);
if (rc == -EHOSTUNREACH) /* gateway is not on a local net. */
return rc; /* ignore the route entry */
CERROR("Error %d creating route %s %d %s\n", rc,
libcfs_nid2str(gateway));
return rc;
}
-
+ route->lr_gateway = lpni;
LASSERT(!the_lnet.ln_shutdown);
- rnet2 = lnet_find_net_locked(net);
+ rnet2 = lnet_find_rnet_locked(net);
if (rnet2 == NULL) {
/* new network */
list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
}
/* our lookups must be true */
- LASSERT(route2->lr_gateway->lp_nid != gateway);
+ LASSERT(route2->lr_gateway->lpni_nid != gateway);
}
if (add_route) {
- lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
+ lnet_peer_ni_addref_locked(route->lr_gateway); /* +1 for notify */
lnet_add_route_to_rnet(rnet2, route);
- ni = route->lr_gateway->lp_ni;
+ ni = lnet_get_next_ni_locked(route->lr_gateway->lpni_net, NULL);
lnet_net_unlock(LNET_LOCK_EX);
/* XXX Assume alive */
- if (ni->ni_lnd->lnd_notify != NULL)
- (ni->ni_lnd->lnd_notify)(ni, gateway, 1);
+ if (ni->ni_net->net_lnd->lnd_notify != NULL)
+ (ni->ni_net->net_lnd->lnd_notify)(ni, gateway, 1);
lnet_net_lock(LNET_LOCK_EX);
}
/* -1 for notify or !add_route */
- lnet_peer_decref_locked(route->lr_gateway);
+ lnet_peer_ni_decref_locked(route->lr_gateway);
lnet_net_unlock(LNET_LOCK_EX);
rc = 0;
continue;
}
- if (route->lr_gateway->lp_ni ==
- route2->lr_gateway->lp_ni)
+ if (route->lr_gateway->lpni_net ==
+ route2->lr_gateway->lpni_net)
continue;
- nid1 = route->lr_gateway->lp_nid;
- nid2 = route2->lr_gateway->lp_nid;
+ nid1 = route->lr_gateway->lpni_nid;
+ nid2 = route2->lr_gateway->lpni_nid;
net = rnet->lrn_net;
lnet_net_unlock(cpt);
int
lnet_del_route(__u32 net, lnet_nid_t gw_nid)
{
- struct lnet_peer *gateway;
+ struct lnet_peer_ni *gateway;
lnet_remotenet_t *rnet;
lnet_route_t *route;
struct list_head *e1;
gateway = route->lr_gateway;
if (!(gw_nid == LNET_NID_ANY ||
- gw_nid == gateway->lp_nid))
+ gw_nid == gateway->lpni_nid))
continue;
list_del(&route->lr_list);
rnet = NULL;
lnet_rtr_decref_locked(gateway);
- lnet_peer_decref_locked(gateway);
+ lnet_peer_ni_decref_locked(gateway);
lnet_net_unlock(LNET_LOCK_EX);
*net = rnet->lrn_net;
*hops = route->lr_hops;
*priority = route->lr_priority;
- *gateway = route->lr_gateway->lp_nid;
+ *gateway = route->lr_gateway->lpni_nid;
*alive = lnet_is_route_alive(route);
lnet_net_unlock(cpt);
return 0;
lnet_parse_rc_info(lnet_rc_data_t *rcd)
{
struct lnet_ping_info *info = rcd->rcd_pinginfo;
- struct lnet_peer *gw = rcd->rcd_gateway;
+ struct lnet_peer_ni *gw = rcd->rcd_gateway;
lnet_route_t *rte;
- if (!gw->lp_alive)
+ if (!gw->lpni_alive)
return;
+ /*
+ * Protect gw->lpni_ping_feats. This can be set from
+ * lnet_notify_locked with different locks being held
+ */
+ spin_lock(&gw->lpni_lock);
+
if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
lnet_swap_pinginfo(info);
/* NB always racing with network! */
if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
- libcfs_nid2str(gw->lp_nid), info->pi_magic);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ libcfs_nid2str(gw->lpni_nid), info->pi_magic);
+ gw->lpni_ping_feats = LNET_PING_FEAT_INVAL;
+ spin_unlock(&gw->lpni_lock);
return;
}
- gw->lp_ping_feats = info->pi_features;
- if ((gw->lp_ping_feats & LNET_PING_FEAT_MASK) == 0) {
+ gw->lpni_ping_feats = info->pi_features;
+ if ((gw->lpni_ping_feats & LNET_PING_FEAT_MASK) == 0) {
CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
- libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
+ libcfs_nid2str(gw->lpni_nid), gw->lpni_ping_feats);
+ spin_unlock(&gw->lpni_lock);
return; /* nothing I can understand */
}
- if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0)
+ if ((gw->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) {
+ spin_unlock(&gw->lpni_lock);
return; /* can't carry NI status info */
+ }
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
+ list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) {
int down = 0;
int up = 0;
int i;
- if ((gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) != 0) {
+ if ((gw->lpni_ping_feats & LNET_PING_FEAT_RTE_DISABLED) != 0) {
rte->lr_downis = 1;
continue;
}
if (nid == LNET_NID_ANY) {
CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
- libcfs_nid2str(gw->lp_nid));
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ libcfs_nid2str(gw->lpni_nid));
+ gw->lpni_ping_feats = LNET_PING_FEAT_INVAL;
+ spin_unlock(&gw->lpni_lock);
return;
}
}
CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
- libcfs_nid2str(gw->lp_nid), stat->ns_status);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ libcfs_nid2str(gw->lpni_nid), stat->ns_status);
+ gw->lpni_ping_feats = LNET_PING_FEAT_INVAL;
+ spin_unlock(&gw->lpni_lock);
return;
}
rte->lr_downis = down;
}
+
+ spin_unlock(&gw->lpni_lock);
}
static void
lnet_router_checker_event(lnet_event_t *event)
{
- lnet_rc_data_t *rcd = event->md.user_ptr;
- struct lnet_peer *lp;
+ lnet_rc_data_t *rcd = event->md.user_ptr;
+ struct lnet_peer_ni *lp;
LASSERT(rcd != NULL);
/* NB: it's called with holding lnet_res_lock, we have a few
* places need to hold both locks at the same time, please take
* care of lock ordering */
- lnet_net_lock(lp->lp_cpt);
- if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
+ lnet_net_lock(lp->lpni_cpt);
+ if (!lnet_isrouter(lp) || lp->lpni_rcd != rcd) {
/* ignore if no longer a router or rcd is replaced */
goto out;
}
if (event->type == LNET_EVENT_SEND) {
- lp->lp_ping_notsent = 0;
+ lp->lpni_ping_notsent = 0;
if (event->status == 0)
goto out;
}
lnet_parse_rc_info(rcd);
out:
- lnet_net_unlock(lp->lp_cpt);
+ lnet_net_unlock(lp->lpni_cpt);
}
static void
lnet_wait_known_routerstate(void)
{
- lnet_peer_t *rtr;
+ struct lnet_peer_ni *rtr;
struct list_head *entry;
- int all_known;
+ int all_known;
LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
all_known = 1;
list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
+ rtr = list_entry(entry, struct lnet_peer_ni,
+ lpni_rtr_list);
- if (rtr->lp_alive_count == 0) {
+ spin_lock(&rtr->lpni_lock);
+
+ if (rtr->lpni_alive_count == 0) {
all_known = 0;
+ spin_unlock(&rtr->lpni_lock);
break;
}
+ spin_unlock(&rtr->lpni_lock);
}
lnet_net_unlock(cpt);
}
void
-lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net)
+lnet_router_ni_update_locked(struct lnet_peer_ni *gw, __u32 net)
{
lnet_route_t *rte;
- if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) {
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
+ if ((gw->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) {
+ list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) {
if (rte->lr_net == net) {
rte->lr_downis = 0;
break;
static void
lnet_update_ni_status_locked(void)
{
- lnet_ni_t *ni;
- time64_t now;
+ lnet_ni_t *ni = NULL;
+ time64_t now;
int timeout;
LASSERT(the_lnet.ln_routing);
MAX(live_router_check_interval, dead_router_check_interval);
now = ktime_get_real_seconds();
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- if (ni->ni_lnd->lnd_type == LOLND)
+ while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
+ if (ni->ni_net->net_lnd->lnd_type == LOLND)
continue;
if (now < ni->ni_last_alive + timeout)
LASSERT(LNetHandleIsInvalid(rcd->rcd_mdh));
if (rcd->rcd_gateway != NULL) {
- int cpt = rcd->rcd_gateway->lp_cpt;
+ int cpt = rcd->rcd_gateway->lpni_cpt;
lnet_net_lock(cpt);
- lnet_peer_decref_locked(rcd->rcd_gateway);
+ lnet_peer_ni_decref_locked(rcd->rcd_gateway);
lnet_net_unlock(cpt);
}
}
static lnet_rc_data_t *
-lnet_create_rc_data_locked(lnet_peer_t *gateway)
+lnet_create_rc_data_locked(struct lnet_peer_ni *gateway)
{
lnet_rc_data_t *rcd = NULL;
struct lnet_ping_info *pi;
int rc;
int i;
- lnet_net_unlock(gateway->lp_cpt);
+ lnet_net_unlock(gateway->lpni_cpt);
LIBCFS_ALLOC(rcd, sizeof(*rcd));
if (rcd == NULL)
}
LASSERT(rc == 0);
- lnet_net_lock(gateway->lp_cpt);
+ lnet_net_lock(gateway->lpni_cpt);
/* router table changed or someone has created rcd for this gateway */
- if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) {
- lnet_net_unlock(gateway->lp_cpt);
+ if (!lnet_isrouter(gateway) || gateway->lpni_rcd != NULL) {
+ lnet_net_unlock(gateway->lpni_cpt);
goto out;
}
- lnet_peer_addref_locked(gateway);
+ lnet_peer_ni_addref_locked(gateway);
rcd->rcd_gateway = gateway;
- gateway->lp_rcd = rcd;
- gateway->lp_ping_notsent = 0;
+ gateway->lpni_rcd = rcd;
+ gateway->lpni_ping_notsent = 0;
return rcd;
- out:
+out:
if (rcd != NULL) {
if (!LNetHandleIsInvalid(rcd->rcd_mdh)) {
rc = LNetMDUnlink(rcd->rcd_mdh);
lnet_destroy_rc_data(rcd);
}
- lnet_net_lock(gateway->lp_cpt);
- return gateway->lp_rcd;
+ lnet_net_lock(gateway->lpni_cpt);
+ return gateway->lpni_rcd;
}
static int
-lnet_router_check_interval (lnet_peer_t *rtr)
+lnet_router_check_interval (struct lnet_peer_ni *rtr)
{
int secs;
- secs = rtr->lp_alive ? live_router_check_interval :
+ secs = rtr->lpni_alive ? live_router_check_interval :
dead_router_check_interval;
if (secs < 0)
secs = 0;
}
static void
-lnet_ping_router_locked (lnet_peer_t *rtr)
+lnet_ping_router_locked (struct lnet_peer_ni *rtr)
{
lnet_rc_data_t *rcd = NULL;
- cfs_time_t now = cfs_time_current();
- int secs;
+ cfs_time_t now = cfs_time_current();
+ int secs;
+ struct lnet_ni *ni;
- lnet_peer_addref_locked(rtr);
+ lnet_peer_ni_addref_locked(rtr);
- if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
- cfs_time_after(now, rtr->lp_ping_deadline))
+ if (rtr->lpni_ping_deadline != 0 && /* ping timed out? */
+ cfs_time_after(now, rtr->lpni_ping_deadline))
lnet_notify_locked(rtr, 1, 0, now);
/* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
+ ni = lnet_get_next_ni_locked(rtr->lpni_net, NULL);
+ lnet_ni_notify_locked(ni, rtr);
if (!lnet_isrouter(rtr) ||
the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
/* router table changed or router checker is shutting down */
- lnet_peer_decref_locked(rtr);
+ lnet_peer_ni_decref_locked(rtr);
return;
}
- rcd = rtr->lp_rcd != NULL ?
- rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
+ rcd = rtr->lpni_rcd != NULL ?
+ rtr->lpni_rcd : lnet_create_rc_data_locked(rtr);
if (rcd == NULL)
return;
CDEBUG(D_NET,
"rtr %s %d: deadline %lu ping_notsent %d alive %d "
- "alive_count %d lp_ping_timestamp %lu\n",
- libcfs_nid2str(rtr->lp_nid), secs,
- rtr->lp_ping_deadline, rtr->lp_ping_notsent,
- rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
+ "alive_count %d lpni_ping_timestamp %lu\n",
+ libcfs_nid2str(rtr->lpni_nid), secs,
+ rtr->lpni_ping_deadline, rtr->lpni_ping_notsent,
+ rtr->lpni_alive, rtr->lpni_alive_count, rtr->lpni_ping_timestamp);
- if (secs != 0 && !rtr->lp_ping_notsent &&
- cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
+ if (secs != 0 && !rtr->lpni_ping_notsent &&
+ cfs_time_after(now, cfs_time_add(rtr->lpni_ping_timestamp,
cfs_time_seconds(secs)))) {
- int rc;
+ int rc;
lnet_process_id_t id;
lnet_handle_md_t mdh;
- id.nid = rtr->lp_nid;
+ id.nid = rtr->lpni_nid;
id.pid = LNET_PID_LUSTRE;
CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
- rtr->lp_ping_notsent = 1;
- rtr->lp_ping_timestamp = now;
+ rtr->lpni_ping_notsent = 1;
+ rtr->lpni_ping_timestamp = now;
mdh = rcd->rcd_mdh;
- if (rtr->lp_ping_deadline == 0) {
- rtr->lp_ping_deadline =
+ if (rtr->lpni_ping_deadline == 0) {
+ rtr->lpni_ping_deadline =
cfs_time_shift(router_ping_timeout);
}
- lnet_net_unlock(rtr->lp_cpt);
+ lnet_net_unlock(rtr->lpni_cpt);
rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
LNET_PROTO_PING_MATCHBITS, 0);
- lnet_net_lock(rtr->lp_cpt);
+ lnet_net_lock(rtr->lpni_cpt);
if (rc != 0)
- rtr->lp_ping_notsent = 0; /* no event pending */
+ rtr->lpni_ping_notsent = 0; /* no event pending */
}
- lnet_peer_decref_locked(rtr);
+ lnet_peer_ni_decref_locked(rtr);
return;
}
{
lnet_rc_data_t *rcd;
lnet_rc_data_t *tmp;
- lnet_peer_t *lp;
+ struct lnet_peer_ni *lp;
struct list_head head;
int i = 2;
if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
/* router checker is stopping, prune all */
list_for_each_entry(lp, &the_lnet.ln_routers,
- lp_rtr_list) {
- if (lp->lp_rcd == NULL)
+ lpni_rtr_list) {
+ if (lp->lpni_rcd == NULL)
continue;
- LASSERT(list_empty(&lp->lp_rcd->rcd_list));
- list_add(&lp->lp_rcd->rcd_list,
+ LASSERT(list_empty(&lp->lpni_rcd->rcd_list));
+ list_add(&lp->lpni_rcd->rcd_list,
&the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
+ lp->lpni_rcd = NULL;
}
}
static int
lnet_router_checker(void *arg)
{
- lnet_peer_t *rtr;
- struct list_head *entry;
+ struct lnet_peer_ni *rtr;
+ struct list_head *entry;
cfs_block_allsigs();
version = the_lnet.ln_routers_version;
list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
+ rtr = list_entry(entry, struct lnet_peer_ni,
+ lpni_rtr_list);
- cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
+ cpt2 = rtr->lpni_cpt;
if (cpt != cpt2) {
lnet_net_unlock(cpt);
cpt = cpt2;
INIT_LIST_HEAD(&tmp);
lnet_net_lock(cpt);
- lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt);
+ list_splice_init(&rbp->rbp_msgs, &tmp);
+ lnet_drop_routed_msgs_locked(&tmp, cpt);
list_splice_init(&rbp->rbp_bufs, &tmp);
rbp->rbp_req_nbuffers = 0;
rbp->rbp_nbuffers = rbp->rbp_credits = 0;
int
lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when)
{
- struct lnet_peer *lp = NULL;
- cfs_time_t now = cfs_time_current();
- int cpt = lnet_cpt_of_nid(nid);
+ struct lnet_peer_ni *lp = NULL;
+ cfs_time_t now = cfs_time_current();
+ int cpt = lnet_cpt_of_nid(nid, ni);
LASSERT (!in_interrupt ());
return -ESHUTDOWN;
}
- lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
+ lp = lnet_find_peer_ni_locked(nid);
if (lp == NULL) {
/* nid not found */
lnet_net_unlock(cpt);
return 0;
}
+ /*
+ * It is possible for this function to be called for the same peer
+ * but with different NIs. We want to synchronize the notification
+ * between the different calls. So we will use the lpni_cpt to
+ * grab the net lock.
+ */
+ if (lp->lpni_cpt != cpt) {
+ lnet_net_unlock(cpt);
+ cpt = lp->lpni_cpt;
+ lnet_net_lock(cpt);
+ }
+
/* We can't fully trust LND on reporting exact peer last_alive
* if he notifies us about dead peer. For example ksocklnd can
* call us with when == _time_when_the_node_was_booted_ if
* no connections were successfully established */
- if (ni != NULL && !alive && when < lp->lp_last_alive)
- when = lp->lp_last_alive;
+ if (ni != NULL && !alive && when < lp->lpni_last_alive)
+ when = lp->lpni_last_alive;
lnet_notify_locked(lp, ni == NULL, alive, when);
if (ni != NULL)
lnet_ni_notify_locked(ni, lp);
- lnet_peer_decref_locked(lp);
+ lnet_peer_ni_decref_locked(lp);
lnet_net_unlock(cpt);
return 0;
__u32 net = rnet->lrn_net;
__u32 hops = route->lr_hops;
unsigned int priority = route->lr_priority;
- lnet_nid_t nid = route->lr_gateway->lp_nid;
- int alive = lnet_is_route_alive(route);
+ lnet_nid_t nid = route->lr_gateway->lpni_nid;
+ int alive = lnet_is_route_alive(route);
s += snprintf(s, tmpstr + tmpsiz - s,
"%-8s %4u %8u %7s %s\n",
*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
} else {
struct list_head *r;
- struct lnet_peer *peer = NULL;
+ struct lnet_peer_ni *peer = NULL;
int skip = off - 1;
lnet_net_lock(0);
r = the_lnet.ln_routers.next;
while (r != &the_lnet.ln_routers) {
- lnet_peer_t *lp = list_entry(r, lnet_peer_t,
- lp_rtr_list);
+ struct lnet_peer_ni *lp =
+ list_entry(r, struct lnet_peer_ni,
+ lpni_rtr_list);
if (skip == 0) {
peer = lp;
}
if (peer != NULL) {
- lnet_nid_t nid = peer->lp_nid;
+ lnet_nid_t nid = peer->lpni_nid;
cfs_time_t now = cfs_time_current();
- cfs_time_t deadline = peer->lp_ping_deadline;
- int nrefs = peer->lp_refcount;
- int nrtrrefs = peer->lp_rtr_refcount;
- int alive_cnt = peer->lp_alive_count;
- int alive = peer->lp_alive;
- int pingsent = !peer->lp_ping_notsent;
+ cfs_time_t deadline = peer->lpni_ping_deadline;
+ int nrefs = atomic_read(&peer->lpni_refcount);
+ int nrtrrefs = peer->lpni_rtr_refcount;
+ int alive_cnt = peer->lpni_alive_count;
+ int alive = peer->lpni_alive;
+ int pingsent = !peer->lpni_ping_notsent;
int last_ping = cfs_duration_sec(cfs_time_sub(now,
- peer->lp_ping_timestamp));
+ peer->lpni_ping_timestamp));
int down_ni = 0;
lnet_route_t *rtr;
- if ((peer->lp_ping_feats &
+ if ((peer->lpni_ping_feats &
LNET_PING_FEAT_NI_STATUS) != 0) {
- list_for_each_entry(rtr, &peer->lp_routes,
+ list_for_each_entry(rtr, &peer->lpni_routes,
lr_gwlist) {
/* downis on any route should be the
* number of downis on the gateway */
return rc;
}
+/* TODO: there should be no direct access to ptable. We should add a set
+ * of APIs that give access to the ptable and its members */
static int
proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer,
size_t *lenp, loff_t *ppos)
hoff++;
} else {
- struct lnet_peer *peer;
+ struct lnet_peer_ni *peer;
struct list_head *p;
int skip;
again:
p = ptable->pt_hash[hash].next;
while (p != &ptable->pt_hash[hash]) {
- lnet_peer_t *lp = list_entry(p, lnet_peer_t,
- lp_hashlist);
+ struct lnet_peer_ni *lp =
+ list_entry(p, struct lnet_peer_ni,
+ lpni_hashlist);
if (skip == 0) {
peer = lp;
/* minor optimization: start from idx+1
* on next iteration if we've just
- * drained lp_hashlist */
- if (lp->lp_hashlist.next ==
+ * drained lpni_hashlist */
+ if (lp->lpni_hashlist.next ==
&ptable->pt_hash[hash]) {
hoff = 1;
hash++;
}
skip--;
- p = lp->lp_hashlist.next;
+ p = lp->lpni_hashlist.next;
}
if (peer != NULL)
p = NULL;
hoff = 1;
hash++;
- }
+ }
if (peer != NULL) {
- lnet_nid_t nid = peer->lp_nid;
- int nrefs = peer->lp_refcount;
- int lastalive = -1;
- char *aliveness = "NA";
- int maxcr = peer->lp_ni->ni_peertxcredits;
- int txcr = peer->lp_txcredits;
- int mintxcr = peer->lp_mintxcredits;
- int rtrcr = peer->lp_rtrcredits;
- int minrtrcr = peer->lp_minrtrcredits;
- int txqnob = peer->lp_txqnob;
+ lnet_nid_t nid = peer->lpni_nid;
+ int nrefs = atomic_read(&peer->lpni_refcount);
+ int lastalive = -1;
+ char *aliveness = "NA";
+ int maxcr = (peer->lpni_net) ?
+ peer->lpni_net->net_tunables.lct_peer_tx_credits : 0;
+ int txcr = peer->lpni_txcredits;
+ int mintxcr = peer->lpni_mintxcredits;
+ int rtrcr = peer->lpni_rtrcredits;
+ int minrtrcr = peer->lpni_minrtrcredits;
+ int txqnob = peer->lpni_txqnob;
if (lnet_isrouter(peer) ||
lnet_peer_aliveness_enabled(peer))
- aliveness = peer->lp_alive ? "up" : "down";
+ aliveness = peer->lpni_alive ? "up" : "down";
if (lnet_peer_aliveness_enabled(peer)) {
- cfs_time_t now = cfs_time_current();
+ cfs_time_t now = cfs_time_current();
cfs_duration_t delta;
- delta = cfs_time_sub(now, peer->lp_last_alive);
+ delta = cfs_time_sub(now, peer->lpni_last_alive);
lastalive = cfs_duration_sec(delta);
/* No need to mess up peers contents with
"%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
"nid", "status", "alive", "refs", "peer",
"rtr", "max", "tx", "min");
- LASSERT(tmpstr + tmpsiz - s > 0);
+ LASSERT (tmpstr + tmpsiz - s > 0);
} else {
- struct list_head *n;
- lnet_ni_t *ni = NULL;
- int skip = *ppos - 1;
+ lnet_ni_t *ni = NULL;
+ int skip = *ppos - 1;
lnet_net_lock(0);
- n = the_lnet.ln_nis.next;
-
- while (n != &the_lnet.ln_nis) {
- lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list);
-
- if (skip == 0) {
- ni = a_ni;
- break;
- }
-
- skip--;
- n = n->next;
- }
+ ni = lnet_get_ni_idx_locked(skip);
if (ni != NULL) {
struct lnet_tx_queue *tq;
last_alive = now - ni->ni_last_alive;
/* @lo forever alive */
- if (ni->ni_lnd->lnd_type == LOLND)
+ if (ni->ni_net->net_lnd->lnd_type == LOLND)
last_alive = 0;
lnet_ni_lock(ni);
"%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
libcfs_nid2str(ni->ni_nid), stat,
last_alive, *ni->ni_refs[i],
- ni->ni_peertxcredits,
- ni->ni_peerrtrcredits,
+ ni->ni_net->net_tunables.lct_peer_tx_credits,
+ ni->ni_net->net_tunables.lct_peer_rtr_credits,
tq->tq_credits_max,
tq->tq_credits, tq->tq_credits_min);
if (i != 0)
return -EINVAL;
list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
+ bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL),
off, npg, len, opc == LST_BRW_READ);
if (bulk == NULL) {
brw_client_fini(tsi);
wi = &tsu->tsu_worker;
swi_init_workitem(wi, tsu, sfw_run_test,
lst_sched_test[\
- lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
+ lnet_cpt_of_nid(tsu->tsu_dest.nid,
+ NULL)]);
swi_schedule_workitem(wi);
}
}
INIT_LIST_HEAD(&rpc->crpc_list);
swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
- lst_sched_test[lnet_cpt_of_nid(peer.nid)]);
+ lst_sched_test[lnet_cpt_of_nid(peer.nid, NULL)]);
spin_lock_init(&rpc->crpc_lock);
atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
[YAML_SCALAR_TOKEN] = yaml_scalar,
};
+/* dispatch table */
+static char *token_type_string[] = {
+ [YAML_NO_TOKEN] = "YAML_NO_TOKEN",
+ [YAML_STREAM_START_TOKEN] = "YAML_STREAM_START_TOKEN",
+ [YAML_STREAM_END_TOKEN] = "YAML_STREAM_END_TOKEN",
+ [YAML_VERSION_DIRECTIVE_TOKEN] = "YAML_VERSION_DIRECTIVE_TOKEN",
+ [YAML_TAG_DIRECTIVE_TOKEN] = "YAML_TAG_DIRECTIVE_TOKEN",
+ [YAML_DOCUMENT_START_TOKEN] = "YAML_DOCUMENT_START_TOKEN",
+ [YAML_DOCUMENT_END_TOKEN] = "YAML_DOCUMENT_END_TOKEN",
+ [YAML_BLOCK_SEQUENCE_START_TOKEN] = "YAML_BLOCK_SEQUENCE_START_TOKEN",
+ [YAML_BLOCK_MAPPING_START_TOKEN] = "YAML_BLOCK_MAPPING_START_TOKEN",
+ [YAML_BLOCK_END_TOKEN] = "YAML_BLOCK_END_TOKEN",
+ [YAML_FLOW_SEQUENCE_START_TOKEN] = "YAML_FLOW_SEQUENCE_START_TOKEN",
+ [YAML_FLOW_SEQUENCE_END_TOKEN] = "YAML_FLOW_SEQUENCE_END_TOKEN",
+ [YAML_FLOW_MAPPING_START_TOKEN] = "YAML_FLOW_MAPPING_START_TOKEN",
+ [YAML_FLOW_MAPPING_END_TOKEN] = "YAML_FLOW_MAPPING_END_TOKEN",
+ [YAML_BLOCK_ENTRY_TOKEN] = "YAML_BLOCK_ENTRY_TOKEN",
+ [YAML_FLOW_ENTRY_TOKEN] = "YAML_FLOW_ENTRY_TOKEN",
+ [YAML_KEY_TOKEN] = "YAML_KEY_TOKEN",
+ [YAML_VALUE_TOKEN] = "YAML_VALUE_TOKEN",
+ [YAML_ALIAS_TOKEN] = "YAML_ALIAS_TOKEN",
+ [YAML_ANCHOR_TOKEN] = "YAML_ANCHOR_TOKEN",
+ [YAML_TAG_TOKEN] = "YAML_TAG_TOKEN",
+ [YAML_SCALAR_TOKEN] = "YAML_SCALAR_TOKEN",
+};
+
static void cYAML_ll_free(struct list_head *ll)
{
struct cYAML_ll *node, *tmp;
struct cYAML *cYAML_get_object_item(struct cYAML *parent, const char *name)
{
- struct cYAML *node;
+ struct cYAML *node = parent, *found = NULL;
- if (parent == NULL || parent->cy_child == NULL || name == NULL)
+ if (!node || !name)
return NULL;
- node = parent->cy_child;
-
- while (node != NULL &&
- strcmp(node->cy_string, name) != 0) {
- node = node->cy_next;
+ if (node->cy_string) {
+ if (strcmp(node->cy_string, name) == 0)
+ return node;
}
- return node;
+ if (node->cy_child)
+ found = cYAML_get_object_item(node->cy_child, name);
+
+ if (!found && node->cy_next)
+ found = cYAML_get_object_item(node->cy_next, name);
+
+ return found;
}
struct cYAML *cYAML_get_next_seq_item(struct cYAML *seq, struct cYAML **itm)
struct cYAML *cYAML_build_tree(char *yaml_file,
const char *yaml_blk,
size_t yaml_blk_size,
- struct cYAML **err_rc)
+ struct cYAML **err_rc,
+ bool debug)
{
yaml_parser_t parser;
yaml_token_t token;
*/
yaml_parser_scan(&parser, &token);
+ if (debug)
+ fprintf(stderr, "token.type = %s: %s\n",
+ token_type_string[token.type],
+ (token.type == YAML_SCALAR_TOKEN) ?
+ (char*)token.data.scalar.value : "");
rc = dispatch_tbl[token.type](&token, &tree);
if (rc != CYAML_ERROR_NONE) {
snprintf(err_str, sizeof(err_str),
*/
struct cYAML *cYAML_build_tree(char *yaml_file, const char *yaml_blk,
size_t yaml_blk_size,
- struct cYAML **err_str);
+ struct cYAML **err_str, bool debug);
/*
* cYAML_print_tree
liblnetconfig_lnd.c liblnd.h $(CYAML)
liblnetconfig_la_CPPFLAGS = -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 \
-DLUSTRE_UTILS=1 -I$(top_builddir)/lnet/utils/cyaml
-liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -version-info 1:1:0
+liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -version-info 2:0:0
EXTRA_DIST =
#include "cyaml.h"
int
-lustre_interface_show_net(struct cYAML *interfaces, unsigned int index,
- bool detail, struct lnet_ioctl_config_data *data,
- struct lnet_ioctl_net_config *net_config);
+lustre_net_show_tunables(struct cYAML *tunables,
+ struct lnet_ioctl_config_lnd_cmn_tunables *cmn);
+
+int
+lustre_ni_show_tunables(struct cYAML *lnd_tunables,
+ __u32 net_type,
+ struct lnet_lnd_tunables *lnd);
void
-lustre_interface_parse(struct cYAML *lndparams, const char *dev_name,
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg);
+lustre_yaml_extract_lnd_tunables(struct cYAML *tree,
+ __u32 net_type,
+ struct lnet_lnd_tunables *tun);
#endif /* LIB_LND_CONFIG_API_H */
#include <errno.h>
#include <limits.h>
+#include <byteswap.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <net/if.h>
#include <libcfs/util/ioctl.h>
#include <lnet/lnetctl.h>
#include <lnet/socklnd.h>
#include "liblnd.h"
+#include <lnet/lnet.h>
+#include <sys/types.h>
+#include <ifaddrs.h>
+#include <unistd.h>
#include "liblnetconfig.h"
#include "cyaml.h"
#define ADD_CMD "add"
#define DEL_CMD "del"
#define SHOW_CMD "show"
+#define DBG_CMD "dbg"
+
+/*
+ * lustre_lnet_ip_range_descr
+ * Describes an IP range.
+ * Each octet is an expression
+ */
+struct lustre_lnet_ip_range_descr {
+ struct list_head ipr_entry; /* link on the enclosing list of ranges */
+ struct list_head ipr_expr; /* expression list parsed from the range */
+};
+
+/*
+ * lustre_lnet_ip2nets
+ * Describes an ip2nets rule. This can be on a list of rules.
+ */
+struct lustre_lnet_ip2nets {
+ struct lnet_dlc_network_descr ip2nets_net; /* net + its interfaces */
+ struct list_head ip2nets_ip_ranges; /* lustre_lnet_ip_range_descr list */
+};
+
+/*
+ * free_intf_descr
+ * frees the memory allocated for an intf descriptor.
+ */
+void free_intf_descr(struct lnet_dlc_intf_descr *intf_descr)
+{
+ if (!intf_descr)
+ return;
+
+ /* the CPT expression list is owned by the descriptor */
+ if (intf_descr->cpt_expr != NULL)
+ cfs_expr_list_free(intf_descr->cpt_expr);
+ free(intf_descr);
+}
+
+/*
+ * lustre_lnet_add_ip_range
+ * Formatting:
+ * given a string of the format:
+ * <expr.expr.expr.expr> parse each expr into
+ * a lustre_lnet_ip_range_descr structure and insert on the list.
+ *
+ * This function is called from
+ * YAML on each ip-range.
+ * As a result of lnetctl command
+ * When building a NID or P2P selection rules
+ */
+int lustre_lnet_add_ip_range(struct list_head *list, char *str_ip_range)
+{
+ struct lustre_lnet_ip_range_descr *ip_range;
+ int rc;
+
+ ip_range = calloc(1, sizeof(*ip_range));
+ if (ip_range == NULL)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ INIT_LIST_HEAD(&ip_range->ipr_entry);
+ INIT_LIST_HEAD(&ip_range->ipr_expr);
+
+ rc = cfs_ip_addr_parse(str_ip_range, strlen(str_ip_range),
+ &ip_range->ipr_expr);
+ if (rc != 0) {
+ /* the descriptor was never added to the list, so it must
+ * be freed here to avoid leaking it on a malformed range
+ * string (assumes cfs_ip_addr_parse cleans up any partial
+ * expression list on failure - confirm)
+ */
+ free(ip_range);
+ return LUSTRE_CFG_RC_BAD_PARAM;
+ }
+
+ list_add_tail(&ip_range->ipr_entry, list);
+
+ return LUSTRE_CFG_RC_NO_ERR;
+}
+
+/*
+ * lustre_lnet_add_intf_descr
+ * Parse the first @len bytes of @intf as "<intf name>[<cpt expr>]",
+ * allocate an lnet_dlc_intf_descr for it and append it to @list.
+ */
+int lustre_lnet_add_intf_descr(struct list_head *list, char *intf, int len)
+{
+ char *open_sq_bracket = NULL, *close_sq_bracket = NULL,
+ *intf_name;
+ struct lnet_dlc_intf_descr *intf_descr = NULL;
+ int rc;
+ char intf_string[LNET_MAX_STR_LEN];
+
+ if (len >= LNET_MAX_STR_LEN)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ strncpy(intf_string, intf, len);
+ intf_string[len] = '\0';
+
+ intf_descr = calloc(1, sizeof(*intf_descr));
+ if (intf_descr == NULL)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ INIT_LIST_HEAD(&intf_descr->intf_on_network);
+
+ intf_name = intf_string;
+ open_sq_bracket = strchr(intf_string, '[');
+ if (open_sq_bracket != NULL) {
+ close_sq_bracket = strchr(intf_string, ']');
+ if (close_sq_bracket == NULL) {
+ free(intf_descr);
+ return LUSTRE_CFG_RC_BAD_PARAM;
+ }
+ /* the "[...]" suffix is a CPT expression for this interface */
+ rc = cfs_expr_list_parse(open_sq_bracket,
+ strlen(open_sq_bracket), 0, UINT_MAX,
+ &intf_descr->cpt_expr);
+ if (rc < 0) {
+ free(intf_descr);
+ return LUSTRE_CFG_RC_BAD_PARAM;
+ }
+ /* keep only the name portion before the '[' */
+ strncpy(intf_descr->intf_name, intf_name,
+ open_sq_bracket - intf_name);
+ intf_descr->intf_name[open_sq_bracket - intf_name] = '\0';
+ } else {
+ strcpy(intf_descr->intf_name, intf_name);
+ intf_descr->cpt_expr = NULL;
+ }
+
+ list_add_tail(&intf_descr->intf_on_network, list);
+
+ return LUSTRE_CFG_RC_NO_ERR;
+}
+
+/*
+ * lustre_lnet_init_nw_descr
+ * Initialize the list heads of a network descriptor; a NULL descriptor
+ * is tolerated and ignored.
+ */
+void lustre_lnet_init_nw_descr(struct lnet_dlc_network_descr *nw_descr)
+{
+ if (nw_descr != NULL) {
+ INIT_LIST_HEAD(&nw_descr->network_on_rule);
+ INIT_LIST_HEAD(&nw_descr->nw_intflist);
+ }
+}
+
+/*
+ * lustre_lnet_parse_nids
+ * Split the comma-separated @nids string and append the pieces to the
+ * existing @array of @size strings, returning the new total count via
+ * the return value and the (re)allocated array via @out_array. On
+ * failure the original @array is left untouched and @size is returned.
+ */
+int lustre_lnet_parse_nids(char *nids, char **array, int size,
+ char ***out_array)
+{
+ int num_nids = 0;
+ char *comma = nids, *cur, *entry;
+ char **new_array;
+ int i, len, start = 0, finish = 0;
+
+ if (nids == NULL || strlen(nids) == 0)
+ return size;
+
+ /* count the number of new nids: one more than the number of commas */
+ while (comma) {
+ comma = strchr(comma, ',');
+ if (comma) {
+ comma++;
+ num_nids++;
+ } else {
+ num_nids++;
+ }
+ }
+
+ /*
+ * allocate an array large enough to house both the old and the
+ * new entries; calloc(nmemb, size) keeps the multiplication
+ * overflow-checked
+ */
+ new_array = calloc((size > 0) ? size + num_nids : num_nids,
+ sizeof(char *));
+
+ if (!new_array)
+ goto failed;
+
+ /* parse out the new nids and add them to the tail of the array */
+ comma = nids;
+ cur = nids;
+ start = (size > 0) ? size: 0;
+ finish = (size > 0) ? size + num_nids : num_nids;
+ for (i = start; i < finish; i++) {
+ comma = strchr(comma, ',');
+ if (!comma)
+ /*
+ * the length of the string to be parsed out is
+ * from cur to end of string. So it's good enough
+ * to strlen(cur)
+ */
+ len = strlen(cur) + 1;
+ else
+ /* length of the string is comma - cur */
+ len = (comma - cur) + 1;
+
+ entry = calloc(1, len);
+ if (!entry) {
+ /* entries [start, i) were allocated; free exactly
+ * those in the failure path
+ */
+ finish = i;
+ goto failed;
+ }
+ strncpy(entry, cur, len - 1);
+ /* the buffer is len bytes, so the terminator goes in the
+ * last valid byte, index len - 1
+ */
+ entry[len - 1] = '\0';
+ new_array[i] = entry;
+ if (comma) {
+ comma++;
+ cur = comma;
+ }
+ }
+
+ /* add the old entries in the array and delete the old array*/
+ for (i = 0; i < size; i++)
+ new_array[i] = array[i];
+
+ free(array);
+
+ *out_array = new_array;
+
+ return finish;
+
+failed:
+ for (i = start; i < finish; i++)
+ free(new_array[i]);
+ free(new_array);
+
+ return size;
+}
+
+/*
+ * format expected:
+ * <intf>[<expr>], <intf>[<expr>],..
+ */
+/*
+ * lustre_lnet_parse_interfaces
+ * Split @intf_str, of the form "<intf>[<expr>],<intf>[<expr>],...",
+ * into lnet_dlc_intf_descr entries on nw_descr->nw_intflist. On any
+ * error the partially built list is torn down.
+ */
+int lustre_lnet_parse_interfaces(char *intf_str,
+ struct lnet_dlc_network_descr *nw_descr)
+{
+ char *open_square;
+ char *close_square;
+ char *comma;
+ char *cur = intf_str, *next = NULL;
+ char *end = intf_str + strlen(intf_str);
+ int rc, len;
+ struct lnet_dlc_intf_descr *intf_descr, *tmp;
+
+ if (nw_descr == NULL)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ while (cur < end) {
+ open_square = strchr(cur, '[');
+ if (open_square != NULL) {
+ close_square = strchr(cur, ']');
+ if (close_square == NULL) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto failed;
+ }
+
+ /* only a comma after the closing bracket separates
+ * entries; a comma before it belongs to the CPT
+ * expression
+ */
+ comma = strchr(cur, ',');
+ if (comma != NULL && comma > close_square) {
+ next = comma + 1;
+ /* consume the whole "<intf>[<expr>]" up to
+ * the separating comma
+ */
+ len = comma - cur;
+ } else {
+ len = strlen(cur);
+ next = cur + len;
+ }
+ } else {
+ comma = strchr(cur, ',');
+ if (comma != NULL) {
+ next = comma + 1;
+ len = comma - cur;
+ } else {
+ len = strlen(cur);
+ next = cur + len;
+ }
+ }
+
+ rc = lustre_lnet_add_intf_descr(&nw_descr->nw_intflist, cur, len);
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ goto failed;
+
+ cur = next;
+ }
+
+ return LUSTRE_CFG_RC_NO_ERR;
+
+failed:
+ list_for_each_entry_safe(intf_descr, tmp, &nw_descr->nw_intflist,
+ intf_on_network) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ }
+
+ return rc;
+}
int lustre_lnet_config_lib_init(void)
{
LNET_DEV_MAJOR, LNET_DEV_MINOR);
}
+/* undo lustre_lnet_config_lib_init(): release the LNet ioctl device */
+void lustre_lnet_config_lib_uninit(void)
+{
+ unregister_ioc_dev(LNET_DEV_ID);
+}
+
int lustre_lnet_config_ni_system(bool up, bool load_ni_from_mod,
int seq_no, struct cYAML **err_rc)
{
return rc;
}
+/*
+ * allocate_create_nid_array
+ * Convert the num_nids NID strings in nids[] into a newly allocated
+ * array of lnet_nid_t. Returns NULL with err_str populated on bad
+ * input, a malformed NID string, or allocation failure; on success the
+ * caller owns (and must free) the returned array.
+ */
+static lnet_nid_t *allocate_create_nid_array(char **nids, __u32 num_nids,
+ char *err_str)
+{
+ lnet_nid_t *array = NULL;
+ __u32 i;
+
+ if (!nids || num_nids == 0) {
+ snprintf(err_str, LNET_MAX_STR_LEN, "no NIDs to add");
+ return NULL;
+ }
+
+ /* calloc(nmemb, size) keeps the size multiplication
+ * overflow-checked
+ */
+ array = calloc(num_nids, sizeof(*array));
+ if (array == NULL) {
+ snprintf(err_str, LNET_MAX_STR_LEN, "out of memory");
+ return NULL;
+ }
+
+ for (i = 0; i < num_nids; i++) {
+ array[i] = libcfs_str2nid(nids[i]);
+ if (array[i] == LNET_NID_ANY) {
+ free(array);
+ snprintf(err_str, LNET_MAX_STR_LEN,
+ "bad NID: '%s'",
+ nids[i]);
+ return NULL;
+ }
+ }
+
+ return array;
+}
+
+/*
+ * dispatch_peer_ni_cmd
+ * Fill in the primary/config NIDs on @data and issue the given peer
+ * ioctl. On failure returns -errno and formats a message into err_str
+ * using cmd_str ("add" when cmd_str is NULL) to describe the operation.
+ */
+static int dispatch_peer_ni_cmd(lnet_nid_t pnid, lnet_nid_t nid, __u32 cmd,
+ struct lnet_ioctl_peer_cfg *data,
+ char *err_str, char *cmd_str)
+{
+ int rc;
+
+ data->prcfg_prim_nid = pnid;
+ data->prcfg_cfg_nid = nid;
+
+ rc = l_ioctl(LNET_DEV_ID, cmd, data);
+ if (rc != 0) {
+ rc = -errno;
+ snprintf(err_str,
+ LNET_MAX_STR_LEN,
+ "\"cannot %s peer ni: %s\"",
+ (cmd_str) ? cmd_str : "add", strerror(errno));
+ }
+
+ return rc;
+}
+
+/*
+ * lustre_lnet_config_peer_nid
+ * Configure a peer keyed by @pnid (or by the first NID in @nid when no
+ * primary is given) and add the remaining NIDs to it. Errors are
+ * reported through err_rc as a YAML error block.
+ */
+int lustre_lnet_config_peer_nid(char *pnid, char **nid, int num_nids,
+ bool mr, int seq_no, struct cYAML **err_rc)
+{
+ struct lnet_ioctl_peer_cfg data;
+ lnet_nid_t prim_nid = LNET_NID_ANY;
+ int rc = LUSTRE_CFG_RC_NO_ERR;
+ int idx = 0;
+ bool nid0_used = false;
+ char err_str[LNET_MAX_STR_LEN] = {0};
+ lnet_nid_t *nids = allocate_create_nid_array(nid, num_nids, err_str);
+
+ /*
+ * NID strings were supplied but the NID array could not be built
+ * (bad NID or out of memory). Fail now instead of silently
+ * configuring only the primary NID; err_str was already set by
+ * the helper.
+ */
+ if (nid != NULL && nids == NULL) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ if (pnid) {
+ prim_nid = libcfs_str2nid(pnid);
+ if (prim_nid == LNET_NID_ANY) {
+ snprintf(err_str, sizeof(err_str),
+ "bad key NID: '%s'",
+ pnid);
+ rc = LUSTRE_CFG_RC_MISSING_PARAM;
+ goto out;
+ }
+ } else if (!nids || nids[0] == LNET_NID_ANY) {
+ snprintf(err_str, sizeof(err_str),
+ "no NIDs provided for configuration");
+ rc = LUSTRE_CFG_RC_MISSING_PARAM;
+ goto out;
+ } else {
+ prim_nid = LNET_NID_ANY;
+ }
+
+ snprintf(err_str, sizeof(err_str), "\"Success\"");
+
+ LIBCFS_IOC_INIT_V2(data, prcfg_hdr);
+ data.prcfg_mr = mr;
+
+ /*
+ * if prim_nid is not specified use the first nid in the list of
+ * nids provided as the prim_nid. NOTE: on entering 'if' we must
+ * have at least 1 NID
+ */
+ if (prim_nid == LNET_NID_ANY) {
+ nid0_used = true;
+ prim_nid = nids[0];
+ }
+
+ /* Create the prim_nid first */
+ rc = dispatch_peer_ni_cmd(prim_nid, LNET_NID_ANY,
+ IOC_LIBCFS_ADD_PEER_NI,
+ &data, err_str, "add");
+
+ if (rc != 0)
+ goto out;
+
+ /* add the rest of the nids to the key nid if any are available;
+ * skip nids[0] when it was promoted to the primary above
+ */
+ for (idx = nid0_used ? 1 : 0 ; nids && idx < num_nids; idx++) {
+ rc = dispatch_peer_ni_cmd(prim_nid, nids[idx],
+ IOC_LIBCFS_ADD_PEER_NI, &data,
+ err_str, "add");
+
+ if (rc != 0)
+ goto out;
+ }
+
+out:
+ free(nids);
+ cYAML_build_error(rc, seq_no, ADD_CMD, "peer_ni", err_str, err_rc);
+ return rc;
+}
+
+/*
+ * lustre_lnet_del_peer_nid
+ * Delete the NIDs in @nid from the peer keyed by @pnid; when no NIDs
+ * are given, delete the whole peer. Errors are reported through
+ * err_rc as a YAML error block.
+ */
+int lustre_lnet_del_peer_nid(char *pnid, char **nid, int num_nids,
+ int seq_no, struct cYAML **err_rc)
+{
+ struct lnet_ioctl_peer_cfg data;
+ lnet_nid_t prim_nid;
+ int rc = LUSTRE_CFG_RC_NO_ERR;
+ int idx = 0;
+ char err_str[LNET_MAX_STR_LEN] = {0};
+ lnet_nid_t *nids = allocate_create_nid_array(nid, num_nids, err_str);
+
+ /*
+ * NID strings were supplied but the NID array could not be built
+ * (bad NID or out of memory). Bail out rather than falling into
+ * the "!nids" branch below, which would delete the ENTIRE peer
+ * instead of only the requested NIDs; err_str was already set by
+ * the helper.
+ */
+ if (nid != NULL && nids == NULL) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ if (pnid == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "\"Primary nid is not provided\"");
+ rc = LUSTRE_CFG_RC_MISSING_PARAM;
+ goto out;
+ } else {
+ prim_nid = libcfs_str2nid(pnid);
+ if (prim_nid == LNET_NID_ANY) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ snprintf(err_str, sizeof(err_str),
+ "bad key NID: '%s'",
+ pnid);
+ goto out;
+ }
+ }
+
+ snprintf(err_str, sizeof(err_str), "\"Success\"");
+
+ LIBCFS_IOC_INIT_V2(data, prcfg_hdr);
+ /* no specific NIDs requested: remove the whole peer */
+ if (!nids || nids[0] == LNET_NID_ANY) {
+ rc = dispatch_peer_ni_cmd(prim_nid, LNET_NID_ANY,
+ IOC_LIBCFS_DEL_PEER_NI,
+ &data, err_str, "del");
+ goto out;
+ }
+
+ for (idx = 0; nids && idx < num_nids; idx++) {
+ rc = dispatch_peer_ni_cmd(prim_nid, nids[idx],
+ IOC_LIBCFS_DEL_PEER_NI, &data,
+ err_str, "del");
+
+ if (rc != 0)
+ goto out;
+ }
+
+out:
+ free(nids);
+ cYAML_build_error(rc, seq_no, DEL_CMD, "peer_ni", err_str, err_rc);
+ return rc;
+}
+
int lustre_lnet_config_route(char *nw, char *gw, int hops, int prio,
int seq_no, struct cYAML **err_rc)
{
return rc;
}
-int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
- int peer_to, int peer_cr, int peer_buf_cr,
- int credits, char *smp, int seq_no,
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables,
- struct cYAML **err_rc)
+/*
+ * socket_intf_query
+ * Run a SIOC* query ioctl (@request) against interface @intf, filling
+ * in @ifr. Returns 0 on success or LUSTRE_CFG_RC_BAD_PARAM.
+ */
+static int socket_intf_query(int request, char *intf,
+ struct ifreq *ifr)
{
- struct lnet_ioctl_config_lnd_tunables *lnd = NULL;
- struct lnet_ioctl_config_data *data;
- size_t ioctl_size = sizeof(*data);
- char buf[LNET_MAX_STR_LEN];
- int rc = LUSTRE_CFG_RC_NO_ERR;
- char err_str[LNET_MAX_STR_LEN];
+ int rc;
+ int sockfd;
- snprintf(err_str, sizeof(err_str), "\"success\"");
+ if (strlen(intf) >= IFNAMSIZ || ifr == NULL)
+ return LUSTRE_CFG_RC_BAD_PARAM;
- /* No need to register lo */
- if (net != NULL && !strcmp(net, "lo"))
- return 0;
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return LUSTRE_CFG_RC_BAD_PARAM;
- if (ip2net == NULL && (intf == NULL || net == NULL)) {
- snprintf(err_str,
- sizeof(err_str),
- "\"mandatory parameter '%s' not specified."
- " Optionally specify ip2net parameter\"",
- (intf == NULL && net == NULL) ? "net, if" :
- (intf == NULL) ? "if" : "net");
- rc = LUSTRE_CFG_RC_MISSING_PARAM;
- goto out;
- }
+ strcpy(ifr->ifr_name, intf);
+ rc = ioctl(sockfd, request, ifr);
+ /* close the query socket on every path; otherwise each call
+ * leaks a file descriptor
+ */
+ close(sockfd);
+ if (rc != 0)
+ return LUSTRE_CFG_RC_BAD_PARAM;
- if (peer_to != -1 && peer_to <= 0) {
- snprintf(err_str,
- sizeof(err_str),
- "\"peer timeout %d, must be greater than 0\"",
- peer_to);
- rc = LUSTRE_CFG_RC_OUT_OF_RANGE_PARAM;
- goto out;
- }
+ return 0;
+}
- if (ip2net != NULL && strlen(ip2net) >= sizeof(buf)) {
- snprintf(err_str,
- sizeof(err_str),
- "\"ip2net string too long %d\"",
- (int)strlen(ip2net));
- rc = LUSTRE_CFG_RC_OUT_OF_RANGE_PARAM;
- goto out;
- }
+/*
+ * for each interface in the array of interfaces find the IP address of
+ * that interface, create its nid and add it to an array of NIDs.
+ * Stop if any of the interfaces is down
+ */
+static int lustre_lnet_intf2nids(struct lnet_dlc_network_descr *nw,
+ lnet_nid_t **nids, __u32 *nnids)
+{
+ int i = 0, count = 0, rc;
+ struct ifreq ifr;
+ __u32 ip;
+ struct lnet_dlc_intf_descr *intf;
- if (lnd_tunables != NULL)
- ioctl_size += sizeof(*lnd_tunables);
+ if (nw == NULL || nids == NULL)
+ return LUSTRE_CFG_RC_BAD_PARAM;
- data = calloc(1, ioctl_size);
- if (data == NULL)
- goto out;
+ list_for_each_entry(intf, &nw->nw_intflist, intf_on_network)
+ count++;
- if (ip2net == NULL)
- snprintf(buf, sizeof(buf) - 1, "%s(%s)%s",
- net, intf,
- (smp) ? smp : "");
+ *nids = calloc(count, sizeof(lnet_nid_t));
+ if (*nids == NULL)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
- LIBCFS_IOC_INIT_V2(*data, cfg_hdr);
- strncpy(data->cfg_config_u.cfg_net.net_intf,
- (ip2net != NULL) ? ip2net : buf, sizeof(buf));
- data->cfg_config_u.cfg_net.net_peer_timeout = peer_to;
- data->cfg_config_u.cfg_net.net_peer_tx_credits = peer_cr;
- data->cfg_config_u.cfg_net.net_peer_rtr_credits = peer_buf_cr;
- data->cfg_config_u.cfg_net.net_max_tx_credits = credits;
- /* Add in tunable settings if available */
- if (lnd_tunables != NULL) {
- lnd = (struct lnet_ioctl_config_lnd_tunables *)data->cfg_bulk;
+ list_for_each_entry(intf, &nw->nw_intflist, intf_on_network) {
+ memset(&ifr, 0, sizeof(ifr));
+ rc = socket_intf_query(SIOCGIFFLAGS, intf->intf_name, &ifr);
+ if (rc != 0)
+ goto failed;
- data->cfg_hdr.ioc_len = ioctl_size;
- memcpy(lnd, lnd_tunables, sizeof(*lnd_tunables));
- }
+ if ((ifr.ifr_flags & IFF_UP) == 0) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto failed;
+ }
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_NET, data);
- if (rc < 0) {
- rc = -errno;
- snprintf(err_str,
- sizeof(err_str),
- "\"cannot add network: %s\"", strerror(errno));
+ memset(&ifr, 0, sizeof(ifr));
+ rc = socket_intf_query(SIOCGIFADDR, intf->intf_name, &ifr);
+ if (rc != 0)
+ goto failed;
+
+ /* NOTE(review): bswap_32 assumes a little-endian host;
+ * ntohl() would be the portable byte-order conversion -
+ * confirm
+ */
+ ip = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ ip = bswap_32(ip);
+ (*nids)[i] = LNET_MKNID(nw->nw_id, ip);
+ i++;
 }
- free(data);
-out:
- cYAML_build_error(rc, seq_no, ADD_CMD, "net", err_str, err_rc);
+ *nnids = count;
+
+ return 0;
+failed:
+ /* release the partially built NID array */
+ free(*nids);
+ *nids = NULL;
 return rc;
}
-int lustre_lnet_del_net(char *nw, int seq_no, struct cYAML **err_rc)
+/*
+ * called repeatedly until a match or no more ip range
+ * What do you have?
+ * ip_range expression
+ * interface list with all the interface names.
+ * all the interfaces in the system.
+ *
+ * try to match the ip_range expr to one of the interfaces' IPs in
+ * the system. If we hit a match for an interface, check if that
+ * interface name is in the list.
+ *
+ * If there are more than one interface in the list, then make sure
+ * that the IPs for all of these interfaces match the ip ranges
+ * given.
+ *
+ * for each interface in intf_list
+ * look up the intf name in ifa
+ * if not there then no match
+ * check ip obtained from ifa against a match to any of the
+ * ip_ranges given.
+ * If no match, then fail
+ *
+ * The result is that all the interfaces have to match.
+ */
+int lustre_lnet_match_ip_to_intf(struct ifaddrs *ifa,
+ struct list_head *intf_list,
+ struct list_head *ip_ranges)
{
- struct lnet_ioctl_config_data data;
- __u32 net = LNET_NIDNET(LNET_NID_ANY);
- int rc = LUSTRE_CFG_RC_NO_ERR;
- char err_str[LNET_MAX_STR_LEN];
+ int rc;
+ __u32 ip;
+ struct lnet_dlc_intf_descr *intf_descr, *tmp;
+ struct ifaddrs *ifaddr = ifa;
+ struct lustre_lnet_ip_range_descr *ip_range;
+ int family;
+
+ /*
+ * if there are no explicit interfaces, and no ip ranges, then
+ * configure the first tcp interface we encounter.
+ */
+ if (list_empty(intf_list) && list_empty(ip_ranges)) {
+ for (ifaddr = ifa; ifaddr != NULL; ifaddr = ifaddr->ifa_next) {
+ if (ifaddr->ifa_addr == NULL)
+ continue;
+
+ if ((ifaddr->ifa_flags & IFF_UP) == 0)
+ continue;
+
+ family = ifaddr->ifa_addr->sa_family;
+ if (family == AF_INET &&
+ strcmp(ifaddr->ifa_name, "lo") != 0) {
+ rc = lustre_lnet_add_intf_descr
+ (intf_list, ifaddr->ifa_name,
+ strlen(ifaddr->ifa_name));
- snprintf(err_str, sizeof(err_str), "\"success\"");
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ return rc;
- if (nw == NULL) {
- snprintf(err_str,
- sizeof(err_str),
- "\"missing mandatory parameter\"");
- rc = LUSTRE_CFG_RC_MISSING_PARAM;
- goto out;
+ return LUSTRE_CFG_RC_MATCH;
+ }
+ }
+ return LUSTRE_CFG_RC_NO_MATCH;
 }
- net = libcfs_str2net(nw);
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- snprintf(err_str,
- sizeof(err_str),
- "\"cannot parse net '%s'\"", nw);
- rc = LUSTRE_CFG_RC_BAD_PARAM;
- goto out;
- }
+ /*
+ * First interface which matches an IP pattern will be used
+ */
+ if (list_empty(intf_list)) {
+ /*
+ * no interfaces provided in the rule, but an ip range is
+ * provided, so try and match an interface to the ip
+ * range.
+ */
+ for (ifaddr = ifa; ifaddr != NULL; ifaddr = ifaddr->ifa_next) {
+ if (ifaddr->ifa_addr == NULL)
+ continue;
+
+ if ((ifaddr->ifa_flags & IFF_UP) == 0)
+ continue;
+
+ family = ifaddr->ifa_addr->sa_family;
+ if (family == AF_INET) {
+ ip = ((struct sockaddr_in *)ifaddr->ifa_addr)->
+ sin_addr.s_addr;
+
+ list_for_each_entry(ip_range, ip_ranges,
+ ipr_entry) {
+ rc = cfs_ip_addr_match(bswap_32(ip),
+ &ip_range->ipr_expr);
+ if (!rc)
+ continue;
+
+ /* NOTE(review): an interface that
+ * matches several ranges is added once
+ * per matching range - confirm that
+ * duplicate descriptors are intended
+ */
+ rc = lustre_lnet_add_intf_descr
+ (intf_list, ifaddr->ifa_name,
+ strlen(ifaddr->ifa_name));
+
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ return rc;
+ }
+ }
+ }
- LIBCFS_IOC_INIT_V2(data, cfg_hdr);
- data.cfg_net = net;
+ if (!list_empty(intf_list))
+ return LUSTRE_CFG_RC_MATCH;
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_NET, &data);
- if (rc != 0) {
- rc = -errno;
- snprintf(err_str,
- sizeof(err_str),
- "\"cannot delete network: %s\"", strerror(errno));
- goto out;
+ return LUSTRE_CFG_RC_NO_MATCH;
 }
-out:
- cYAML_build_error(rc, seq_no, DEL_CMD, "net", err_str, err_rc);
-
- return rc;
-}
+ /*
+ * If an interface is explicitly specified the ip-range might or
+ * might not be specified. if specified the interface needs to match the
+ * ip-range. If no ip-range then the interfaces are
+ * automatically matched if they are all up.
+ * If > 1 interfaces all the interfaces must match for the NI to
+ * be configured.
+ */
+ list_for_each_entry_safe(intf_descr, tmp, intf_list, intf_on_network) {
+ for (ifaddr = ifa; ifaddr != NULL; ifaddr = ifaddr->ifa_next) {
+ if (ifaddr->ifa_addr == NULL)
+ continue;
+
+ family = ifaddr->ifa_addr->sa_family;
+ if (family == AF_INET &&
+ strcmp(intf_descr->intf_name,
+ ifaddr->ifa_name) == 0)
+ break;
+ }
+
+ if (ifaddr == NULL) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ continue;
+ }
+
+ if ((ifaddr->ifa_flags & IFF_UP) == 0) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ continue;
+ }
+
+ ip = ((struct sockaddr_in *)ifaddr->ifa_addr)->sin_addr.s_addr;
+
+ /* rc stays 1 when ip_ranges is empty: with no ranges the
+ * interface is kept as long as it is up
+ */
+ rc = 1;
+ list_for_each_entry(ip_range, ip_ranges, ipr_entry) {
+ rc = cfs_ip_addr_match(bswap_32(ip), &ip_range->ipr_expr);
+ if (rc)
+ break;
+ }
+
+ if (!rc) {
+ /* no match for this interface */
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ }
+ }
+
+ return LUSTRE_CFG_RC_MATCH;
+}
+
+/*
+ * lustre_lnet_resolve_ip2nets_rule
+ * Resolve one ip2nets rule: enumerate the system's interfaces, match
+ * them against the rule's interface list and ip-ranges, then build the
+ * NID array for the matched interfaces via lustre_lnet_intf2nids().
+ */
+int lustre_lnet_resolve_ip2nets_rule(struct lustre_lnet_ip2nets *ip2nets,
+ lnet_nid_t **nids, __u32 *nnids)
+{
+ struct ifaddrs *ifa;
+ int rc = LUSTRE_CFG_RC_NO_ERR;
+
+ rc = getifaddrs(&ifa);
+ if (rc < 0)
+ return -errno;
+
+ rc = lustre_lnet_match_ip_to_intf(ifa,
+ &ip2nets->ip2nets_net.nw_intflist,
+ &ip2nets->ip2nets_ip_ranges);
+ if (rc != LUSTRE_CFG_RC_MATCH) {
+ freeifaddrs(ifa);
+ return rc;
+ }
+
+ rc = lustre_lnet_intf2nids(&ip2nets->ip2nets_net, nids, nnids);
+ if (rc != LUSTRE_CFG_RC_NO_ERR) {
+ /* intf2nids already freed the array on failure */
+ *nids = NULL;
+ *nnids = 0;
+ }
+
+ freeifaddrs(ifa);
+
+ return rc;
+}
+
+/*
+ * lustre_lnet_ioctl_config_ni
+ * Issue one IOC_LIBCFS_ADD_LOCAL_NI per interface in @intf_list, using
+ * nids[i] for the i-th interface. The LND tunables, when given, are
+ * attached to the first NI only. err_str is populated on failure.
+ */
+static int
+lustre_lnet_ioctl_config_ni(struct list_head *intf_list,
+ struct lnet_ioctl_config_lnd_tunables *tunables,
+ struct cfs_expr_list *global_cpts,
+ lnet_nid_t *nids, char *err_str)
+{
+ char *data;
+ struct lnet_ioctl_config_ni *conf;
+ struct lnet_ioctl_config_lnd_tunables *tun = NULL;
+ int rc = LUSTRE_CFG_RC_NO_ERR, i = 0;
+ size_t len;
+ int count;
+ struct lnet_dlc_intf_descr *intf_descr;
+ __u32 *cpt_array;
+ struct cfs_expr_list *cpt_expr;
+
+ list_for_each_entry(intf_descr, intf_list,
+ intf_on_network) {
+ /* only the first NI carries the LND tunables */
+ if (i == 0 && tunables != NULL)
+ len = sizeof(struct lnet_ioctl_config_ni) +
+ sizeof(struct lnet_ioctl_config_lnd_tunables);
+ else
+ len = sizeof(struct lnet_ioctl_config_ni);
+
+ data = calloc(1, len);
+ if (data == NULL) {
+ snprintf(err_str, LNET_MAX_STR_LEN,
+ "\"out of memory\"");
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ }
+ conf = (struct lnet_ioctl_config_ni*) data;
+ if (i == 0 && tunables != NULL)
+ tun = (struct lnet_ioctl_config_lnd_tunables*)
+ conf->lic_bulk;
+
+ LIBCFS_IOC_INIT_V2(*conf, lic_cfg_hdr);
+ conf->lic_cfg_hdr.ioc_len = len;
+ conf->lic_nid = nids[i];
+ strncpy(conf->lic_ni_intf[0], intf_descr->intf_name,
+ LNET_MAX_STR_LEN);
+
+ /* a per-interface CPT expression overrides the global one */
+ if (intf_descr->cpt_expr != NULL)
+ cpt_expr = intf_descr->cpt_expr;
+ else if (global_cpts != NULL)
+ cpt_expr = global_cpts;
+ else
+ cpt_expr = NULL;
+
+ if (cpt_expr != NULL) {
+ count = cfs_expr_list_values(cpt_expr,
+ LNET_MAX_SHOW_NUM_CPT,
+ &cpt_array);
+ if (count > 0) {
+ /* cpt_array holds exactly count elements;
+ * copy only those
+ */
+ memcpy(conf->lic_cpts, cpt_array,
+ sizeof(cpt_array[0]) * count);
+ free(cpt_array);
+ } else {
+ count = 0;
+ }
+ } else {
+ count = 0;
+ }
+
+ conf->lic_ncpts = count;
+
+ if (i == 0 && tunables != NULL)
+ /* TODO put in the LND tunables */
+ memcpy(tun, tunables, sizeof(*tunables));
+
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_LOCAL_NI, data);
+ if (rc < 0) {
+ rc = -errno;
+ snprintf(err_str,
+ LNET_MAX_STR_LEN,
+ "\"cannot add network: %s\"", strerror(errno));
+ free(data);
+ return rc;
+ }
+ /* the kernel copies the request; release our buffer */
+ free(data);
+ i++;
+ }
+
+ return LUSTRE_CFG_RC_NO_ERR;
+}
+
+/*
+ * lustre_lnet_config_ip2nets
+ * Resolve an ip2nets rule against the local interfaces and configure
+ * an NI for every matched interface. Errors are reported through
+ * err_rc as a YAML error block.
+ */
+int
+lustre_lnet_config_ip2nets(struct lustre_lnet_ip2nets *ip2nets,
+ struct lnet_ioctl_config_lnd_tunables *tunables,
+ struct cfs_expr_list *global_cpts,
+ int seq_no, struct cYAML **err_rc)
+{
+ lnet_nid_t *nids = NULL;
+ __u32 nnids = 0;
+ int rc;
+ char err_str[LNET_MAX_STR_LEN];
+
+ snprintf(err_str, sizeof(err_str), "\"success\"");
+
+ if (!ip2nets) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"incomplete ip2nets information\"");
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ rc = lustre_lnet_resolve_ip2nets_rule(ip2nets, &nids, &nnids);
+ if (rc != LUSTRE_CFG_RC_NO_ERR && rc != LUSTRE_CFG_RC_MATCH) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"cannot resolve ip2nets rule\"");
+ goto out;
+ }
+
+ if (list_empty(&ip2nets->ip2nets_net.nw_intflist)) {
+ snprintf(err_str, sizeof(err_str),
+ "\"no interfaces match ip2nets rules\"");
+ /* report a failure code rather than the MATCH/NO_ERR
+ * value left over from the resolve step
+ */
+ rc = LUSTRE_CFG_RC_NO_MATCH;
+ goto out;
+ }
+
+ rc = lustre_lnet_ioctl_config_ni(&ip2nets->ip2nets_net.nw_intflist,
+ tunables, global_cpts, nids,
+ err_str);
+
+out:
+ /* the NID array is ours on every path (NULL on early exits) */
+ free(nids);
+ cYAML_build_error(rc, seq_no, ADD_CMD, "ip2nets", err_str, err_rc);
+ return rc;
+}
+
+/*
+ * lustre_lnet_config_ni
+ * Configure local NI(s) either from a legacy ip2net string or from a
+ * parsed network descriptor (net id + interface list). The interface
+ * list in @nw_descr is consumed and freed. Errors are reported
+ * through err_rc as a YAML error block.
+ */
+int lustre_lnet_config_ni(struct lnet_dlc_network_descr *nw_descr,
+ struct cfs_expr_list *global_cpts,
+ char *ip2net,
+ struct lnet_ioctl_config_lnd_tunables *tunables,
+ int seq_no, struct cYAML **err_rc)
+{
+ char *data = NULL;
+ struct lnet_ioctl_config_ni *conf;
+ struct lnet_ioctl_config_lnd_tunables *tun = NULL;
+ char buf[LNET_MAX_STR_LEN];
+ int rc = LUSTRE_CFG_RC_NO_ERR;
+ char err_str[LNET_MAX_STR_LEN];
+ lnet_nid_t *nids = NULL;
+ __u32 nnids = 0;
+ size_t len;
+ int count;
+ struct lnet_dlc_intf_descr *intf_descr, *tmp;
+ __u32 *cpt_array;
+
+ snprintf(err_str, sizeof(err_str), "\"success\"");
+
+ if (ip2net == NULL && nw_descr == NULL) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"mandatory parameters not specified.\"");
+ rc = LUSTRE_CFG_RC_MISSING_PARAM;
+ goto out;
+ }
+
+ if (ip2net != NULL && strlen(ip2net) >= sizeof(buf)) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"ip2net string too long %d\"",
+ (int)strlen(ip2net));
+ rc = LUSTRE_CFG_RC_OUT_OF_RANGE_PARAM;
+ goto out;
+ }
+
+ /* legacy ip2net configuration: hand the whole string to the kernel */
+ if (ip2net != NULL) {
+ if (tunables != NULL)
+ len = sizeof(struct lnet_ioctl_config_ni) +
+ sizeof(struct lnet_ioctl_config_lnd_tunables);
+ else
+ len = sizeof(struct lnet_ioctl_config_ni);
+ data = calloc(1, len);
+ if (data == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "\"out of memory\"");
+ rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+ goto out;
+ }
+ conf = (struct lnet_ioctl_config_ni*) data;
+ if (tunables != NULL)
+ tun = (struct lnet_ioctl_config_lnd_tunables*)
+ (data + sizeof(*conf));
+
+ LIBCFS_IOC_INIT_V2(*conf, lic_cfg_hdr);
+ conf->lic_cfg_hdr.ioc_len = len;
+ /* length was bounds-checked above, so the copy terminates */
+ strncpy(conf->lic_legacy_ip2nets, ip2net,
+ LNET_MAX_STR_LEN);
+
+ if (global_cpts != NULL) {
+ count = cfs_expr_list_values(global_cpts,
+ LNET_MAX_SHOW_NUM_CPT,
+ &cpt_array);
+ if (count > 0) {
+ /* cpt_array holds exactly count elements;
+ * copy only those
+ */
+ memcpy(conf->lic_cpts, cpt_array,
+ sizeof(cpt_array[0]) * count);
+ free(cpt_array);
+ } else {
+ count = 0;
+ }
+ } else {
+ count = 0;
+ }
+
+ conf->lic_ncpts = count;
+
+ if (tunables != NULL)
+ memcpy(tun, tunables, sizeof(*tunables));
+
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_LOCAL_NI, data);
+ if (rc < 0) {
+ rc = -errno;
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"cannot add network: %s\"", strerror(errno));
+ goto out;
+ }
+
+ goto out;
+ }
+
+ /* the loopback NI is created by the kernel; nothing to do here */
+ if (LNET_NETTYP(nw_descr->nw_id) == LOLND)
+ return LUSTRE_CFG_RC_NO_ERR;
+
+ if (nw_descr->nw_id == LNET_NIDNET(LNET_NID_ANY)) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"cannot parse net '%s'\"",
+ libcfs_net2str(nw_descr->nw_id));
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ if (list_empty(&nw_descr->nw_intflist)) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"no interface name provided\"");
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ rc = lustre_lnet_intf2nids(nw_descr, &nids, &nnids);
+ if (rc != 0) {
+ snprintf(err_str, sizeof(err_str),
+ "\"bad parameter\"");
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ rc = lustre_lnet_ioctl_config_ni(&nw_descr->nw_intflist,
+ tunables, global_cpts, nids,
+ err_str);
+
+out:
+ /* release the interface descriptors regardless of outcome */
+ if (nw_descr != NULL) {
+ list_for_each_entry_safe(intf_descr, tmp,
+ &nw_descr->nw_intflist,
+ intf_on_network) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ }
+ }
+
+ cYAML_build_error(rc, seq_no, ADD_CMD, "net", err_str, err_rc);
+
+ free(nids);
+ free(data);
+
+ return rc;
+}
+
+/*
+ * lustre_lnet_del_ni
+ * Delete the local NI(s) described by @nw_descr. With no interfaces,
+ * the whole network identified by nw_id is removed. The interface
+ * list is consumed and freed. Errors are reported through err_rc.
+ */
+int lustre_lnet_del_ni(struct lnet_dlc_network_descr *nw_descr,
+ int seq_no, struct cYAML **err_rc)
+{
+ struct lnet_ioctl_config_ni data;
+ int rc = LUSTRE_CFG_RC_NO_ERR, i;
+ char err_str[LNET_MAX_STR_LEN];
+ lnet_nid_t *nids = NULL;
+ __u32 nnids = 0;
+ struct lnet_dlc_intf_descr *intf_descr, *tmp;
+
+ snprintf(err_str, sizeof(err_str), "\"success\"");
+
+ /* validate the descriptor BEFORE dereferencing it */
+ if (nw_descr == NULL) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"missing mandatory parameter\"");
+ rc = LUSTRE_CFG_RC_MISSING_PARAM;
+ goto out;
+ }
+
+ /* the loopback NI is never configured, so nothing to delete */
+ if (LNET_NETTYP(nw_descr->nw_id) == LOLND)
+ return LUSTRE_CFG_RC_NO_ERR;
+
+ if (nw_descr->nw_id == LNET_NIDNET(LNET_NID_ANY)) {
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"cannot parse net '%s'\"",
+ libcfs_net2str(nw_descr->nw_id));
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ rc = lustre_lnet_intf2nids(nw_descr, &nids, &nnids);
+ if (rc != 0) {
+ snprintf(err_str, sizeof(err_str),
+ "\"bad parameter\"");
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ goto out;
+ }
+
+ /*
+ * no interfaces just the nw_id is specified
+ */
+ if (nnids == 0) {
+ nids = calloc(1, sizeof(*nids));
+ if (nids == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "\"out of memory\"");
+ rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+ goto out;
+ }
+ nids[0] = LNET_MKNID(nw_descr->nw_id, 0);
+ nnids = 1;
+ }
+
+ for (i = 0; i < nnids; i++) {
+ LIBCFS_IOC_INIT_V2(data, lic_cfg_hdr);
+ data.lic_nid = nids[i];
+
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_LOCAL_NI, &data);
+ if (rc < 0) {
+ rc = -errno;
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"cannot del network: %s\"", strerror(errno));
+ }
+ }
+
+ list_for_each_entry_safe(intf_descr, tmp, &nw_descr->nw_intflist,
+ intf_on_network) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ }
+
+out:
+ cYAML_build_error(rc, seq_no, DEL_CMD, "net", err_str, err_rc);
+
+ free(nids);
+
+ return rc;
+}
int lustre_lnet_show_net(char *nw, int detail, int seq_no,
struct cYAML **show_rc, struct cYAML **err_rc)
{
char *buf;
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg;
- struct lnet_ioctl_config_data *data;
- struct lnet_ioctl_net_config *net_config;
+ struct lnet_ioctl_config_ni *ni_data;
+ struct lnet_ioctl_config_lnd_tunables *lnd;
+ struct lnet_ioctl_element_stats *stats;
__u32 net = LNET_NIDNET(LNET_NID_ANY);
+ __u32 prev_net = LNET_NIDNET(LNET_NID_ANY);
int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i, j;
int l_errno = 0;
- struct cYAML *root = NULL, *tunables = NULL, *net_node = NULL,
- *interfaces = NULL, *item = NULL, *first_seq = NULL;
+ struct cYAML *root = NULL, *tunables = NULL,
+ *net_node = NULL, *interfaces = NULL,
+ *item = NULL, *first_seq = NULL,
+ *tmp = NULL, *statistics = NULL;
int str_buf_len = LNET_MAX_SHOW_NUM_CPT * 2;
char str_buf[str_buf_len];
char *pos;
char err_str[LNET_MAX_STR_LEN];
- bool exist = false;
- size_t buf_len;
+ bool exist = false, new_net = true;
+ int net_num = 0;
+ size_t buf_size = sizeof(*ni_data) + sizeof(*lnd) + sizeof(*stats);
snprintf(err_str, sizeof(err_str), "\"out of memory\"");
- buf_len = sizeof(*data) + sizeof(*net_config) + sizeof(*lnd_cfg);
- buf = calloc(1, buf_len);
+ buf = calloc(1, buf_size);
if (buf == NULL)
goto out;
- data = (struct lnet_ioctl_config_data *)buf;
+ ni_data = (struct lnet_ioctl_config_ni *)buf;
if (nw != NULL) {
net = libcfs_str2net(nw);
for (i = 0;; i++) {
pos = str_buf;
+ __u32 rc_net;
- memset(buf, 0, buf_len);
+ memset(buf, 0, buf_size);
- LIBCFS_IOC_INIT_V2(*data, cfg_hdr);
+ LIBCFS_IOC_INIT_V2(*ni_data, lic_cfg_hdr);
/*
* set the ioc_len to the proper value since INIT assumes
* size of data
*/
- data->cfg_hdr.ioc_len = buf_len;
- data->cfg_count = i;
+ ni_data->lic_cfg_hdr.ioc_len = buf_size;
+ ni_data->lic_idx = i;
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NET, data);
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_LOCAL_NI, ni_data);
if (rc != 0) {
l_errno = errno;
break;
}
+ rc_net = LNET_NIDNET(ni_data->lic_nid);
+
/* filter on provided data */
if (net != LNET_NIDNET(LNET_NID_ANY) &&
- net != LNET_NIDNET(data->cfg_nid))
+ net != rc_net)
continue;
/* default rc to -1 in case we hit the goto */
rc = -1;
exist = true;
- net_config = (struct lnet_ioctl_net_config *)data->cfg_bulk;
+ stats = (struct lnet_ioctl_element_stats *)ni_data->lic_bulk;
+ lnd = (struct lnet_ioctl_config_lnd_tunables *)
+ (ni_data->lic_bulk + sizeof(*stats));
+
+ if (rc_net != prev_net) {
+ prev_net = rc_net;
+ new_net = true;
+ net_num++;
+ }
+
+ if (new_net) {
+ if (!cYAML_create_string(net_node, "net type",
+ libcfs_net2str(rc_net)))
+ goto out;
+
+ tmp = cYAML_create_seq(net_node, "local NI(s)");
+ if (tmp == NULL)
+ goto out;
+ new_net = false;
+ }
/* create the tree to be printed. */
- item = cYAML_create_seq_item(net_node);
+ item = cYAML_create_seq_item(tmp);
if (item == NULL)
goto out;
if (first_seq == NULL)
first_seq = item;
- if (cYAML_create_string(item, "net",
- libcfs_net2str(
- LNET_NIDNET(data->cfg_nid)))
- == NULL)
- goto out;
-
if (cYAML_create_string(item, "nid",
- libcfs_nid2str(data->cfg_nid)) == NULL)
+ libcfs_nid2str(ni_data->lic_nid)) == NULL)
goto out;
- if (cYAML_create_string(item, "status",
- (net_config->ni_status ==
+ if (cYAML_create_string(item,
+ "status",
+ (ni_data->lic_status ==
LNET_NI_STATUS_UP) ?
"up" : "down") == NULL)
goto out;
/* don't add interfaces unless there is at least one
* interface */
- if (strlen(net_config->ni_interfaces[0]) > 0) {
+ if (strlen(ni_data->lic_ni_intf[0]) > 0) {
interfaces = cYAML_create_object(item, "interfaces");
if (interfaces == NULL)
goto out;
for (j = 0; j < LNET_MAX_INTERFACES; j++) {
- if (lustre_interface_show_net(interfaces, j,
- detail, data,
- net_config) < 0)
- goto out;
+ if (strlen(ni_data->lic_ni_intf[j]) > 0) {
+ snprintf(str_buf,
+ sizeof(str_buf), "%d", j);
+ if (cYAML_create_string(interfaces,
+ str_buf,
+ ni_data->lic_ni_intf[j]) ==
+ NULL)
+ goto out;
+ }
}
}
if (detail) {
char *limit;
+ statistics = cYAML_create_object(item, "statistics");
+ if (statistics == NULL)
+ goto out;
+
+ if (cYAML_create_number(statistics, "send_count",
+ stats->send_count)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(statistics, "recv_count",
+ stats->recv_count)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(statistics, "drop_count",
+ stats->drop_count)
+ == NULL)
+ goto out;
+
tunables = cYAML_create_object(item, "tunables");
- if (tunables == NULL)
+ if (!tunables)
+ goto out;
+
+ rc = lustre_net_show_tunables(tunables, &lnd->lt_cmn);
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
goto out;
- if (cYAML_create_number(tunables, "peer_timeout",
- data->cfg_config_u.cfg_net.
- net_peer_timeout) == NULL)
+ tunables = cYAML_create_object(item, "lnd tunables");
+ if (tunables == NULL)
goto out;
- if (cYAML_create_number(tunables, "peer_credits",
- data->cfg_config_u.cfg_net.
- net_peer_tx_credits) == NULL)
+ rc = lustre_ni_show_tunables(tunables, LNET_NETTYP(rc_net),
+ &lnd->lt_tun);
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
goto out;
- if (cYAML_create_number(tunables,
- "peer_buffer_credits",
- data->cfg_config_u.cfg_net.
- net_peer_rtr_credits) == NULL)
+ if (cYAML_create_number(item, "tcp bonding",
+ ni_data->lic_tcp_bonding)
+ == NULL)
goto out;
- if (cYAML_create_number(tunables, "credits",
- data->cfg_config_u.cfg_net.
- net_max_tx_credits) == NULL)
+ if (cYAML_create_number(item, "dev cpt",
+ ni_data->lic_dev_cpt) == NULL)
goto out;
/* out put the CPTs in the format: "[x,x,x,...]" */
limit = str_buf + str_buf_len - 3;
pos += snprintf(pos, limit - pos, "\"[");
- for (j = 0 ; data->cfg_ncpts > 1 &&
- j < data->cfg_ncpts &&
+ for (j = 0 ; ni_data->lic_ncpts >= 1 &&
+ j < ni_data->lic_ncpts &&
pos < limit; j++) {
pos += snprintf(pos, limit - pos,
- "%d", net_config->ni_cpts[j]);
- if ((j + 1) < data->cfg_ncpts)
+ "%d", ni_data->lic_cpts[j]);
+ if ((j + 1) < ni_data->lic_ncpts)
pos += snprintf(pos, limit - pos, ",");
}
pos += snprintf(pos, 3, "]\"");
- if (data->cfg_ncpts > 1 &&
- cYAML_create_string(tunables, "CPT",
+ if (ni_data->lic_ncpts >= 1 &&
+ cYAML_create_string(item, "CPT",
str_buf) == NULL)
goto out;
}
return rc;
}
+int lustre_lnet_config_numa_range(int range, int seq_no, struct cYAML **err_rc)
+{
+	struct lnet_ioctl_numa_range data;
+	int rc = LUSTRE_CFG_RC_NO_ERR;
+	char err_str[LNET_MAX_STR_LEN];
+
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	if (range < 0) {
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"range must be >= 0\"");
+		rc = LUSTRE_CFG_RC_OUT_OF_RANGE_PARAM;
+		goto out;
+	}
+
+	LIBCFS_IOC_INIT_V2(data, nr_hdr);
+	data.nr_range = range;
+
+	rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_SET_NUMA_RANGE, &data);
+	if (rc != 0) {
+		rc = -errno;
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"cannot configure numa_range: %s\"",
+			 strerror(errno));
+		goto out;
+	}
+
+out:
+	cYAML_build_error(rc, seq_no, ADD_CMD, "numa_range", err_str, err_rc);
+
+	return rc;
+}
+
int lustre_lnet_config_buffers(int tiny, int small, int large, int seq_no,
struct cYAML **err_rc)
{
return rc;
}
-int lustre_lnet_show_peer_credits(int seq_no, struct cYAML **show_rc,
- struct cYAML **err_rc)
+int lustre_lnet_show_peer(char *knid, int detail, int seq_no,
+ struct cYAML **show_rc, struct cYAML **err_rc)
{
- struct lnet_ioctl_peer peer_info;
+ /*
+ * TODO: This function is changing in a future patch to accommodate
+ * PEER_LIST and proper filtering on any nid of the peer
+ */
+ struct lnet_ioctl_peer_cfg *peer_info;
+ struct lnet_peer_ni_credit_info *lpni_cri;
+ struct lnet_ioctl_element_stats *lpni_stats;
int rc = LUSTRE_CFG_RC_OUT_OF_MEM, ncpt = 0, i = 0, j = 0;
int l_errno = 0;
- struct cYAML *root = NULL, *peer = NULL, *first_seq = NULL,
- *peer_root = NULL;
+ struct cYAML *root = NULL, *peer = NULL, *peer_ni = NULL,
+ *first_seq = NULL, *peer_root = NULL, *tmp = NULL;
char err_str[LNET_MAX_STR_LEN];
- bool ncpt_set = false;
+ lnet_nid_t prev_primary_nid = LNET_NID_ANY, primary_nid = LNET_NID_ANY;
+ int data_size = sizeof(*peer_info) + sizeof(*lpni_cri) +
+ sizeof(*lpni_stats);
+ char *data = calloc(data_size, 1);
+ bool new_peer = true;
snprintf(err_str, sizeof(err_str),
"\"out of memory\"");
+ if (data == NULL)
+ goto out;
+
+ peer_info = (struct lnet_ioctl_peer_cfg *)data;
+
/* create struct cYAML root object */
root = cYAML_create_object(NULL, NULL);
if (root == NULL)
if (peer_root == NULL)
goto out;
+ if (knid != NULL)
+ primary_nid = libcfs_str2nid(knid);
+
do {
for (i = 0;; i++) {
- LIBCFS_IOC_INIT_V2(peer_info, pr_hdr);
- peer_info.pr_count = i;
- peer_info.pr_lnd_u.pr_peer_credits.cr_ncpt = j;
+ memset(data, 0, data_size);
+ LIBCFS_IOC_INIT_V2(*peer_info, prcfg_hdr);
+ peer_info->prcfg_hdr.ioc_len = data_size;
+ peer_info->prcfg_idx = i;
+
rc = l_ioctl(LNET_DEV_ID,
- IOC_LIBCFS_GET_PEER_INFO, &peer_info);
+ IOC_LIBCFS_GET_PEER_NI, peer_info);
if (rc != 0) {
l_errno = errno;
break;
}
- if (ncpt_set != 0) {
- ncpt = peer_info.pr_lnd_u.pr_peer_credits.
- cr_ncpt;
- ncpt_set = true;
- }
+ if (primary_nid != LNET_NID_ANY &&
+ primary_nid != peer_info->prcfg_prim_nid)
+ continue;
+
+ lpni_cri = (struct lnet_peer_ni_credit_info*)peer_info->prcfg_bulk;
+ lpni_stats = (struct lnet_ioctl_element_stats *)
+ (peer_info->prcfg_bulk +
+ sizeof(*lpni_cri));
peer = cYAML_create_seq_item(peer_root);
if (peer == NULL)
goto out;
+ if (peer_info->prcfg_prim_nid != prev_primary_nid) {
+ prev_primary_nid = peer_info->prcfg_prim_nid;
+ new_peer = true;
+ }
+
+ if (new_peer) {
+ lnet_nid_t pnid = peer_info->prcfg_prim_nid;
+ if (cYAML_create_string(peer, "primary nid",
+ libcfs_nid2str(pnid))
+ == NULL)
+ goto out;
+ if (cYAML_create_string(peer, "Multi-Rail",
+ peer_info->prcfg_mr ?
+ "True" : "False")
+ == NULL)
+ goto out;
+ tmp = cYAML_create_seq(peer, "peer ni");
+ if (tmp == NULL)
+ goto out;
+ new_peer = false;
+ }
+
if (first_seq == NULL)
first_seq = peer;
- if (cYAML_create_string(peer, "nid",
- libcfs_nid2str
- (peer_info.pr_nid)) == NULL)
+ peer_ni = cYAML_create_seq_item(tmp);
+ if (peer_ni == NULL)
goto out;
- if (cYAML_create_string(peer, "state",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_aliveness) ==
- NULL)
+ if (cYAML_create_string(peer_ni, "nid",
+ libcfs_nid2str
+ (peer_info->prcfg_cfg_nid))
+ == NULL)
goto out;
- if (cYAML_create_number(peer, "refcount",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_refcount) == NULL)
+ if (cYAML_create_string(peer_ni, "state",
+ lpni_cri->cr_aliveness)
+ == NULL)
goto out;
- if (cYAML_create_number(peer, "max_ni_tx_credits",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_ni_peer_tx_credits)
+ if (!detail)
+ continue;
+
+ if (cYAML_create_number(peer_ni, "max_ni_tx_credits",
+ lpni_cri->cr_ni_peer_tx_credits)
== NULL)
goto out;
- if (cYAML_create_number(peer, "available_tx_credits",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_peer_tx_credits)
+ if (cYAML_create_number(peer_ni, "available_tx_credits",
+ lpni_cri->cr_peer_tx_credits)
== NULL)
goto out;
- if (cYAML_create_number(peer, "available_rtr_credits",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_peer_rtr_credits)
+ if (cYAML_create_number(peer_ni, "min_tx_credits",
+ lpni_cri->cr_peer_min_tx_credits)
== NULL)
goto out;
- if (cYAML_create_number(peer, "min_rtr_credits",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_peer_min_rtr_credits)
+ if (cYAML_create_number(peer_ni, "tx_q_num_of_buf",
+ lpni_cri->cr_peer_tx_qnob)
== NULL)
goto out;
- if (cYAML_create_number(peer, "tx_q_num_of_buf",
- peer_info.pr_lnd_u.
- pr_peer_credits.
- cr_peer_tx_qnob)
+ if (cYAML_create_number(peer_ni, "available_rtr_credits",
+ lpni_cri->cr_peer_rtr_credits)
== NULL)
goto out;
- }
+
+ if (cYAML_create_number(peer_ni, "min_rtr_credits",
+ lpni_cri->cr_peer_min_rtr_credits)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(peer_ni, "send_count",
+ lpni_stats->send_count)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(peer_ni, "recv_count",
+ lpni_stats->recv_count)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(peer_ni, "drop_count",
+ lpni_stats->drop_count)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(peer_ni, "refcount",
+ lpni_cri->cr_refcount) == NULL)
+ goto out;
+ }
if (l_errno != ENOENT) {
snprintf(err_str,
* insert one. Otherwise add to the one there
*/
show_node = cYAML_get_object_item(*show_rc,
- "peer_credits");
+ "peer");
if (show_node != NULL && cYAML_is_sequence(show_node)) {
cYAML_insert_child(show_node, first_seq);
free(peer_root);
*show_rc = root;
}
- cYAML_build_error(rc, seq_no, SHOW_CMD, "peer_credits", err_str,
+ cYAML_build_error(rc, seq_no, SHOW_CMD, "peer", err_str,
err_rc);
return rc;
}
+int lustre_lnet_show_numa_range(int seq_no, struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ struct lnet_ioctl_numa_range data;
+ int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+ int l_errno;
+ char err_str[LNET_MAX_STR_LEN];
+ struct cYAML *root = NULL, *range = NULL;
+
+ snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+
+ LIBCFS_IOC_INIT_V2(data, nr_hdr);
+
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NUMA_RANGE, &data);
+ if (rc != 0) {
+ l_errno = errno;
+ snprintf(err_str,
+ sizeof(err_str),
+ "\"cannot get numa range: %s\"",
+ strerror(l_errno));
+ rc = -l_errno;
+ goto out;
+ }
+
+ root = cYAML_create_object(NULL, NULL);
+ if (root == NULL)
+ goto out;
+
+ range = cYAML_create_object(root, "numa");
+ if (range == NULL)
+ goto out;
+
+ if (cYAML_create_number(range, "range",
+ data.nr_range) == NULL)
+ goto out;
+
+ if (show_rc == NULL)
+ cYAML_print_tree(root);
+
+ snprintf(err_str, sizeof(err_str), "\"success\"");
+out:
+ if (show_rc == NULL || rc != LUSTRE_CFG_RC_NO_ERR) {
+ cYAML_free_tree(root);
+ } else if (show_rc != NULL && *show_rc != NULL) {
+ cYAML_insert_sibling((*show_rc)->cy_child,
+ root->cy_child);
+ free(root);
+ } else {
+ *show_rc = root;
+ }
+
+ cYAML_build_error(rc, seq_no, SHOW_CMD, "numa", err_str, err_rc);
+
+ return rc;
+}
+
int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc,
struct cYAML **err_rc)
{
err_rc);
}
-static int handle_yaml_config_net(struct cYAML *tree, struct cYAML **show_rc,
- struct cYAML **err_rc)
+static void yaml_free_string_array(char **array, int num)
+{
+ int i;
+ char **sub_array = array;
+
+ for (i = 0; i < num; i++) {
+ if (*sub_array != NULL)
+ free(*sub_array);
+ sub_array++;
+ }
+ if (array)
+ free(array);
+}
+
+/*
+ * interfaces:
+ * 0: <intf_name>['['<expr>']']
+ * 1: <intf_name>['['<expr>']']
+ */
+static int yaml_copy_intf_info(struct cYAML *intf_tree,
+ struct lnet_dlc_network_descr *nw_descr)
+{
+ struct cYAML *child = NULL;
+ int intf_num = 0, rc = LUSTRE_CFG_RC_NO_ERR;
+ struct lnet_dlc_intf_descr *intf_descr, *tmp;
+
+ if (intf_tree == NULL || nw_descr == NULL)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ /* now grab all the interfaces and their cpts */
+ child = intf_tree->cy_child;
+ while (child != NULL) {
+ if (child->cy_valuestring == NULL) {
+ child = child->cy_next;
+ continue;
+ }
+
+ if (strlen(child->cy_valuestring) >= LNET_MAX_STR_LEN)
+ goto failed;
+
+ rc = lustre_lnet_add_intf_descr(&nw_descr->nw_intflist,
+ child->cy_valuestring,
+ strlen(child->cy_valuestring));
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ goto failed;
+
+ intf_num++;
+ child = child->cy_next;
+ }
+
+ if (intf_num == 0)
+ return LUSTRE_CFG_RC_MISSING_PARAM;
+
+ return intf_num;
+
+failed:
+ list_for_each_entry_safe(intf_descr, tmp, &nw_descr->nw_intflist,
+ intf_on_network) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ }
+
+ return rc;
+}
+
+static bool
+yaml_extract_cmn_tunables(struct cYAML *tree,
+ struct lnet_ioctl_config_lnd_cmn_tunables *tunables,
+ struct cfs_expr_list **global_cpts)
+{
+ struct cYAML *tun, *item, *smp;
+ int rc;
+
+ tun = cYAML_get_object_item(tree, "tunables");
+ if (tun != NULL) {
+ item = cYAML_get_object_item(tun, "peer_timeout");
+ if (item != NULL)
+ tunables->lct_peer_timeout = item->cy_valueint;
+ item = cYAML_get_object_item(tun, "peer_credits");
+ if (item != NULL)
+ tunables->lct_peer_tx_credits = item->cy_valueint;
+ item = cYAML_get_object_item(tun, "peer_buffer_credits");
+ if (item != NULL)
+ tunables->lct_peer_rtr_credits = item->cy_valueint;
+ item = cYAML_get_object_item(tun, "credits");
+ if (item != NULL)
+ tunables->lct_max_tx_credits = item->cy_valueint;
+ smp = cYAML_get_object_item(tun, "CPT");
+ if (smp != NULL) {
+ rc = cfs_expr_list_parse(smp->cy_valuestring,
+ strlen(smp->cy_valuestring),
+ 0, UINT_MAX, global_cpts);
+ if (rc != 0)
+ *global_cpts = NULL;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+yaml_extract_tunables(struct cYAML *tree,
+ struct lnet_ioctl_config_lnd_tunables *tunables,
+ struct cfs_expr_list **global_cpts,
+ __u32 net_type)
+{
+ bool rc;
+
+ rc = yaml_extract_cmn_tunables(tree, &tunables->lt_cmn,
+ global_cpts);
+
+ if (!rc)
+ return rc;
+
+ lustre_yaml_extract_lnd_tunables(tree, net_type,
+ &tunables->lt_tun);
+
+ return rc;
+}
+
+/*
+ * net:
+ * - net type: <net>[<NUM>]
+ * local NI(s):
+ * - nid: <ip>@<net>[<NUM>]
+ * status: up
+ * interfaces:
+ * 0: <intf_name>['['<expr>']']
+ * 1: <intf_name>['['<expr>']']
+ * tunables:
+ * peer_timeout: <NUM>
+ * peer_credits: <NUM>
+ * peer_buffer_credits: <NUM>
+ * credits: <NUM>
+ * lnd tunables:
+ * peercredits_hiw: <NUM>
+ * map_on_demand: <NUM>
+ * concurrent_sends: <NUM>
+ * fmr_pool_size: <NUM>
+ * fmr_flush_trigger: <NUM>
+ * fmr_cache: <NUM>
+ *
+ * At least one interface is required. If no interfaces are provided the
+ * network interface can not be configured.
+ */
+static int handle_yaml_config_ni(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
{
- struct cYAML *net, *intf, *tunables, *seq_no,
- *peer_to = NULL, *peer_buf_cr = NULL, *peer_cr = NULL,
- *credits = NULL, *ip2net = NULL, *smp = NULL, *child;
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables_p = NULL;
- struct lnet_ioctl_config_lnd_tunables lnd_tunables;
- char devs[LNET_MAX_STR_LEN];
- char *loc = devs;
- int size = LNET_MAX_STR_LEN;
- int num;
- bool intf_found = false;
+ struct cYAML *net, *intf, *seq_no, *ip2net = NULL, *local_nis = NULL,
+ *item = NULL;
+ int num_entries = 0, rc;
+ struct lnet_dlc_network_descr nw_descr;
+ struct cfs_expr_list *global_cpts = NULL;
+ struct lnet_ioctl_config_lnd_tunables tunables;
+ bool found = false;
+
+ memset(&tunables, 0, sizeof(tunables));
+
+ INIT_LIST_HEAD(&nw_descr.network_on_rule);
+ INIT_LIST_HEAD(&nw_descr.nw_intflist);
ip2net = cYAML_get_object_item(tree, "ip2net");
- net = cYAML_get_object_item(tree, "net");
+ net = cYAML_get_object_item(tree, "net type");
+ if (net)
+ nw_descr.nw_id = libcfs_str2net(net->cy_valuestring);
+
+ /*
+ * if neither net nor ip2nets are present, then we can not
+ * configure the network.
+ */
+ if (!net && !ip2net)
+ return LUSTRE_CFG_RC_MISSING_PARAM;
+
+ local_nis = cYAML_get_object_item(tree, "local NI(s)");
+ if (local_nis == NULL)
+ return LUSTRE_CFG_RC_MISSING_PARAM;
+
+ if (!cYAML_is_sequence(local_nis))
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ while (cYAML_get_next_seq_item(local_nis, &item) != NULL) {
+ intf = cYAML_get_object_item(item, "interfaces");
+ if (intf == NULL)
+ continue;
+ num_entries = yaml_copy_intf_info(intf, &nw_descr);
+ if (num_entries <= 0) {
+ cYAML_build_error(num_entries, -1, "ni", "add",
+ "bad interface list",
+ err_rc);
+ return LUSTRE_CFG_RC_BAD_PARAM;
+ }
+ }
+
+ found = yaml_extract_tunables(tree, &tunables, &global_cpts,
+ LNET_NETTYP(nw_descr.nw_id));
+ seq_no = cYAML_get_object_item(tree, "seq_no");
+
+ rc = lustre_lnet_config_ni(&nw_descr,
+ global_cpts,
+ (ip2net) ? ip2net->cy_valuestring : NULL,
+ (found) ? &tunables: NULL,
+ (seq_no) ? seq_no->cy_valueint : -1,
+ err_rc);
+
+ if (global_cpts != NULL)
+ cfs_expr_list_free(global_cpts);
+
+ return rc;
+}
+
+/*
+ * ip2nets:
+ * - net-spec: <tcp|o2ib|gni>[NUM]
+ * interfaces:
+ * 0: <intf name>['['<expr>']']
+ * 1: <intf name>['['<expr>']']
+ * ip-range:
+ * 0: <expr.expr.expr.expr>
+ * 1: <expr.expr.expr.expr>
+ */
+static int handle_yaml_config_ip2nets(struct cYAML *tree,
+ struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ struct cYAML *net, *ip_range, *item = NULL, *intf = NULL,
+ *seq_no = NULL;
+ struct lustre_lnet_ip2nets ip2nets;
+ struct lustre_lnet_ip_range_descr *ip_range_descr = NULL,
+ *tmp = NULL;
+ int rc = LUSTRE_CFG_RC_NO_ERR;
+ struct cfs_expr_list *global_cpts = NULL;
+ struct cfs_expr_list *el, *el_tmp;
+ struct lnet_ioctl_config_lnd_tunables tunables;
+ struct lnet_dlc_intf_descr *intf_descr, *intf_tmp;
+ bool found = false;
+
+ memset(&tunables, 0, sizeof(tunables));
+
+ /* initialize all lists */
+ INIT_LIST_HEAD(&ip2nets.ip2nets_ip_ranges);
+ INIT_LIST_HEAD(&ip2nets.ip2nets_net.network_on_rule);
+ INIT_LIST_HEAD(&ip2nets.ip2nets_net.nw_intflist);
+
+ net = cYAML_get_object_item(tree, "net-spec");
+ if (net == NULL)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ if (net != NULL && net->cy_valuestring == NULL)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ /* assign the network id */
+ ip2nets.ip2nets_net.nw_id = libcfs_str2net(net->cy_valuestring);
+ if (ip2nets.ip2nets_net.nw_id == LNET_NID_ANY)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+
+ seq_no = cYAML_get_object_item(tree, "seq_no");
+
intf = cYAML_get_object_item(tree, "interfaces");
if (intf != NULL) {
- /* grab all the interfaces */
- child = intf->cy_child;
- while (child != NULL && size > 0) {
- struct cYAML *lnd_params;
-
- if (child->cy_valuestring == NULL)
- goto ignore_child;
-
- if (loc > devs)
- num = snprintf(loc, size, ",%s",
- child->cy_valuestring);
- else
- num = snprintf(loc, size, "%s",
- child->cy_valuestring);
- size -= num;
- loc += num;
- intf_found = true;
-
- lnd_params = cYAML_get_object_item(intf,
- "lnd tunables");
- if (lnd_params != NULL) {
- const char *dev_name = child->cy_valuestring;
- lnd_tunables_p = &lnd_tunables;
-
- lustre_interface_parse(lnd_params, dev_name,
- lnd_tunables_p);
+ rc = yaml_copy_intf_info(intf, &ip2nets.ip2nets_net);
+ if (rc <= 0)
+ return LUSTRE_CFG_RC_BAD_PARAM;
+ }
+
+ ip_range = cYAML_get_object_item(tree, "ip-range");
+ if (ip_range != NULL) {
+ item = ip_range->cy_child;
+ while (item != NULL) {
+ if (item->cy_valuestring == NULL) {
+ item = item->cy_next;
+ continue;
}
-ignore_child:
- child = child->cy_next;
+
+ rc = lustre_lnet_add_ip_range(&ip2nets.ip2nets_ip_ranges,
+ item->cy_valuestring);
+
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ goto out;
+
+ item = item->cy_next;
+ }
+ }
+
+ found = yaml_extract_tunables(tree, &tunables, &global_cpts,
+ LNET_NETTYP(ip2nets.ip2nets_net.nw_id));
+
+ rc = lustre_lnet_config_ip2nets(&ip2nets,
+ (found) ? &tunables : NULL,
+ global_cpts,
+ (seq_no) ? seq_no->cy_valueint : -1,
+ err_rc);
+
+ /*
+ * don't stop because there was no match. Continue processing the
+ * rest of the rules. If non-match then nothing is configured
+ */
+ if (rc == LUSTRE_CFG_RC_NO_MATCH)
+ rc = LUSTRE_CFG_RC_NO_ERR;
+out:
+ list_for_each_entry_safe(intf_descr, intf_tmp,
+ &ip2nets.ip2nets_net.nw_intflist,
+ intf_on_network) {
+ list_del(&intf_descr->intf_on_network);
+ free_intf_descr(intf_descr);
+ }
+
+ list_for_each_entry_safe(ip_range_descr, tmp,
+ &ip2nets.ip2nets_ip_ranges,
+ ipr_entry) {
+ list_del(&ip_range_descr->ipr_entry);
+ list_for_each_entry_safe(el, el_tmp, &ip_range_descr->ipr_expr,
+ el_link) {
+ list_del(&el->el_link);
+ cfs_expr_list_free(el);
+ }
+ free(ip_range_descr);
+ }
+
+ return rc;
+}
+
+static int handle_yaml_del_ni(struct cYAML *tree, struct cYAML **show_rc,
+			      struct cYAML **err_rc)
+{
+	struct cYAML *net = NULL, *intf = NULL, *seq_no = NULL, *item = NULL,
+		     *local_nis = NULL;
+	int num_entries, rc;
+	struct lnet_dlc_network_descr nw_descr;
+
+	INIT_LIST_HEAD(&nw_descr.network_on_rule);
+	INIT_LIST_HEAD(&nw_descr.nw_intflist);
+
+	net = cYAML_get_object_item(tree, "net type");
+	if (net != NULL)
+		nw_descr.nw_id = libcfs_str2net(net->cy_valuestring);
+
+	local_nis = cYAML_get_object_item(tree, "local NI(s)");
+	if (local_nis == NULL)
+		return LUSTRE_CFG_RC_MISSING_PARAM;
+
+	if (!cYAML_is_sequence(local_nis))
+		return LUSTRE_CFG_RC_BAD_PARAM;
+
+	while (cYAML_get_next_seq_item(local_nis, &item) != NULL) {
+		intf = cYAML_get_object_item(item, "interfaces");
+		if (intf == NULL)
+			continue;
+		num_entries = yaml_copy_intf_info(intf, &nw_descr);
+		if (num_entries <= 0) {
+			cYAML_build_error(num_entries, -1, "ni", "del",
+					  "bad interface list",
+					  err_rc);
+			return LUSTRE_CFG_RC_BAD_PARAM;
		}
	}
-	tunables = cYAML_get_object_item(tree, "tunables");
-	if (tunables != NULL) {
-		peer_to = cYAML_get_object_item(tunables, "peer_timeout");
-		peer_cr = cYAML_get_object_item(tunables, "peer_credits");
-		peer_buf_cr = cYAML_get_object_item(tunables,
-						    "peer_buffer_credits");
-		credits = cYAML_get_object_item(tunables, "credits");
-		smp = cYAML_get_object_item(tunables, "CPT");
+	seq_no = cYAML_get_object_item(tree, "seq_no");
+
+	rc = lustre_lnet_del_ni((net) ? &nw_descr : NULL,
+				(seq_no) ? seq_no->cy_valueint : -1,
+				err_rc);
+
+	return rc;
+}
+
+static int yaml_copy_peer_nids(struct cYAML *tree, char ***nidsppp)
+{
+	struct cYAML *nids_entry = NULL, *child = NULL, *entry = NULL;
+	char **nids = NULL;
+	int num = 0, rc = LUSTRE_CFG_RC_NO_ERR;
+
+	nids_entry = cYAML_get_object_item(tree, "peer ni");
+	if (cYAML_is_sequence(nids_entry)) {
+		while (cYAML_get_next_seq_item(nids_entry, &child))
+			num++;
+	}
+
+	if (num == 0)
+		return LUSTRE_CFG_RC_MISSING_PARAM;
+
+	nids = calloc(num, sizeof(*nids));
+	if (nids == NULL)
+		return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+	/* now grab all the nids */
+	num = 0;
+	child = NULL;
+	while (cYAML_get_next_seq_item(nids_entry, &child)) {
+		entry = cYAML_get_object_item(child, "nid");
+		if (!entry || !entry->cy_valuestring)
+			continue;
+		nids[num] = calloc(strlen(entry->cy_valuestring) + 1, 1);
+		if (!nids[num]) {
+			rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+			goto failed;
+		}
+		strncpy(nids[num], entry->cy_valuestring,
+			strlen(entry->cy_valuestring));
+		num++;
+	}
+	rc = num;
+
+	*nidsppp = nids;
+	return rc;
+
+failed:
+	if (nids != NULL)
+		yaml_free_string_array(nids, num);
+	*nidsppp = NULL;
+	return rc;
+}
+
+static int handle_yaml_config_peer(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ char **nids = NULL;
+ int num, rc;
+ struct cYAML *seq_no, *prim_nid, *non_mr;
+
+ num = yaml_copy_peer_nids(tree, &nids);
+ if (num < 0)
+ return num;
+
seq_no = cYAML_get_object_item(tree, "seq_no");
+ prim_nid = cYAML_get_object_item(tree, "primary nid");
+ non_mr = cYAML_get_object_item(tree, "non_mr");
+
+ rc = lustre_lnet_config_peer_nid((prim_nid) ? prim_nid->cy_valuestring : NULL,
+ nids, num,
+ (non_mr) ? false : true,
+ (seq_no) ? seq_no->cy_valueint : -1,
+ err_rc);
+
+ yaml_free_string_array(nids, num);
+ return rc;
+}
+
+static int handle_yaml_del_peer(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ char **nids = NULL;
+ int num, rc;
+ struct cYAML *seq_no, *prim_nid;
+
+ num = yaml_copy_peer_nids(tree, &nids);
+ if (num < 0)
+ return num;
- return lustre_lnet_config_net((net) ? net->cy_valuestring : NULL,
- (intf_found) ? devs : NULL,
- (ip2net) ? ip2net->cy_valuestring : NULL,
- (peer_to) ? peer_to->cy_valueint : -1,
- (peer_cr) ? peer_cr->cy_valueint : -1,
- (peer_buf_cr) ?
- peer_buf_cr->cy_valueint : -1,
- (credits) ? credits->cy_valueint : -1,
- (smp) ? smp->cy_valuestring : NULL,
+ seq_no = cYAML_get_object_item(tree, "seq_no");
+ prim_nid = cYAML_get_object_item(tree, "primary nid");
+
+ rc = lustre_lnet_del_peer_nid((prim_nid) ? prim_nid->cy_valuestring : NULL,
+ nids, num,
(seq_no) ? seq_no->cy_valueint : -1,
- lnd_tunables_p,
err_rc);
+
+ yaml_free_string_array(nids, num);
+ return rc;
}
static int handle_yaml_config_buffers(struct cYAML *tree,
err_rc);
}
-static int handle_yaml_del_net(struct cYAML *tree, struct cYAML **show_rc,
- struct cYAML **err_rc)
-{
- struct cYAML *net, *seq_no;
-
- net = cYAML_get_object_item(tree, "net");
- seq_no = cYAML_get_object_item(tree, "seq_no");
-
- return lustre_lnet_del_net((net) ? net->cy_valuestring : NULL,
- (seq_no) ? seq_no->cy_valueint : -1,
- err_rc);
-}
-
static int handle_yaml_del_routing(struct cYAML *tree, struct cYAML **show_rc,
struct cYAML **err_rc)
{
show_rc, err_rc);
}
-static int handle_yaml_show_credits(struct cYAML *tree, struct cYAML **show_rc,
- struct cYAML **err_rc)
+static int handle_yaml_show_peers(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
{
- struct cYAML *seq_no;
+ struct cYAML *seq_no, *nid, *detail;
seq_no = cYAML_get_object_item(tree, "seq_no");
+ detail = cYAML_get_object_item(tree, "detail");
+ nid = cYAML_get_object_item(tree, "nid");
- return lustre_lnet_show_peer_credits((seq_no) ?
- seq_no->cy_valueint : -1,
- show_rc, err_rc);
+ return lustre_lnet_show_peer((nid) ? nid->cy_valuestring : NULL,
+ (detail) ? detail->cy_valueint : 0,
+ (seq_no) ? seq_no->cy_valueint : -1,
+ show_rc, err_rc);
}
static int handle_yaml_show_stats(struct cYAML *tree, struct cYAML **show_rc,
show_rc, err_rc);
}
+static int handle_yaml_config_numa(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ struct cYAML *seq_no, *range;
+
+ seq_no = cYAML_get_object_item(tree, "seq_no");
+ range = cYAML_get_object_item(tree, "range");
+
+ return lustre_lnet_config_numa_range(range ? range->cy_valueint : -1,
+ seq_no ? seq_no->cy_valueint : -1,
+ err_rc);
+}
+
+static int handle_yaml_del_numa(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ struct cYAML *seq_no;
+
+ seq_no = cYAML_get_object_item(tree, "seq_no");
+
+ return lustre_lnet_config_numa_range(0, seq_no ? seq_no->cy_valueint : -1,
+ err_rc);
+}
+
+static int handle_yaml_show_numa(struct cYAML *tree, struct cYAML **show_rc,
+ struct cYAML **err_rc)
+{
+ struct cYAML *seq_no;
+
+ seq_no = cYAML_get_object_item(tree, "seq_no");
+
+ return lustre_lnet_show_numa_range(seq_no ? seq_no->cy_valueint : -1,
+ show_rc, err_rc);
+}
+
struct lookup_cmd_hdlr_tbl {
char *name;
cmd_handler_t cb;
static struct lookup_cmd_hdlr_tbl lookup_config_tbl[] = {
{"route", handle_yaml_config_route},
- {"net", handle_yaml_config_net},
+ {"net", handle_yaml_config_ni},
+ {"ip2nets", handle_yaml_config_ip2nets},
+ {"peer", handle_yaml_config_peer},
{"routing", handle_yaml_config_routing},
{"buffers", handle_yaml_config_buffers},
+ {"numa", handle_yaml_config_numa},
{NULL, NULL}
};
static struct lookup_cmd_hdlr_tbl lookup_del_tbl[] = {
{"route", handle_yaml_del_route},
- {"net", handle_yaml_del_net},
+ {"net", handle_yaml_del_ni},
+ {"peer", handle_yaml_del_peer},
{"routing", handle_yaml_del_routing},
+ {"numa", handle_yaml_del_numa},
{NULL, NULL}
};
{"net", handle_yaml_show_net},
{"buffers", handle_yaml_show_routing},
{"routing", handle_yaml_show_routing},
- {"credits", handle_yaml_show_credits},
+ {"peer", handle_yaml_show_peers},
{"statistics", handle_yaml_show_stats},
+ {"numa", handle_yaml_show_numa},
{NULL, NULL}
};
char err_str[LNET_MAX_STR_LEN];
int rc = LUSTRE_CFG_RC_NO_ERR, return_rc = LUSTRE_CFG_RC_NO_ERR;
- tree = cYAML_build_tree(f, NULL, 0, err_rc);
+ tree = cYAML_build_tree(f, NULL, 0, err_rc, false);
if (tree == NULL)
return LUSTRE_CFG_RC_BAD_PARAM;
return lustre_yaml_cb_helper(f, lookup_show_tbl,
show_rc, err_rc);
}
+
+int lustre_lnet_send_dbg_task(enum lnet_dbg_task dbg_task,
+			      struct lnet_dbg_task_info *dbg_info,
+			      struct cYAML **show_rc,
+			      struct cYAML **err_rc)
+{
+	struct lnet_ioctl_dbg *dbg;
+	struct lnet_dbg_task_info *info;
+	int rc = LUSTRE_CFG_RC_NO_ERR;
+	char err_str[LNET_MAX_STR_LEN];
+
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	dbg = calloc(1, sizeof(*dbg) + sizeof(*info));
+	if (!dbg) {
+		snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+		rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+		goto out;
+	}
+
+	info = (struct lnet_dbg_task_info *)dbg->dbg_bulk;
+
+	LIBCFS_IOC_INIT_V2(*dbg, dbg_hdr);
+
+	dbg->dbg_task = dbg_task;
+	if (dbg_info)
+		memcpy(info, dbg_info, sizeof(*info));
+
+	rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DBG, dbg);
+	if (rc != 0) {
+		rc = -errno;
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"debug task failed %s\"", strerror(errno));
+		goto out;
+	}
+
+out:
+	/* dbg is NULL only on the OOM path; free(NULL) is a no-op */
+	free(dbg);
+
+	cYAML_build_error(rc, -1, DBG_CMD,
+			  "debug", err_str, err_rc);
+
+	return rc;
+}
+
#define LUSTRE_CFG_RC_OUT_OF_RANGE_PARAM -3
#define LUSTRE_CFG_RC_OUT_OF_MEM -4
#define LUSTRE_CFG_RC_GENERIC_ERR -5
+#define LUSTRE_CFG_RC_NO_MATCH -6
+#define LUSTRE_CFG_RC_MATCH -7
+
+#include <lnet/lnet.h>
+#include <libcfs/util/string.h>
+
+struct lnet_dlc_network_descr {
+ struct list_head network_on_rule;
+ __u32 nw_id;
+ struct list_head nw_intflist;
+};
+
+struct lnet_dlc_intf_descr {
+ struct list_head intf_on_network;
+ char intf_name[LNET_MAX_STR_LEN];
+ struct cfs_expr_list *cpt_expr;
+};
/* forward declaration of the cYAML structure. */
struct cYAML;
int lustre_lnet_config_lib_init();
/*
+ * lustre_lnet_config_lib_uninit
+ * Uninitialize the DLC Library
+ */
+void lustre_lnet_config_lib_uninit();
+
+/*
* lustre_lnet_config_ni_system
* Initialize/Uninitialize the lnet NI system.
*
struct cYAML **err_rc);
/*
- * lustre_lnet_config_net
- * Send down an IOCTL to configure a network.
+ * lustre_lnet_config_ni
+ * Send down an IOCTL to configure a network interface. It implicitly
+ * creates a network if one doesn't exist.
*
- * net - the network name
- * intf - the interface of the network of the form net_name(intf)
+ * nw_descr - network and interface descriptor
+ * global_cpts - globally defined CPTs
* ip2net - this parameter allows configuring multiple networks.
* it takes precedence over the net and intf parameters
- * peer_to - peer timeout
- * peer_cr - peer credit
- * peer_buf_cr - peer buffer credits
- * - the above are LND tunable parameters and are optional
- * credits - network interface credits
- * smp - cpu affinity
+ * tunables - LND tunables
* seq_no - sequence number of the request
* lnd_tunables - lnet specific tunable parameters
* err_rc - [OUT] struct cYAML tree describing the error. Freed by caller
*/
-int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
- int peer_to, int peer_cr, int peer_buf_cr,
- int credits, char *smp, int seq_no,
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables,
- struct cYAML **err_rc);
+int lustre_lnet_config_ni(struct lnet_dlc_network_descr *nw_descr,
+ struct cfs_expr_list *global_cpts,
+ char *ip2net,
+ struct lnet_ioctl_config_lnd_tunables *tunables,
+ int seq_no, struct cYAML **err_rc);
/*
- * lustre_lnet_del_net
- * Send down an IOCTL to delete a network.
+ * lustre_lnet_del_ni
+ * Send down an IOCTL to delete a network interface. It implicitly
+ * deletes a network if it becomes empty of NIs
*
- * nw - network to delete.
+ * nw - network and interface list
* seq_no - sequence number of the request
* err_rc - [OUT] struct cYAML tree describing the error. Freed by caller
*/
-int lustre_lnet_del_net(char *nw, int seq_no,
- struct cYAML **err_rc);
+int lustre_lnet_del_ni(struct lnet_dlc_network_descr *nw,
+ int seq_no, struct cYAML **err_rc);
/*
* lustre_lnet_show_net
struct cYAML **err_rc);
/*
+ * lustre_lnet_config_numa_range
+ * Set the NUMA range which impacts the NIs to be selected
+ * during sending. If the NUMA range is large the NUMA
+ * distance between the message memory and the NI becomes
+ * less significant. The NUMA range is a relative number
+ * with no other meaning besides allowing a wider breadth
+ * for picking an NI to send from.
+ *
+ * range - numa range value.
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_config_numa_range(int range, int seq_no,
+ struct cYAML **err_rc);
+
+/*
+ * lustre_lnet_show_num_range
+ * Get the currently set NUMA range
+ *
+ * seq_no - sequence number of the request
+ * show_rc - [OUT] struct cYAML tree containing NUMA range info
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_show_numa_range(int seq_no, struct cYAML **show_rc,
+ struct cYAML **err_rc);
+
+/*
* lustre_lnet_config_buffers
* Send down an IOCTL to configure routing buffer sizes. A value of 0 means
* default that particular buffer to default size. A value of -1 means
struct cYAML **err_rc);
/*
- * lustre_lnet_show_peer_credits
- * Shows credit details on the peers in the system
+ * lustre_lnet_show_stats
+ * Shows internal LNET statistics. This is useful to display the
+ * current LNET activity, such as number of messages route, etc
*
* seq_no - sequence number of the command
* show_rc - YAML structure of the resultant show
* err_rc - YAML strucutre of the resultant return code.
*/
-int lustre_lnet_show_peer_credits(int seq_no, struct cYAML **show_rc,
- struct cYAML **err_rc);
+int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc,
+ struct cYAML **err_rc);
/*
- * lustre_lnet_show_stats
- * Shows internal LNET statistics. This is useful to display the
- * current LNET activity, such as number of messages route, etc
+ * lustre_lnet_config_peer_nid
+ * Add a peer nid to a peer with primary nid pnid. If no pnid is given
+ * then the first nid in the nid list becomes the primary nid for
+ * a newly created peer.
+ * Otherwise if pnid is provided and it's unique then a new peer is
+ * created with pnid as the primary NID and the nids in the nid list as
+ * secondary nids.
+ * If any of the peer NIDs provided, with the exception of the pnid, is
+ * not unique, the operation fails. Some peer nids might have already
+ * been added. It's the role of the caller of this API to remove the
+ * added NIDs if they wish.
+ *
+ * pnid - Primary NID of the peer
+ * nid - list of nids to add
+ * num_nids - number of nids in the nid array
+ * mr - true if this peer is MR capable.
+ * seq_no - sequence number of the command
+ * err_rc - YAML structure of the resultant return code.
+ */
+int lustre_lnet_config_peer_nid(char *pnid, char **nid, int num_nids,
+ bool mr, int seq_no, struct cYAML **err_rc);
+
+/*
+ * lustre_lnet_del_peer_nid
+ * Delete the nids given in the nid list from the peer with primary NID
+ * pnid. If pnid is NULL or it doesn't identify a peer the operation
+ * fails and no change happens to the system.
+ * The operation is aborted on the first NID that fails to be deleted.
*
+ * pnid - Primary NID of the peer
+ * nid - list of nids to delete
+ * num_nids - number of nids in the nid array
+ * seq_no - sequence number of the command
+ * err_rc - YAML structure of the resultant return code.
+ */
+int lustre_lnet_del_peer_nid(char *pnid, char **nid, int num_nids,
+ int seq_no, struct cYAML **err_rc);
+
+/*
+ * lustre_lnet_show_peer
+ * Show the peer identified by nid, knid. If knid is NULL all
+ * peers in the system are shown.
+ *
+ * knid - A NID of the peer
+ * detail - display detailed information
* seq_no - sequence number of the command
* show_rc - YAML structure of the resultant show
* err_rc - YAML strucutre of the resultant return code.
+ *
*/
-int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc,
- struct cYAML **err_rc);
+int lustre_lnet_show_peer(char *knid, int detail, int seq_no,
+ struct cYAML **show_rc, struct cYAML **err_rc);
/*
* lustre_yaml_config
int lustre_yaml_show(char *f, struct cYAML **show_rc,
struct cYAML **err_rc);
+/*
+ * lustre_lnet_init_nw_descr
+ * initialize the network descriptor structure for use
+ */
+void lustre_lnet_init_nw_descr(struct lnet_dlc_network_descr *nw_descr);
+
+/*
+ * lustre_lnet_parse_interfaces
+ * parse an interface string and populate descriptor structures
+ * intf_str - interface string of the format
+ * <intf>[<expr>], <intf>[<expr>],..
+ * nw_descr - network descriptor to populate
+ * init - True to initialize nw_descr
+ */
+int lustre_lnet_parse_interfaces(char *intf_str,
+ struct lnet_dlc_network_descr *nw_descr);
+
+/*
+ * lustre_lnet_parse_nids
+ * Parse a set of nids into a locally allocated array and return the
+ * pointer of the array to the caller. The caller is responsible for
+ * freeing the array. If an initial array is provided then copy over
+ * the contents of that array into the new array and append to it the
+ * new content.
+ * The nids can be of the form "nid [,nid, nid, nid]"
+ * nids: nids string to be parsed
+ * array: initial array of content
+ * size: num of elements in the array
+ * out_array: [OUT] new allocated array.
+ * Returns size of array
+ * sets the out_array to NULL on failure.
+ */
+int lustre_lnet_parse_nids(char *nids, char **array, int size,
+ char ***out_array);
+
+/*
+ * lustre_lnet_send_dbg_task
+ * send a debug task to be carried out in the kernel. This API will
+ * not be exposed to the user through lnetctl utility. It can only be
+ * executed by being called directly.
+ * dbg_task: The task to be carried out
+ * dbg_info: task specific information
+ */
+int lustre_lnet_send_dbg_task(enum lnet_dbg_task dbg_task,
+ struct lnet_dbg_task_info *dbg_info,
+ struct cYAML **show_rc,
+ struct cYAML **err_rc);
+
#endif /* LIB_LNET_CONFIG_API_H */
* Author:
* James Simmons <jsimmons@infradead.org>
*/
+
+#include <limits.h>
+#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <libcfs/util/ioctl.h>
#include "cyaml.h"
static int
-lustre_ko2iblnd_show_net(struct cYAML *lndparams,
- struct lnet_ioctl_config_lnd_tunables *tunables)
+lustre_o2iblnd_show_tun(struct cYAML *lndparams,
+ struct lnet_ioctl_config_o2iblnd_tunables *lnd_cfg)
{
- struct lnet_ioctl_config_o2iblnd_tunables *lnd_cfg;
-
- lnd_cfg = &tunables->lt_tun_u.lt_o2ib;
-
if (cYAML_create_number(lndparams, "peercredits_hiw",
lnd_cfg->lnd_peercredits_hiw) == NULL)
- return -1;
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
if (cYAML_create_number(lndparams, "map_on_demand",
lnd_cfg->lnd_map_on_demand) == NULL)
- return -1;
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
if (cYAML_create_number(lndparams, "concurrent_sends",
lnd_cfg->lnd_concurrent_sends) == NULL)
- return -1;
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
if (cYAML_create_number(lndparams, "fmr_pool_size",
lnd_cfg->lnd_fmr_pool_size) == NULL)
- return -1;
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
if (cYAML_create_number(lndparams, "fmr_flush_trigger",
lnd_cfg->lnd_fmr_flush_trigger) == NULL)
- return -1;
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
if (cYAML_create_number(lndparams, "fmr_cache",
lnd_cfg->lnd_fmr_cache) == NULL)
- return -1;
- return 0;
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ return LUSTRE_CFG_RC_NO_ERR;
}
int
-lustre_interface_show_net(struct cYAML *interfaces, unsigned int index,
- bool detail, struct lnet_ioctl_config_data *data,
- struct lnet_ioctl_net_config *net_config)
+lustre_net_show_tunables(struct cYAML *tunables,
+ struct lnet_ioctl_config_lnd_cmn_tunables *cmn)
{
- char ni_index[2]; /* LNET_MAX_INTERFACES is only 16 */
-
- if (strlen(net_config->ni_interfaces[index]) == 0)
- return 0;
-
- snprintf(ni_index, sizeof(ni_index), "%d", index);
- if (cYAML_create_string(interfaces, ni_index,
- net_config->ni_interfaces[index]) == NULL)
- return -1;
-
- if (detail) {
- __u32 net = LNET_NETTYP(LNET_NIDNET(data->cfg_nid));
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg;
- struct cYAML *lndparams;
-
- if (data->cfg_config_u.cfg_net.net_interface_count == 0 ||
- net != O2IBLND)
- return 0;
-
- lndparams = cYAML_create_object(interfaces, "lnd tunables");
- if (lndparams == NULL)
- return -1;
-
- lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
- if (lustre_ko2iblnd_show_net(lndparams, lnd_cfg) < 0)
- return -1;
- }
- return 0;
+
+
+ if (cYAML_create_number(tunables, "peer_timeout",
+ cmn->lct_peer_timeout)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(tunables, "peer_credits",
+ cmn->lct_peer_tx_credits)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(tunables,
+ "peer_buffer_credits",
+ cmn->lct_peer_rtr_credits)
+ == NULL)
+ goto out;
+
+ if (cYAML_create_number(tunables, "credits",
+ cmn->lct_max_tx_credits)
+ == NULL)
+ goto out;
+
+ return LUSTRE_CFG_RC_NO_ERR;
+
+out:
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+}
+
+int
+lustre_ni_show_tunables(struct cYAML *lnd_tunables,
+ __u32 net_type,
+ struct lnet_lnd_tunables *lnd)
+{
+ int rc = LUSTRE_CFG_RC_NO_ERR;
+
+ if (net_type == O2IBLND)
+ rc = lustre_o2iblnd_show_tun(lnd_tunables,
+ &lnd->lnd_tun_u.lnd_o2ib);
+
+ return rc;
}
static void
-lustre_ko2iblnd_parse_net(struct cYAML *lndparams,
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg)
+yaml_extract_o2ib_tun(struct cYAML *tree,
+ struct lnet_ioctl_config_o2iblnd_tunables *lnd_cfg)
{
struct cYAML *map_on_demand = NULL, *concurrent_sends = NULL;
struct cYAML *fmr_pool_size = NULL, *fmr_cache = NULL;
- struct cYAML *fmr_flush_trigger = NULL;
+ struct cYAML *fmr_flush_trigger = NULL, *lndparams = NULL;
+
+ lndparams = cYAML_get_object_item(tree, "lnd tunables");
+ if (!lndparams)
+ return;
map_on_demand = cYAML_get_object_item(lndparams, "map_on_demand");
- lnd_cfg->lt_tun_u.lt_o2ib.lnd_map_on_demand =
+ lnd_cfg->lnd_map_on_demand =
(map_on_demand) ? map_on_demand->cy_valueint : 0;
concurrent_sends = cYAML_get_object_item(lndparams, "concurrent_sends");
- lnd_cfg->lt_tun_u.lt_o2ib.lnd_concurrent_sends =
+ lnd_cfg->lnd_concurrent_sends =
(concurrent_sends) ? concurrent_sends->cy_valueint : 0;
fmr_pool_size = cYAML_get_object_item(lndparams, "fmr_pool_size");
- lnd_cfg->lt_tun_u.lt_o2ib.lnd_fmr_pool_size =
+ lnd_cfg->lnd_fmr_pool_size =
(fmr_pool_size) ? fmr_pool_size->cy_valueint : 0;
fmr_flush_trigger = cYAML_get_object_item(lndparams,
"fmr_flush_trigger");
- lnd_cfg->lt_tun_u.lt_o2ib.lnd_fmr_flush_trigger =
+ lnd_cfg->lnd_fmr_flush_trigger =
(fmr_flush_trigger) ? fmr_flush_trigger->cy_valueint : 0;
fmr_cache = cYAML_get_object_item(lndparams, "fmr_cache");
- lnd_cfg->lt_tun_u.lt_o2ib.lnd_fmr_cache =
+ lnd_cfg->lnd_fmr_cache =
(fmr_cache) ? fmr_cache->cy_valueint : 0;
}
+
void
-lustre_interface_parse(struct cYAML *lndparams, const char *dev_name,
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg)
+lustre_yaml_extract_lnd_tunables(struct cYAML *tree,
+ __u32 net_type,
+ struct lnet_lnd_tunables *tun)
{
- if (dev_name != NULL && strstr(dev_name, "ib"))
- lustre_ko2iblnd_parse_net(lndparams, lnd_cfg);
+ if (net_type == O2IBLND)
+ yaml_extract_o2ib_tun(tree,
+ &tun->lnd_tun_u.lnd_o2ib);
+
}
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <limits.h>
#include <libcfs/util/ioctl.h>
#include <libcfs/util/parser.h>
#include <lnet/lnetctl.h>
+#include <lnet/nidstr.h>
#include "cyaml/cyaml.h"
#include "lnetconfig/liblnetconfig.h"
static int jt_config_lnet(int argc, char **argv);
static int jt_unconfig_lnet(int argc, char **argv);
static int jt_add_route(int argc, char **argv);
-static int jt_add_net(int argc, char **argv);
+static int jt_add_ni(int argc, char **argv);
static int jt_set_routing(int argc, char **argv);
static int jt_del_route(int argc, char **argv);
-static int jt_del_net(int argc, char **argv);
+static int jt_del_ni(int argc, char **argv);
static int jt_show_route(int argc, char **argv);
static int jt_show_net(int argc, char **argv);
static int jt_show_routing(int argc, char **argv);
static int jt_show_stats(int argc, char **argv);
-static int jt_show_peer_credits(int argc, char **argv);
+static int jt_show_peer(int argc, char **argv);
+static int jt_show_numa(int argc, char **argv);
static int jt_set_tiny(int argc, char **argv);
static int jt_set_small(int argc, char **argv);
static int jt_set_large(int argc, char **argv);
+static int jt_set_numa(int argc, char **argv);
+static int jt_add_peer_nid(int argc, char **argv);
+static int jt_del_peer_nid(int argc, char **argv);
+/*static int jt_show_peer(int argc, char **argv);*/
command_t lnet_cmds[] = {
{"configure", jt_config_lnet, 0, "configure lnet\n"
};
command_t net_cmds[] = {
- {"add", jt_add_net, 0, "add a network\n"
+ {"add", jt_add_ni, 0, "add a network\n"
"\t--net: net name (e.g. tcp0)\n"
"\t--if: physical interface (e.g. eth0)\n"
"\t--ip2net: specify networks based on IP address patterns\n"
"\t--peer-buffer-credits: the number of buffer credits per peer\n"
"\t--credits: Network Interface credits\n"
"\t--cpt: CPU Partitions configured net uses (e.g. [0,1]\n"},
- {"del", jt_del_net, 0, "delete a network\n"
- "\t--net: net name (e.g. tcp0)\n"},
+ {"del", jt_del_ni, 0, "delete a network\n"
+ "\t--net: net name (e.g. tcp0)\n"
+ "\t--if: physical interface (e.g. eth0)\n"},
{"show", jt_show_net, 0, "show networks\n"
"\t--net: net name (e.g. tcp0) to filter on\n"
"\t--verbose: display detailed output per network\n"},
{ 0, 0, 0, NULL }
};
-command_t credits_cmds[] = {
- {"show", jt_show_peer_credits, 0, "show peer credits\n"},
+command_t numa_cmds[] = {
+ {"show", jt_show_numa, 0, "show NUMA range\n"},
{ 0, 0, 0, NULL }
};
{"routing", jt_set_routing, 0, "enable/disable routing\n"
"\t0 - disable routing\n"
"\t1 - enable routing\n"},
+ {"numa_range", jt_set_numa, 0, "set NUMA range for NI selection\n"
+ "\tVALUE must be at least 0\n"},
+ { 0, 0, 0, NULL }
+};
+
+command_t peer_cmds[] = {
+ {"add", jt_add_peer_nid, 0, "add a peer NID\n"
+ "\t--prim_nid: Primary NID of the peer. If not provided then the first\n"
+ "\t NID in the list becomes the Primary NID of a newly created\n"
+ "\t peer. \n"
+ "\t--nid: one or more peer NIDs\n"
+ "\t--non_mr: create this peer as not Multi-Rail capable\n"},
+ {"del", jt_del_peer_nid, 0, "delete a peer NID\n"
+ "\t--prim_nid: Primary NID of the peer.\n"
+ "\t--nid: list of NIDs to remove. If none provided,\n"
+ "\t peer is deleted\n"},
+ {"show", jt_show_peer, 0, "show peer information\n"
+ "\t--nid: NID of peer to filter on.\n"
+ "\t--verbose: Include extended statistics\n"},
{ 0, 0, 0, NULL }
};
return rc;
}
+/*
+ * "lnetctl set numa_range <value>": parse the value from argv[1] and
+ * configure it via lustre_lnet_config_numa_range().
+ */
+static int jt_set_numa(int argc, char **argv)
+{
+	long int value;
+	int rc;
+	struct cYAML *err_rc = NULL;
+
+	if (handle_help(set_cmds, "set", "numa_range", argc, argv) == 0)
+		return 0;
+
+	/* reject non-numeric input before sending it down */
+	rc = parse_long(argv[1], &value);
+	if (rc != 0) {
+		cYAML_build_error(-1, -1, "parser", "set",
+				  "cannot parse numa_range value", &err_rc);
+		cYAML_print_tree2file(stderr, err_rc);
+		cYAML_free_tree(err_rc);
+		return -1;
+	}
+
+	rc = lustre_lnet_config_numa_range(value, -1, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
static int jt_set_tiny(int argc, char **argv)
{
long int value;
return rc;
}
-static int jt_add_net(int argc, char **argv)
+static int jt_add_ni(int argc, char **argv)
{
- char *network = NULL, *intf = NULL, *ip2net = NULL, *cpt = NULL;
+ char *ip2net = NULL;
long int pto = -1, pc = -1, pbc = -1, cre = -1;
struct cYAML *err_rc = NULL;
- int rc, opt;
+ int rc, opt, cpt_rc = -1;
+ struct lnet_dlc_network_descr nw_descr;
+ struct cfs_expr_list *global_cpts = NULL;
+ struct lnet_ioctl_config_lnd_tunables tunables;
+ bool found = false;
+
+ memset(&tunables, 0, sizeof(tunables));
+ lustre_lnet_init_nw_descr(&nw_descr);
const char *const short_options = "n:i:p:t:c:b:r:s:h";
const struct option long_options[] = {
long_options, NULL)) != -1) {
switch (opt) {
case 'n':
- network = optarg;
+ nw_descr.nw_id = libcfs_str2net(optarg);
break;
case 'i':
- intf = optarg;
+ rc = lustre_lnet_parse_interfaces(optarg, &nw_descr);
+ if (rc != 0) {
+ cYAML_build_error(-1, -1, "ni", "add",
+ "bad interface list",
+ &err_rc);
+ goto failed;
+ }
break;
case 'p':
ip2net = optarg;
}
break;
case 's':
- cpt = optarg;
+ cpt_rc = cfs_expr_list_parse(optarg,
+ strlen(optarg), 0,
+ UINT_MAX, &global_cpts);
break;
case 'h':
print_help(net_cmds, "net", "add");
}
}
- rc = lustre_lnet_config_net(network, intf, ip2net, pto, pc, pbc,
- cre, cpt, -1, NULL, &err_rc);
+ if (pto > 0 || pc > 0 || pbc > 0 || cre > 0) {
+ tunables.lt_cmn.lct_peer_timeout = pto;
+ tunables.lt_cmn.lct_peer_tx_credits = pc;
+ tunables.lt_cmn.lct_peer_rtr_credits = pbc;
+ tunables.lt_cmn.lct_max_tx_credits = cre;
+ found = true;
+ }
+
+ rc = lustre_lnet_config_ni(&nw_descr,
+ (cpt_rc == 0) ? global_cpts: NULL,
+ ip2net, (found) ? &tunables : NULL,
+ -1, &err_rc);
+
+ if (global_cpts != NULL)
+ cfs_expr_list_free(global_cpts);
+failed:
if (rc != LUSTRE_CFG_RC_NO_ERR)
cYAML_print_tree2file(stderr, err_rc);
return rc;
}
-static int jt_del_net(int argc, char **argv)
+static int jt_del_ni(int argc, char **argv)
{
- char *network = NULL;
struct cYAML *err_rc = NULL;
int rc, opt;
+ struct lnet_dlc_network_descr nw_descr;
+
+ lustre_lnet_init_nw_descr(&nw_descr);
- const char *const short_options = "n:h";
+ const char *const short_options = "n:i:h";
const struct option long_options[] = {
{ "net", 1, NULL, 'n' },
+ { "if", 1, NULL, 'i' },
{ "help", 0, NULL, 'h' },
{ NULL, 0, NULL, 0 },
};
long_options, NULL)) != -1) {
switch (opt) {
case 'n':
- network = optarg;
+ nw_descr.nw_id = libcfs_str2net(optarg);
+ break;
+ case 'i':
+ rc = lustre_lnet_parse_interfaces(optarg, &nw_descr);
+ if (rc != 0) {
+ cYAML_build_error(-1, -1, "ni", "add",
+ "bad interface list",
+ &err_rc);
+ goto out;
+ }
break;
case 'h':
print_help(net_cmds, "net", "del");
}
}
- rc = lustre_lnet_del_net(network, -1, &err_rc);
+ rc = lustre_lnet_del_ni(&nw_descr, -1, &err_rc);
+out:
if (rc != LUSTRE_CFG_RC_NO_ERR)
cYAML_print_tree2file(stderr, err_rc);
return rc;
}
-static int jt_show_peer_credits(int argc, char **argv)
+static int jt_show_numa(int argc, char **argv)
{
int rc;
struct cYAML *show_rc = NULL, *err_rc = NULL;
- if (handle_help(credits_cmds, "peer_credits", "show", argc, argv) == 0)
+ if (handle_help(numa_cmds, "numa", "show", argc, argv) == 0)
return 0;
- rc = lustre_lnet_show_peer_credits(-1, &show_rc, &err_rc);
+ rc = lustre_lnet_show_numa_range(-1, &show_rc, &err_rc);
if (rc != LUSTRE_CFG_RC_NO_ERR)
cYAML_print_tree2file(stderr, err_rc);
return Parser_execarg(argc - 1, &argv[1], stats_cmds);
}
-static inline int jt_peer_credits(int argc, char **argv)
+static inline int jt_numa(int argc, char **argv)
{
if (argc < 2)
return CMD_HELP;
if (argc == 2 &&
- handle_help(credits_cmds, "peer_credits", NULL, argc, argv) == 0)
+ handle_help(numa_cmds, "numa", NULL, argc, argv) == 0)
return 0;
- return Parser_execarg(argc - 1, &argv[1], credits_cmds);
+ return Parser_execarg(argc - 1, &argv[1], numa_cmds);
+}
+
+static inline int jt_peers(int argc, char **argv)
+{
+ if (argc < 2)
+ return CMD_HELP;
+
+ if (argc == 2 &&
+ handle_help(peer_cmds, "peer", NULL, argc, argv) == 0)
+ return 0;
+
+ return Parser_execarg(argc - 1, &argv[1], peer_cmds);
}
static inline int jt_set(int argc, char **argv)
break;
}
- if (rc != LUSTRE_CFG_RC_NO_ERR)
- cYAML_print_tree2file(stderr, err_rc);
+ cYAML_print_tree2file(stderr, err_rc);
cYAML_free_tree(err_rc);
cYAML_free_tree(err_rc);
}
+ rc = lustre_lnet_show_peer(NULL, 1, -1, &show_rc, &err_rc);
+ if (rc != LUSTRE_CFG_RC_NO_ERR) {
+ cYAML_print_tree2file(stderr, err_rc);
+ cYAML_free_tree(err_rc);
+ }
+
+ rc = lustre_lnet_show_numa_range(-1, &show_rc, &err_rc);
+ if (rc != LUSTRE_CFG_RC_NO_ERR) {
+ cYAML_print_tree2file(stderr, err_rc);
+ cYAML_free_tree(err_rc);
+ }
+
if (show_rc != NULL) {
cYAML_print_tree2file(f, show_rc);
cYAML_free_tree(show_rc);
return 0;
}
+/*
+ * "lnetctl peer add": collect --prim_nid/--nid/--non_mr options and
+ * configure the peer via lustre_lnet_config_peer_nid(). The locally
+ * allocated NID array is freed before returning.
+ */
+static int jt_add_peer_nid(int argc, char **argv)
+{
+	char *prim_nid = NULL;
+	char **nids = NULL, **nids2 = NULL;
+	int size = 0;
+	struct cYAML *err_rc = NULL;
+	int rc = LUSTRE_CFG_RC_NO_ERR, opt, i;
+	bool non_mr = false;
+
+	const char *const short_options = "k:n:mh";
+	const struct option long_options[] = {
+	{ "prim_nid", 1, NULL, 'k' },
+	{ "nid", 1, NULL, 'n' },
+	{ "non_mr", 0, NULL, 'm'},
+	{ "help", 0, NULL, 'h' },
+	{ NULL, 0, NULL, 0 },
+	};
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'k':
+			prim_nid = optarg;
+			break;
+		case 'n':
+			/* grow the NID array; nids2 == NULL means failure */
+			size = lustre_lnet_parse_nids(optarg, nids, size,
+						      &nids2);
+			if (nids2 == NULL)
+				goto failed;
+			nids = nids2;
+			/* NOTE(review): rc is pre-armed here, presumably so
+			 * that a failed parse of a LATER --nid reports out of
+			 * memory at "failed:"; a failure on the first --nid
+			 * still leaves rc == LUSTRE_CFG_RC_NO_ERR — confirm
+			 * intended */
+			rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+			break;
+		case 'm':
+			non_mr = true;
+			break;
+		case 'h':
+			print_help(peer_cmds, "peer", "add");
+			return 0;
+		default:
+			return 0;
+		}
+	}
+
+	/* peers are MR capable unless --non_mr was given */
+	rc = lustre_lnet_config_peer_nid(prim_nid, nids, size,
+					 !non_mr, -1, &err_rc);
+
+failed:
+	for (i = 0; i < size; i++)
+		free(nids[i]);
+	free(nids);
+
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
+/*
+ * "lnetctl peer del": collect --prim_nid/--nid options and remove the
+ * NIDs via lustre_lnet_del_peer_nid(). The locally allocated NID array
+ * is freed before returning.
+ */
+static int jt_del_peer_nid(int argc, char **argv)
+{
+	char *prim_nid = NULL;
+	char **nids = NULL, **nids2 = NULL;
+	struct cYAML *err_rc = NULL;
+	int rc = LUSTRE_CFG_RC_NO_ERR, opt, i, size = 0;
+
+	const char *const short_options = "k:n:h";
+	const struct option long_options[] = {
+	{ "prim_nid", 1, NULL, 'k' },
+	{ "nid", 1, NULL, 'n' },
+	{ "help", 0, NULL, 'h' },
+	{ NULL, 0, NULL, 0 },
+	};
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'k':
+			prim_nid = optarg;
+			break;
+		case 'n':
+			/* grow the NID array; nids2 == NULL means failure */
+			size = lustre_lnet_parse_nids(optarg, nids, size,
+						      &nids2);
+			if (nids2 == NULL)
+				goto failed;
+			nids = nids2;
+			/* NOTE(review): pre-armed so a failed parse of a
+			 * later --nid reports out of memory — same pattern
+			 * as jt_add_peer_nid */
+			rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+			break;
+		case 'h':
+			print_help(peer_cmds, "peer", "del");
+			return 0;
+		default:
+			return 0;
+		}
+	}
+
+	rc = lustre_lnet_del_peer_nid(prim_nid, nids, size, -1, &err_rc);
+
+failed:
+	for (i = 0; i < size; i++)
+		free(nids[i]);
+	free(nids);
+
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
+/*
+ * "lnetctl peer show": show the peer identified by --nid (all peers
+ * when no NID is given); --verbose requests detailed output.
+ */
+static int jt_show_peer(int argc, char **argv)
+{
+	char *nid = NULL;
+	int rc, opt;
+	struct cYAML *err_rc = NULL, *show_rc = NULL;
+	int detail = 0;
+
+	const char *const short_options = "n:vh";
+	const struct option long_options[] = {
+	{ "nid", 1, NULL, 'n' },
+	{ "verbose", 0, NULL, 'v' },
+	{ "help", 0, NULL, 'h' },
+	{ NULL, 0, NULL, 0 },
+	};
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'n':
+			nid = optarg;
+			break;
+		case 'v':
+			detail = 1;
+			break;
+		case 'h':
+			print_help(peer_cmds, "peer", "show");
+			return 0;
+		default:
+			return 0;
+		}
+	}
+
+	rc = lustre_lnet_show_peer(nid, detail, -1, &show_rc, &err_rc);
+
+	/* on success print the show tree, otherwise the error tree */
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+	else if (show_rc)
+		cYAML_print_tree(show_rc);
+
+	cYAML_free_tree(err_rc);
+	cYAML_free_tree(show_rc);
+
+	return rc;
+}
+
command_t list[] = {
{"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"},
{"route", jt_route, 0, "route {add | del | show | help}"},
"--help} FILE.yaml"},
{"export", jt_export, 0, "export {--help} FILE.yaml"},
{"stats", jt_stats, 0, "stats {show | help}"},
- {"peer_credits", jt_peer_credits, 0, "peer_credits {show | help}"},
+ {"numa", jt_numa, 0, "numa {show | help}"},
+ {"peer", jt_peers, 0, "peer {add | del | show | help}"},
{"help", Parser_help, 0, "help"},
{"exit", Parser_quit, 0, "quit"},
{"quit", Parser_quit, 0, "quit"},
.
.br
\-\-verbose: display detailed output per network
+
+.
+.SS "Peer Configuration"
+.TP
+\fBlnetctl peer\fR add
+Configure an LNET peer with at least one supplied NID\. By default, peers are marked as multi-rail capable\. If prim_nid is not specified, the first NID in this list is assumed to be the primary NID for the peer.
+.
+.br
+.
+\-\-nid: one or more peer NIDs to add to the peer\.
+.
+.br
+.
+\-\-prim_nid: Primary NID of the peer\.
+.
+.br
+\-\-non_mr: create this peer as not Multi-Rail capable\.
+.
+.br
+
+.TP
+\fBlnetctl peer\fR del
+Delete a peer NID. The primary NID must be specified. If the removed NID is the primary NID, the peer entry will be deleted.
+.
+.br
+.
+\-\-nid: one or more peer NIDs to remove from the peer\.
+.
+.br
+.
+\-\-prim_nid: Primary NID of the peer\.
+.
+.br
+
+.TP
+\fBlnetctl peer\fR show
+Show configured peers. By default, lists all peers and associated NIDs.
+.
+.br
+.
+\-\-nid: list of primary nids to filter on
+.
+.br
+.
+\-\-verbose: Include extended statistics, including credits and counters.
+.
+.br
+
.
.SS "Route Configuration"
.
.br
.
-.SS "Showing Peer Credits"
-.
-.TP
-\fBlnetctl peer_credits\fR
-Show details on configured peer credits
-.
-.br
-\-> Peer nid
-.
-.br
-\-> State
-.
-.br
-\-> Reference count on the peer
-.
-.br
-\-> Maximum transmit credits
-.
-.br
-\-> Available transmit credits
-.
-.br
-\-> Available router credits
-.
-.br
-\-> Minimum router credits\.
-.
.SH "EXAMPLES"
.
.SS "Initializing LNet after load"
.
.br
.
-.SS "Showing peer credits information"
+.SS "Showing peer information"
.
.IP "\(bu" 4
-lnetctl peer_credits show
+lnetctl peer show
.
.IP "" 0
.
peer:
.
.br
- \- nid: 192\.168\.205\.131@tcp1
+ \- primary nid: 10\.148\.0\.8@o2ib
+.
+.br
+ Multi\-Rail: True
+.
+.br
+ peer ni:
+.
+.br
+ \- nid: 10\.148\.0\.8@o2ib
+.
+.br
+ state: NA
.
.br
- state: down
+ \- primary nid: 10\.148\.0\.20@o2ib
.
.br
- refcount: 4
+ Multi\-Rail: True
.
.br
- max_ni_tx_credits: 8
+ peer ni:
.
.br
- available_tx_credits: 8
+ \- nid: 10\.148\.0\.20@o2ib
.
.br
- available_rtr_credits: 8
+ state: NA
.
.br
- min_rtr_credits: 7
+ \- nid: 10\.148\.0\.25@o2ib
.
.br
- tx_q_num_of_buf: 0
+ state: NA
.
.br
lnet_nid_t rq_self;
/** Peer description (the other side) */
lnet_process_id_t rq_peer;
+ /** Descriptor for the NID from which the peer sent the request. */
+ lnet_process_id_t rq_source;
/**
* service time estimate (secs)
* If the request is not served by this time, it is marked as timed out.
struct ptlrpc_connection *
ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self,
- struct obd_uuid *uuid)
+ struct obd_uuid *uuid)
{
- struct ptlrpc_connection *conn, *conn2;
- ENTRY;
+ struct ptlrpc_connection *conn, *conn2;
+ ENTRY;
- conn = cfs_hash_lookup(conn_hash, &peer);
- if (conn)
- GOTO(out, conn);
+ peer.nid = LNetPrimaryNID(peer.nid);
+ conn = cfs_hash_lookup(conn_hash, &peer);
+ if (conn)
+ GOTO(out, conn);
- OBD_ALLOC_PTR(conn);
- if (!conn)
- RETURN(NULL);
+ OBD_ALLOC_PTR(conn);
+ if (!conn)
+ RETURN(NULL);
- conn->c_peer = peer;
- conn->c_self = self;
+ conn->c_peer = peer;
+ conn->c_self = self;
INIT_HLIST_NODE(&conn->c_hash);
atomic_set(&conn->c_refcount, 1);
- if (uuid)
- obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
+ if (uuid)
+ obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
/*
* Add the newly created conn to the hash, on key collision we
* lost a racing addition and must destroy our newly allocated
- * connection. The object which exists in the has will be
+ * connection. The object which exists in the hash will be
* returned and may be compared against out object.
*/
/* In the function below, .hs_keycmp resolves to
if (ev->type == LNET_EVENT_PUT && ev->status == 0)
req->rq_reqdata_len = ev->mlength;
do_gettimeofday(&req->rq_arrival_time);
+ /* Multi-Rail: keep track of both initiator and source NID. */
req->rq_peer = ev->initiator;
+ req->rq_source = ev->source;
req->rq_self = ev->target.nid;
req->rq_rqbd = rqbd;
req->rq_phase = RQ_PHASE_NEW;
CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
req, req->rq_xid, ev->mlength);
- CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));
+ CDEBUG(D_RPCTRACE, "peer: %s (source: %s)\n",
+ libcfs_id2str(req->rq_peer), libcfs_id2str(req->rq_source));
spin_lock(&svcpt->scp_lock);
* over \a conn connection to portal \a portal.
* Returns 0 on success or error code.
*/
-static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len,
- lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
- struct ptlrpc_connection *conn, int portal, __u64 xid,
- unsigned int offset)
+static int ptl_send_buf(lnet_handle_md_t *mdh, void *base, int len,
+ lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
+ lnet_nid_t self, lnet_process_id_t peer_id,
+ int portal, __u64 xid, unsigned int offset,
+ lnet_handle_md_t *bulk_cookie)
{
- int rc;
- lnet_md_t md;
- ENTRY;
+ int rc;
+ lnet_md_t md;
+ ENTRY;
- LASSERT (portal != 0);
- LASSERT (conn != NULL);
- CDEBUG (D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
- md.start = base;
- md.length = len;
- md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
- md.options = PTLRPC_MD_OPTIONS;
- md.user_ptr = cbid;
- md.eq_handle = ptlrpc_eq_h;
+ LASSERT (portal != 0);
+ CDEBUG (D_INFO, "peer_id %s\n", libcfs_id2str(peer_id));
+ md.start = base;
+ md.length = len;
+ md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
+ md.options = PTLRPC_MD_OPTIONS;
+ md.user_ptr = cbid;
+ md.eq_handle = ptlrpc_eq_h;
+ LNetInvalidateHandle(&md.bulk_handle);
- if (unlikely(ack == LNET_ACK_REQ &&
- OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, OBD_FAIL_ONCE))){
- /* don't ask for the ack to simulate failing client */
- ack = LNET_NOACK_REQ;
- }
+ if (bulk_cookie) {
+ md.bulk_handle = *bulk_cookie;
+ md.options |= LNET_MD_BULK_HANDLE;
+ }
- rc = LNetMDBind (md, LNET_UNLINK, mdh);
- if (unlikely(rc != 0)) {
- CERROR ("LNetMDBind failed: %d\n", rc);
- LASSERT (rc == -ENOMEM);
- RETURN (-ENOMEM);
- }
+ if (unlikely(ack == LNET_ACK_REQ &&
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, OBD_FAIL_ONCE))){
+ /* don't ask for the ack to simulate failing client */
+ ack = LNET_NOACK_REQ;
+ }
+
+ rc = LNetMDBind (md, LNET_UNLINK, mdh);
+ if (unlikely(rc != 0)) {
+ CERROR ("LNetMDBind failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+ RETURN (-ENOMEM);
+ }
CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n",
- len, portal, xid, offset);
-
- rc = LNetPut (conn->c_self, *mdh, ack,
- conn->c_peer, portal, xid, offset, 0);
- if (unlikely(rc != 0)) {
- int rc2;
- /* We're going to get an UNLINK event when I unlink below,
- * which will complete just like any other failed send, so
- * I fall through and return success here! */
+ len, portal, xid, offset);
+
+ rc = LNetPut(self, *mdh, ack,
+ peer_id, portal, xid, offset, 0);
+ if (unlikely(rc != 0)) {
+ int rc2;
+ /* We're going to get an UNLINK event when I unlink below,
+ * which will complete just like any other failed send, so
+ * I fall through and return success here! */
CERROR("LNetPut(%s, %d, %lld) failed: %d\n",
- libcfs_id2str(conn->c_peer), portal, xid, rc);
- rc2 = LNetMDUnlink(*mdh);
- LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
- }
+ libcfs_id2str(peer_id), portal, xid, rc);
+ rc2 = LNetMDUnlink(*mdh);
+ LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
+ }
- RETURN (0);
+ RETURN (0);
}
static void mdunlink_iterate_helper(lnet_handle_md_t *bd_mds, int count)
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
{
struct obd_export *exp = desc->bd_export;
- struct ptlrpc_connection *conn = exp->exp_connection;
+ lnet_nid_t self_nid;
+ lnet_process_id_t peer_id;
int rc = 0;
__u64 mbits;
int posted_md;
LASSERT(desc->bd_cbid.cbid_fn == server_bulk_callback);
LASSERT(desc->bd_cbid.cbid_arg == desc);
+ /*
+ * Multi-Rail: get the preferred self and peer NIDs from the
+ * request, so they are based on the route taken by the
+ * message.
+ */
+ self_nid = desc->bd_req->rq_self;
+ peer_id = desc->bd_req->rq_source;
+
/* NB total length may be 0 for a read past EOF, so we send 0
* length bulks, since the client expects bulk events.
*
/* Network is about to get at the memory */
if (ptlrpc_is_bulk_put_source(desc->bd_type))
- rc = LNetPut(conn->c_self, desc->bd_mds[posted_md],
- LNET_ACK_REQ, conn->c_peer,
+ rc = LNetPut(self_nid, desc->bd_mds[posted_md],
+ LNET_ACK_REQ, peer_id,
desc->bd_portal, mbits, 0, 0);
else
- rc = LNetGet(conn->c_self, desc->bd_mds[posted_md],
- conn->c_peer, desc->bd_portal, mbits, 0);
+ rc = LNetGet(self_nid, desc->bd_mds[posted_md],
+ peer_id, desc->bd_portal, mbits, 0);
posted_md++;
if (rc != 0) {
CERROR("%s: failed bulk transfer with %s:%u x%llu: "
"rc = %d\n", exp->exp_obd->obd_name,
- libcfs_id2str(conn->c_peer), desc->bd_portal,
+ libcfs_id2str(peer_id), desc->bd_portal,
mbits, rc);
break;
}
CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d "
"id %s mbits %#llx-%#llx\n", desc->bd_iov_count,
- desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer),
+ desc->bd_nob, desc->bd_portal, libcfs_id2str(peer_id),
mbits - posted_md, mbits - 1);
RETURN(0);
*/
int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
- struct ptlrpc_connection *conn;
- int rc;
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_connection *conn;
+ int rc;
/* We must already have a reply buffer (only ptlrpc_error() may be
* called without one). The reply generated by sptlrpc layer (e.g.
req->rq_sent = cfs_time_current_sec();
- rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
- (rs->rs_difficult && !rs->rs_no_ack) ?
- LNET_ACK_REQ : LNET_NOACK_REQ,
- &rs->rs_cb_id, conn,
- ptlrpc_req2svc(req)->srv_rep_portal,
- req->rq_xid, req->rq_reply_off);
+ rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
+ (rs->rs_difficult && !rs->rs_no_ack) ?
+ LNET_ACK_REQ : LNET_NOACK_REQ,
+ &rs->rs_cb_id, req->rq_self, req->rq_source,
+ ptlrpc_req2svc(req)->srv_rep_portal,
+ req->rq_xid, req->rq_reply_off, NULL);
out:
if (unlikely(rc != 0))
ptlrpc_req_drop_rs(req);
*/
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
- int rc;
- int rc2;
- int mpflag = 0;
- struct ptlrpc_connection *connection;
- lnet_handle_me_t reply_me_h;
- lnet_md_t reply_md;
+ int rc;
+ int rc2;
+ int mpflag = 0;
+ lnet_handle_md_t bulk_cookie;
+ struct ptlrpc_connection *connection;
+ lnet_handle_me_t reply_me_h;
+ lnet_md_t reply_md;
struct obd_import *imp = request->rq_import;
struct obd_device *obd = imp->imp_obd;
- ENTRY;
+ ENTRY;
+
+ LNetInvalidateHandle(&bulk_cookie);
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
RETURN(0);
if (rc)
GOTO(out, rc);
- /* bulk register should be done after wrap_request() */
- if (request->rq_bulk != NULL) {
- rc = ptlrpc_register_bulk (request);
- if (rc != 0)
- GOTO(out, rc);
- }
+ /* bulk register should be done after wrap_request() */
+ if (request->rq_bulk != NULL) {
+ rc = ptlrpc_register_bulk (request);
+ if (rc != 0)
+ GOTO(out, rc);
+ /*
+ * All the mds in the request will have the same cpt
+ * encoded in the cookie. So we can just get the first
+ * one.
+ */
+ bulk_cookie = request->rq_bulk->bd_mds[0];
+ }
if (!noreply) {
LASSERT (request->rq_replen != 0);
ptlrpc_pinger_sending_on_import(imp);
- DEBUG_REQ(D_INFO, request, "send flg=%x",
- lustre_msg_get_flags(request->rq_reqmsg));
- rc = ptl_send_buf(&request->rq_req_md_h,
- request->rq_reqbuf, request->rq_reqdata_len,
- LNET_NOACK_REQ, &request->rq_req_cbid,
- connection,
- request->rq_request_portal,
- request->rq_xid, 0);
+ DEBUG_REQ(D_INFO, request, "send flg=%x",
+ lustre_msg_get_flags(request->rq_reqmsg));
+ rc = ptl_send_buf(&request->rq_req_md_h,
+ request->rq_reqbuf, request->rq_reqdata_len,
+ LNET_NOACK_REQ, &request->rq_req_cbid,
+ LNET_NID_ANY, connection->c_peer,
+ request->rq_request_portal,
+ request->rq_xid, 0, &bulk_cookie);
if (likely(rc == 0))
GOTO(out, rc);