// SPDX-License-Identifier: GPL-2.0

/* This file is part of Lustre, http://www.lustre.org/ */

#ifdef WITH_GDS
#include "nvfs-dma.h"
#else
#include <lnet/lnet_gds.h>
#endif

#include <lnet/lnet_rdma.h>
#include <libcfs/libcfs.h>

/* MAX / MIN conflict */
#include <lnet/lib-lnet.h>

#define REGSTR2(x) x##_register_nvfs_dma_ops
#define REGSTR(x)  REGSTR2(x)

#define UNREGSTR2(x) x##_unregister_nvfs_dma_ops
#define UNREGSTR(x)  UNREGSTR2(x)

#define MODULE_PREFIX lustre_v1

#define REGISTER_FUNC REGSTR(MODULE_PREFIX)
#define UNREGISTER_FUNC UNREGSTR(MODULE_PREFIX)

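/*
 * Status codes returned by the nvfs DMA mapping callbacks: NVFS_IO_ERR
 * signals a fatal mapping failure, while NVFS_CPU_REQ asks the caller
 * to fall back to the regular CPU DMA mapping path.
 */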
#define NVFS_IO_ERR                     -1
#define NVFS_CPU_REQ                    -2

/* interval between drain-loop checks in UNREGISTER_FUNC */
#define NVFS_HOLD_TIME_MS 1000

/* seconds between repeats of the missing-callback console errors */
#define ERROR_PRINT_DEADLINE 3600

atomic_t nvfs_shutdown = ATOMIC_INIT(1);
struct nvfs_dma_rw_ops *nvfs_ops = NULL;
struct percpu_counter nvfs_n_ops;

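/*
 * In-flight operation accounting.  nvfs_get_ops() returns the
 * registered ops table and takes a reference, or NULL if no driver is
 * registered or shutdown has begun; every successful nvfs_get_ops()
 * must be balanced by an nvfs_put_ops().
 */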
static inline long nvfs_count_ops(void)
{
        return percpu_counter_sum(&nvfs_n_ops);
}

static struct nvfs_dma_rw_ops *nvfs_get_ops(void)
{
        if (!nvfs_ops || atomic_read(&nvfs_shutdown))
                return NULL;

        percpu_counter_inc(&nvfs_n_ops);

        return nvfs_ops;
}

static inline void nvfs_put_ops(void)
{
        percpu_counter_dec(&nvfs_n_ops);
}

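/*
 * Check that the registered ops table provides every callback LNet
 * depends on.  Complaints about missing callbacks are rate-limited to
 * one round per ERROR_PRINT_DEADLINE seconds.
 */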
static inline bool nvfs_check_feature_set(struct nvfs_dma_rw_ops *ops)
{
        bool supported = true;
        static time64_t last_printed;

        if (unlikely(!NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops))) {
                if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
                        CDEBUG(D_CONSOLE,
                               "NVFS sg list preparation callback missing\n");
                supported = false;
        }
        if (unlikely(!NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))) {
                if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
                        CDEBUG(D_CONSOLE,
                               "NVFS DMA mapping callbacks missing\n");
                supported = false;
        }
        if (unlikely(!NVIDIA_FS_CHECK_FT_GPU_PAGE(ops))) {
                if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
                        CDEBUG(D_CONSOLE,
                               "NVFS page identification callback missing\n");
                supported = false;
        }
        if (unlikely(!NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY(ops))) {
                if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
                        CDEBUG(D_CONSOLE,
                               "NVFS device priority callback missing\n");
                supported = false;
        }

        if (unlikely(!supported &&
                     ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)))
                last_printed = ktime_get_seconds();
        else if (supported)
                last_printed = 0;

        return supported;
}

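/*
 * Registration entry point called by the nvfs (GPU Direct Storage)
 * driver.  On success the ops table is published, the in-flight
 * counter is initialized and the shutdown flag is cleared.
 */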
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
{
        if (!ops || !nvfs_check_feature_set(ops))
                return -EINVAL;

        nvfs_ops = ops;
        (void)percpu_counter_init(&nvfs_n_ops, 0, GFP_KERNEL);
        atomic_set(&nvfs_shutdown, 0);
        CDEBUG(D_NET, "registering nvfs %p\n", ops);
        return 0;
}
EXPORT_SYMBOL(REGISTER_FUNC);

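/*
 * Unregistration entry point called when the nvfs driver unloads.
 * Sets the shutdown flag so no new references can be taken, then
 * polls every NVFS_HOLD_TIME_MS until all in-flight operations have
 * dropped their references.
 */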
void UNREGISTER_FUNC(void)
{
        (void)atomic_cmpxchg(&nvfs_shutdown, 0, 1);
        do {
                CDEBUG(D_NET, "Attempting to de-register nvfs: %ld\n",
                       nvfs_count_ops());
                msleep(NVFS_HOLD_TIME_MS);
        } while (nvfs_count_ops());
        nvfs_ops = NULL;
        percpu_counter_destroy(&nvfs_n_ops);
}
EXPORT_SYMBOL(UNREGISTER_FUNC);

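/*
 * Return the nvfs priority of @dev for the GPU identified by
 * @dev_idx, or UINT_MAX when no nvfs driver is registered.
 */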
unsigned int
lnet_get_dev_prio(struct device *dev, unsigned int dev_idx)
{
        unsigned int dev_prio = UINT_MAX;
        struct nvfs_dma_rw_ops *nvfs_ops;

        if (!dev)
                return dev_prio;

        nvfs_ops = nvfs_get_ops();
        if (!nvfs_ops)
                return dev_prio;

        dev_prio = nvfs_ops->nvfs_device_priority(dev, dev_idx);

        nvfs_put_ops();
        return dev_prio;
}
EXPORT_SYMBOL(lnet_get_dev_prio);

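/*
 * Return the nvfs GPU index for @page, or UINT_MAX when no nvfs
 * driver is registered.
 */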
unsigned int
lnet_get_dev_idx(struct page *page)
{
        unsigned int dev_idx = UINT_MAX;
        struct nvfs_dma_rw_ops *nvfs_ops;

        nvfs_ops = nvfs_get_ops();
        if (!nvfs_ops)
                return dev_idx;

        dev_idx = nvfs_ops->nvfs_gpu_index(page);

        nvfs_put_ops();
        return dev_idx;
}
EXPORT_SYMBOL(lnet_get_dev_idx);

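/*
 * Try to DMA-map an sg list through the nvfs driver.  Returns the
 * number of mapped entries on success, in which case the nvfs
 * reference is retained until lnet_rdma_unmap_sg() releases it;
 * -EIO on mapping failure; or 0 when no nvfs driver is registered or
 * the driver requested the standard CPU mapping path.
 */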
int lnet_rdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
                           int nents, enum dma_data_direction direction)
{
        struct nvfs_dma_rw_ops *nvfs_ops = nvfs_get_ops();

        if (nvfs_ops) {
                int count;

                count = nvfs_ops->nvfs_dma_map_sg_attrs(dev,
                                sg, nents, direction,
                                DMA_ATTR_NO_WARN);

                if (unlikely(count == NVFS_IO_ERR)) {
                        nvfs_put_ops();
                        return -EIO;
                }

                if (unlikely(count == NVFS_CPU_REQ))
                        nvfs_put_ops();
                else
                        /* success: keep the nvfs reference until unmap */
                        return count;
        }

        /* no nvfs driver, or it requested the CPU path */
        return 0;
}
EXPORT_SYMBOL(lnet_rdma_map_sg_attrs);

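/*
 * Unmap an sg list previously mapped through the nvfs driver.  A
 * successful unmap also drops the reference retained by the matching
 * lnet_rdma_map_sg_attrs() call.
 */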
int lnet_rdma_unmap_sg(struct device *dev,
                       struct scatterlist *sg, int nents,
                       enum dma_data_direction direction)
{
        struct nvfs_dma_rw_ops *nvfs_ops = nvfs_get_ops();

        if (nvfs_ops) {
                int count;

                count = nvfs_ops->nvfs_dma_unmap_sg(dev, sg,
                                                    nents, direction);

                /* drop the count we got by calling nvfs_get_ops() */
                nvfs_put_ops();

                if (count) {
                        /* drop the reference retained by the matching
                         * lnet_rdma_map_sg_attrs() call
                         */
                        nvfs_put_ops();
                        return count;
                }
        }

        return 0;
}
EXPORT_SYMBOL(lnet_rdma_unmap_sg);

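/*
 * Return true if @page is GPU memory identified by the nvfs driver,
 * false if it is ordinary host memory or no nvfs driver is
 * registered.
 */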
bool
lnet_is_rdma_only_page(struct page *page)
{
        bool is_gpu_page = false;
        struct nvfs_dma_rw_ops *nvfs_ops;

        LASSERT(page != NULL);

        nvfs_ops = nvfs_get_ops();
        if (nvfs_ops != NULL) {
                is_gpu_page = nvfs_ops->nvfs_is_gpu_page(page);
                nvfs_put_ops();
        }
        return is_gpu_page;
}
EXPORT_SYMBOL(lnet_is_rdma_only_page);