/* lnet/lnet/lnet_rdma.c
 *
 * LU-14798 lnet: add LNet GPU Direct Support
 */
#include <lnet/lnet_rdma.h>
#include <libcfs/libcfs.h>
#include <lnet/lib-lnet.h>

#define ERROR_PRINT_DEADLINE 3600

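/* State for the NVIDIA GPUDirect Storage (nvidia-fs) callback
 * registration: nvfs_shutdown starts at 1 ("shut down") until a driver
 * registers its ops, and nvfs_n_ops counts in-flight users of nvfs_ops
 * so that unregistration can wait for them to finish.
 */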
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
struct nvfs_dma_rw_ops *nvfs_ops;
struct percpu_counter nvfs_n_ops;

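/* Total number of in-flight nvfs_ops users across all CPUs. */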
static inline long nvfs_count_ops(void)
{
	return percpu_counter_sum(&nvfs_n_ops);
}

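/* Take a reference on the registered ops table; returns NULL if no
 * driver is registered or a shutdown is in progress.
 */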
static struct nvfs_dma_rw_ops *nvfs_get_ops(void)
{
	if (!nvfs_ops || atomic_read(&nvfs_shutdown))
		return NULL;

	percpu_counter_inc(&nvfs_n_ops);

	return nvfs_ops;
}

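/* Drop a reference taken by nvfs_get_ops(). */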
static inline void nvfs_put_ops(void)
{
	percpu_counter_dec(&nvfs_n_ops);
}

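/* Check that the registered ops table provides every callback LNet
 * needs; complain on the console at most once per ERROR_PRINT_DEADLINE
 * seconds so a missing callback does not flood the log.
 */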
static inline bool nvfs_check_feature_set(struct nvfs_dma_rw_ops *ops)
{
	bool supported = true;
	static time64_t last_printed;

	if (unlikely(!NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops))) {
		if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
			CDEBUG(D_CONSOLE,
			       "NVFS sg list preparation callback missing\n");
		supported = false;
	}
	if (unlikely(!NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))) {
		if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
			CDEBUG(D_CONSOLE,
			       "NVFS DMA mapping callbacks missing\n");
		supported = false;
	}
	if (unlikely(!NVIDIA_FS_CHECK_FT_GPU_PAGE(ops))) {
		if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
			CDEBUG(D_CONSOLE,
			       "NVFS page identification callback missing\n");
		supported = false;
	}
	if (unlikely(!NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY(ops))) {
		if ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)
			CDEBUG(D_CONSOLE,
			       "NVFS device priority callback missing\n");
		supported = false;
	}

	if (unlikely(!supported &&
		     ((ktime_get_seconds() - last_printed) > ERROR_PRINT_DEADLINE)))
		last_printed = ktime_get_seconds();
	else if (supported)
		last_printed = 0;

	return supported;
}

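/* Registration entry point.  REGISTER_FUNC and UNREGISTER_FUNC are
 * macro-generated symbol names from lnet_rdma.h that the GPU DMA
 * driver resolves at load time.  A sketch of the expected lifecycle
 * from the driver side (my_ops is hypothetical; the real table is
 * supplied by nvidia-fs):
 *
 *	static struct nvfs_dma_rw_ops my_ops = { ... };
 *
 *	rc = REGISTER_FUNC(&my_ops);	// on driver load
 *	...				// LNet maps/unmaps GPU memory
 *	UNREGISTER_FUNC();		// on driver unload; drains users
 */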
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
{
	if (!ops || !nvfs_check_feature_set(ops))
		return -EINVAL;

	nvfs_ops = ops;
	(void)percpu_counter_init(&nvfs_n_ops, 0, GFP_KERNEL);
	atomic_set(&nvfs_shutdown, 0);
	CDEBUG(D_NET, "registering nvfs %p\n", ops);
	return 0;
}
EXPORT_SYMBOL(REGISTER_FUNC);

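/* Unhook the GPU driver: mark the interface shut down so no new
 * references are handed out, then wait for in-flight users to drain
 * before dropping nvfs_ops and destroying the counter.
 */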
void UNREGISTER_FUNC(void)
{
	(void)atomic_cmpxchg(&nvfs_shutdown, 0, 1);
	do {
		CDEBUG(D_NET, "Attempting to de-register nvfs: %ld\n",
		       nvfs_count_ops());
		msleep(NVFS_HOLD_TIME_MS);
	} while (nvfs_count_ops());
	nvfs_ops = NULL;
	percpu_counter_destroy(&nvfs_n_ops);
}
EXPORT_SYMBOL(UNREGISTER_FUNC);

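/* Query the GPU driver for the DMA priority of @dev with respect to
 * the GPU identified by @dev_idx; UINT_MAX is returned when no driver
 * is registered.  LNet presumably uses this to prefer interfaces
 * closer to the GPU.
 */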
unsigned int
lnet_get_dev_prio(struct device *dev, unsigned int dev_idx)
{
	unsigned int dev_prio = UINT_MAX;
	struct nvfs_dma_rw_ops *nvfs_ops;

	if (!dev)
		return dev_prio;

	nvfs_ops = nvfs_get_ops();
	if (!nvfs_ops)
		return dev_prio;

	dev_prio = nvfs_ops->nvfs_device_priority(dev, dev_idx);

	nvfs_put_ops();
	return dev_prio;
}
EXPORT_SYMBOL(lnet_get_dev_prio);

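/* DMA-map @sg through the GPU driver if one is registered.  Returns
 * the number of mapped entries on success (keeping the nvfs_get_ops()
 * reference until the matching lnet_rdma_unmap_sg()), -EIO if the
 * driver reports NVFS_IO_ERR, and 0 when there is no driver or the
 * driver returns NVFS_CPU_REQ, in which case the caller is expected to
 * fall back to the regular DMA mapping path.
 */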
int lnet_rdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
			   int nents, enum dma_data_direction direction)
{
	struct nvfs_dma_rw_ops *nvfs_ops = nvfs_get_ops();

	if (nvfs_ops) {
		int count;

		count = nvfs_ops->nvfs_dma_map_sg_attrs(dev,
				sg, nents, direction,
				DMA_ATTR_NO_WARN);

		if (unlikely(count == NVFS_IO_ERR)) {
			nvfs_put_ops();
			return -EIO;
		}

		if (unlikely(count == NVFS_CPU_REQ))
			nvfs_put_ops();
		else
			return count;
	}

	return 0;
}
EXPORT_SYMBOL(lnet_rdma_map_sg_attrs);

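/* Undo a successful lnet_rdma_map_sg_attrs().  Returns the number of
 * entries the GPU driver unmapped, or 0 if it unmapped none (or no
 * driver is registered).
 */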
int lnet_rdma_unmap_sg(struct device *dev,
		       struct scatterlist *sg, int nents,
		       enum dma_data_direction direction)
{
	struct nvfs_dma_rw_ops *nvfs_ops = nvfs_get_ops();

	if (nvfs_ops) {
		int count;

		count = nvfs_ops->nvfs_dma_unmap_sg(dev, sg,
						    nents, direction);

		/* drop the count we got by calling nvfs_get_ops() */
		nvfs_put_ops();

		if (count) {
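			/* also drop the reference kept by the matching
			 * lnet_rdma_map_sg_attrs() call
			 */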
			nvfs_put_ops();
			return count;
		}
	}

	return 0;
}
EXPORT_SYMBOL(lnet_rdma_unmap_sg);

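/* Return true if the GPU driver recognizes @page as GPU memory, which
 * can only be transferred by RDMA.
 */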
bool
lnet_is_rdma_only_page(struct page *page)
{
	bool found = false;
	struct nvfs_dma_rw_ops *nvfs_ops;

	if (!page)
		return found;

	nvfs_ops = nvfs_get_ops();
	if (!nvfs_ops)
		return found;

	if (!nvfs_ops->nvfs_is_gpu_page(page))
		goto out;

	found = true;

out:
	nvfs_put_ops();
	return found;
}
EXPORT_SYMBOL(lnet_is_rdma_only_page);

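/* Return the index of the GPU device that owns @page, or UINT_MAX if
 * no GPU DMA driver is registered.
 */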
unsigned int
lnet_get_dev_idx(struct page *page)
{
	unsigned int dev_idx = UINT_MAX;
	struct nvfs_dma_rw_ops *nvfs_ops;

	nvfs_ops = nvfs_get_ops();
	if (!nvfs_ops)
		return dev_idx;

	dev_idx = nvfs_ops->nvfs_gpu_index(page);

	nvfs_put_ops();
	return dev_idx;
}
EXPORT_SYMBOL(lnet_get_dev_idx);