GitHub Repository: torvalds/linux
Path: blob/master/net/core/devmem.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Devmem TCP
 *
 *	Authors:	Mina Almasry <[email protected]>
 *			Willem de Bruijn <[email protected]>
 *			Kaiyuan Zhang <[email protected]>
 */

#include <linux/dma-buf.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <net/sock.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "mp_dmabuf_devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

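/* All dma-buf bindings in the system, indexed by binding ID; looked up by
 * ID in net_devmem_lookup_dmabuf().
 */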
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);

static const struct memory_provider_ops dmabuf_devmem_ops;

bool net_is_devmem_iov(struct net_iov *niov)
{
	return niov->type == NET_IOV_DMABUF;
}

static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->area.niovs);
	kfree(owner);
}

static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner;

	owner = net_devmem_iov_to_chunk_owner(niov);
	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}

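/* Final teardown of a binding, run from the unbind work item: free the
 * per-chunk owners, destroy the genpool, then unmap, detach and release
 * the dma-buf before freeing the binding itself.
 */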
void __net_devmem_dmabuf_binding_free(struct work_struct *wq)
{
	struct net_devmem_dmabuf_binding *binding = container_of(wq, typeof(*binding),
								 unbind_w);
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  binding->direction);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kvfree(binding->tx_vec);
	kfree(binding);
}

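/* Carve one PAGE_SIZE chunk out of the binding's genpool and return the
 * net_iov that covers it, with its page_pool fields reset.
 */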
struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->area.niovs[index];

	niov->pp_magic = 0;
	niov->pp = NULL;
	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}

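/* Return a net_iov's PAGE_SIZE chunk to the binding's genpool. */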
void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}

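/* Tear down a binding: remove it from the global table, wait for in-flight
 * lookups, close every RX queue still using it as a memory provider and
 * drop the initial reference.
 */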
void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	/* Ensure no tx net_devmem_lookup_dmabuf() are in flight after the
	 * erase.
	 */
	synchronize_net();

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		const struct pp_memory_provider_params mp_params = {
			.mp_priv = binding,
			.mp_ops = &dmabuf_devmem_ops,
		};

		rxq_idx = get_netdev_rx_queue_index(rxq);

		__net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
	}

	net_devmem_dmabuf_binding_put(binding);
}

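/* Install @binding as the memory provider of RX queue @rxq_idx and track
 * the queue in binding->bound_rxqs.
 */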
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct pp_memory_provider_params mp_params = {
		.mp_priv = binding,
		.mp_ops = &dmabuf_devmem_ops,
	};
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
	if (err)
		return err;

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		goto err_close_rxq;

	return 0;

err_close_rxq:
	__net_mp_close_rxq(dev, rxq_idx, &mp_params);
	return err;
}

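/* Create a binding for @dmabuf_fd on @dev: attach and DMA-map the dma-buf,
 * carve the mapping into a genpool of PAGE_SIZE chunks backed by one
 * net_iov per page (plus a TX lookup vector for DMA_TO_DEVICE bindings),
 * and publish the binding in the global table.
 */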
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev,
		       struct device *dma_dev,
		       enum dma_data_direction direction,
		       unsigned int dmabuf_fd, struct netdev_nl_sock *priv,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	if (!dma_dev) {
		NL_SET_ERR_MSG(extack, "Device doesn't support DMA");
		return ERR_PTR(-EOPNOTSUPP);
	}

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;
	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	mutex_init(&binding->lock);

	binding->dmabuf = dmabuf;
	binding->direction = direction;

	binding->attachment = dma_buf_attach(binding->dmabuf, dma_dev);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_binding;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       direction);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	if (direction == DMA_TO_DEVICE) {
		binding->tx_vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
						 sizeof(struct net_iov *),
						 GFP_KERNEL);
		if (!binding->tx_vec) {
			err = -ENOMEM;
			goto err_unmap;
		}
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool = gen_pool_create(PAGE_SHIFT,
					      dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_tx_vec;
	}

	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->area.base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->area.num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
						   sizeof(*owner->area.niovs),
						   GFP_KERNEL);
		if (!owner->area.niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->area.num_niovs; i++) {
			niov = &owner->area.niovs[i];
			niov->type = NET_IOV_DMABUF;
			niov->owner = &owner->area;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
			if (direction == DMA_TO_DEVICE)
				binding->tx_vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
		}

		virtual += len;
	}

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_chunks;

	list_add(&binding->list, &priv->bindings);

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_tx_vec:
	kvfree(binding->tx_vec);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  direction);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}

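/* Look up a binding by ID under RCU and take a reference; returns NULL if
 * no binding with that ID exists or a reference can no longer be taken.
 */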
struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id)
{
	struct net_devmem_dmabuf_binding *binding;

	rcu_read_lock();
	binding = xa_load(&net_devmem_dmabuf_bindings, id);
	if (binding) {
		if (!net_devmem_dmabuf_binding_get(binding))
			binding = NULL;
	}
	rcu_read_unlock();

	return binding;
}

void net_devmem_get_net_iov(struct net_iov *niov)
{
	net_devmem_dmabuf_binding_get(net_devmem_iov_binding(niov));
}

void net_devmem_put_net_iov(struct net_iov *niov)
{
	net_devmem_dmabuf_binding_put(net_devmem_iov_binding(niov));
}

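/* TX lookup: resolve @dmabuf_id to a referenced binding that has a TX
 * vector and whose net_device matches the socket's cached route.
 */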
struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
							 unsigned int dmabuf_id)
{
	struct net_devmem_dmabuf_binding *binding;
	struct dst_entry *dst = __sk_dst_get(sk);
	int err = 0;

	binding = net_devmem_lookup_dmabuf(dmabuf_id);
	if (!binding || !binding->tx_vec) {
		err = -EINVAL;
		goto out_err;
	}

	/* The dma-addrs in this binding are only reachable to the corresponding
	 * net_device.
	 */
	if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) {
		err = -ENODEV;
		goto out_err;
	}

	return binding;

out_err:
	if (binding)
		net_devmem_dmabuf_binding_put(binding);

	return ERR_PTR(err);
}

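/* Map an offset into the dma-buf to its backing net_iov; *off and *size
 * return the offset within that page and the bytes remaining in it.
 */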
struct net_iov *
net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding,
		       size_t virt_addr, size_t *off, size_t *size)
{
	if (virt_addr >= binding->dmabuf->size)
		return NULL;

	*off = virt_addr % PAGE_SIZE;
	*size = PAGE_SIZE - *off;

	return binding->tx_vec[virt_addr / PAGE_SIZE];
}

/*** "Dmabuf devmem memory provider" ***/

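/* Page pool init hook: take a reference on the binding and force-disable
 * DMA syncing; only order-0 pools are supported.
 */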
int mp_dmabuf_devmem_init(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	if (!binding)
		return -EINVAL;

	/* dma-buf dma addresses do not need and should not be used with
	 * dma_sync_for_cpu/device. Force disable dma_sync.
	 */
	pool->dma_sync = false;
	pool->dma_sync_for_cpu = false;

	if (pool->p.order != 0)
		return -E2BIG;

	net_devmem_dmabuf_binding_get(binding);
	return 0;
}

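/* Allocation hook: hand a freshly allocated net_iov to the page pool as a
 * netmem reference.
 */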
netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
	struct net_iov *niov;
	netmem_ref netmem;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return 0;

	netmem = net_iov_to_netmem(niov);

	page_pool_set_pp_info(pool, netmem);

	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
	return netmem;
}

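/* Destroy hook: drop the binding reference taken in mp_dmabuf_devmem_init(). */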
void mp_dmabuf_devmem_destroy(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	net_devmem_dmabuf_binding_put(binding);
}

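/* Release hook: return the net_iov to the binding's genpool. Always
 * returns false so the page pool never tries to put_page() a net_iov.
 */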
bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
{
	long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));

	if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
		return false;

	if (WARN_ON_ONCE(refcount != 1))
		return false;

	page_pool_clear_pp_info(netmem);

	net_devmem_free_dmabuf(netmem_to_net_iov(netmem));

	/* We don't want the page pool put_page()ing our net_iovs. */
	return false;
}

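/* Netlink fill hook: report the binding ID for a queue or page pool dump. */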
static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp,
				    struct netdev_rx_queue *rxq)
{
	const struct net_devmem_dmabuf_binding *binding = mp_priv;
	int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF;

	return nla_put_u32(rsp, type, binding->id);
}

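/* Uninstall hook: forget the RX queue; once no queues remain bound, clear
 * binding->dev under the binding lock.
 */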
static void mp_dmabuf_devmem_uninstall(void *mp_priv,
				       struct netdev_rx_queue *rxq)
{
	struct net_devmem_dmabuf_binding *binding = mp_priv;
	struct netdev_rx_queue *bound_rxq;
	unsigned long xa_idx;

	xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
		if (bound_rxq == rxq) {
			xa_erase(&binding->bound_rxqs, xa_idx);
			if (xa_empty(&binding->bound_rxqs)) {
				mutex_lock(&binding->lock);
				binding->dev = NULL;
				mutex_unlock(&binding->lock);
			}
			break;
		}
	}
}

static const struct memory_provider_ops dmabuf_devmem_ops = {
	.init = mp_dmabuf_devmem_init,
	.destroy = mp_dmabuf_devmem_destroy,
	.alloc_netmems = mp_dmabuf_devmem_alloc_netmems,
	.release_netmem = mp_dmabuf_devmem_release_page,
	.nl_fill = mp_dmabuf_devmem_nl_fill,
	.uninstall = mp_dmabuf_devmem_uninstall,
};