GitHub Repository: torvalds/linux
Path: blob/master/block/blk-mq-dma.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

struct phys_vec {
        phys_addr_t paddr;
        u32 len;
};

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
        if (iter->iter.bi_size)
                return true;
        if (!iter->bio || !iter->bio->bi_next)
                return false;

        iter->bio = iter->bio->bi_next;
        if (iter->is_integrity) {
                iter->iter = bio_integrity(iter->bio)->bip_iter;
                iter->bvecs = bio_integrity(iter->bio)->bip_vec;
        } else {
                iter->iter = iter->bio->bi_iter;
                iter->bvecs = iter->bio->bi_io_vec;
        }
        return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
                struct phys_vec *vec)
{
        unsigned int max_size;
        struct bio_vec bv;

        if (!iter->iter.bi_size)
                return false;

        bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
        vec->paddr = bvec_phys(&bv);
        max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
        bv.bv_len = min(bv.bv_len, max_size);
        bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

        /*
         * If we are entirely done with this bi_io_vec entry, check if the next
         * one could be merged into it. This typically happens when moving to
         * the next bio, but some callers also don't pack bvecs tight.
         */
        while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
                struct bio_vec next;

                if (!__blk_map_iter_next(iter))
                        break;

                next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
                if (bv.bv_len + next.bv_len > max_size ||
                    !biovec_phys_mergeable(req->q, &bv, &next))
                        break;

                bv.bv_len += next.bv_len;
                bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
        }

        vec->len = bv.bv_len;
        return true;
}
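
/*
 * Example (illustrative numbers, not from a specific workload): two physically
 * contiguous 4k bvecs, say at 0x1000000 and 0x1001000, are merged into a
 * single 8k phys_vec by the loop above, provided the combined length does not
 * exceed max_size and biovec_phys_mergeable() finds them contiguous without
 * crossing a segment boundary.
 */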

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
                struct device *dma_dev)
{
        return !((queue_virt_boundary(req->q) + 1) &
                 dma_get_merge_boundary(dma_dev));
}
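
/*
 * Example (illustrative numbers, not from a specific driver): a queue with a
 * 4k virt_boundary_mask of 0xfff combined with an IOMMU merge boundary of
 * 0xfff gives (0xfff + 1) & 0xfff == 0, so the IOVA path is allowed.  A queue
 * without a virt_boundary (mask 0) gives 1 & 0xfff != 0 and falls back to the
 * per-segment dma_map_page() path below.
 */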

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
        iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
        iter->len = vec->len;
        return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
                struct blk_dma_iter *iter, struct phys_vec *vec)
{
        iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
                        offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
        if (dma_mapping_error(dma_dev, iter->addr)) {
                iter->status = BLK_STS_RESOURCE;
                return false;
        }
        iter->len = vec->len;
        return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter,
                struct phys_vec *vec)
{
        enum dma_data_direction dir = rq_dma_dir(req);
        unsigned int mapped = 0;
        int error;

        iter->addr = state->addr;
        iter->len = dma_iova_size(state);

        do {
                error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
                                vec->len, dir, 0);
                if (error)
                        break;
                mapped += vec->len;
        } while (blk_map_iter_next(req, &iter->iter, vec));

        error = dma_iova_sync(dma_dev, state, 0, mapped);
        if (error) {
                iter->status = errno_to_blk_status(error);
                return false;
        }

        return true;
}
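
/*
 * Note: on success the entire payload sits in a single contiguous IOVA range,
 * so iter->addr and iter->len set above already describe the whole mapping;
 * this is what lets callers of the iterator API below coalesce the request
 * into one DMA segment instead of iterating further.
 */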

static inline void blk_rq_map_iter_init(struct request *rq,
                struct blk_map_iter *iter)
{
        struct bio *bio = rq->bio;

        if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
                *iter = (struct blk_map_iter) {
                        .bvecs = &rq->special_vec,
                        .iter = {
                                .bi_size = rq->special_vec.bv_len,
                        }
                };
        } else if (bio) {
                *iter = (struct blk_map_iter) {
                        .bio = bio,
                        .bvecs = bio->bi_io_vec,
                        .iter = bio->bi_iter,
                };
        } else {
                /* the internal flush request may not have bio attached */
                *iter = (struct blk_map_iter) {};
        }
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter,
                unsigned int total_len)
{
        struct phys_vec vec;

        memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
        iter->status = BLK_STS_OK;

        /*
         * Grab the first segment ASAP because we'll need it to check for P2P
         * transfers.
         */
        if (!blk_map_iter_next(req, &iter->iter, &vec))
                return false;

        switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
                                 phys_to_page(vec.paddr))) {
        case PCI_P2PDMA_MAP_BUS_ADDR:
                if (iter->iter.is_integrity)
                        bio_integrity(req->bio)->bip_flags |= BIP_P2P_DMA;
                else
                        req->cmd_flags |= REQ_P2PDMA;
                return blk_dma_map_bus(iter, &vec);
        case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
                /*
                 * P2P transfers through the host bridge are treated the
                 * same as non-P2P transfers below and during unmap.
                 */
        case PCI_P2PDMA_MAP_NONE:
                break;
        default:
                iter->status = BLK_STS_INVAL;
                return false;
        }

        if (blk_can_dma_map_iova(req, dma_dev) &&
            dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
                return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
        return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the
 * caller and don't need to be initialized. @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter)
{
        blk_rq_map_iter_init(req, &iter->iter);
        return blk_dma_map_iter_start(req, dma_dev, state, iter,
                        blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start(). See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter)
{
        struct phys_vec vec;

        if (!blk_map_iter_next(req, &iter->iter, &vec))
                return false;

        if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
                return blk_dma_map_bus(iter, &vec);
        return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
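
/*
 * Example usage (illustrative sketch only, not taken from an in-tree driver;
 * "program one segment" stands in for whatever the hardware needs, and @state
 * must be kept by the driver for unmapping after completion):
 *
 *	struct blk_dma_iter iter;
 *
 *	if (!blk_rq_dma_map_iter_start(req, dma_dev, state, &iter))
 *		return iter.status;	// BLK_STS_OK here means nothing to map
 *	do {
 *		// program one segment from iter.addr / iter.len
 *	} while (blk_rq_dma_map_iter_next(req, dma_dev, state, &iter));
 *	if (iter.status != BLK_STS_OK)
 *		return iter.status;
 */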

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
        if (!*sg)
                return sglist;

        /*
         * If the driver previously mapped a shorter list, we could see a
         * termination bit prematurely unless it fully inits the sg table
         * on each mapping. We KNOW that there must be more entries here
         * or the driver would be buggy, so force clear the termination bit
         * to avoid doing a full sg_init_table() in drivers for each command.
         */
        sg_unmark_end(*sg);
        return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries set up. Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
                struct scatterlist **last_sg)
{
        struct blk_map_iter iter;
        struct phys_vec vec;
        int nsegs = 0;

        blk_rq_map_iter_init(rq, &iter);
        while (blk_map_iter_next(rq, &iter, &vec)) {
                *last_sg = blk_next_sg(last_sg, sglist);
                sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
                                offset_in_page(vec.paddr));
                nsegs++;
        }

        if (*last_sg)
                sg_mark_end(*last_sg);

        /*
         * Something must have gone wrong if the computed number of segments
         * is bigger than the number of the request's physical segments.
         */
        WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

        return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
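
/*
 * Example usage (illustrative sketch; "sgl" is a hypothetical scatterlist
 * table sized for the queue's maximum segment count and initialized once with
 * sg_init_table() at allocation time, which is why blk_next_sg() above only
 * needs to clear the termination bit):
 *
 *	struct scatterlist *last_sg = NULL;
 *	int nents = __blk_rq_map_sg(rq, sgl, &last_sg);
 *
 * On return the first nents entries of sgl are populated and terminated,
 * ready to be handed to dma_map_sg() or a similar DMA mapping call.
 */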

#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req integrity data to @dma_dev. @state and @iter are
 * provided by the caller and don't need to be initialized. @state needs to be
 * stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
                struct device *dma_dev, struct dma_iova_state *state,
                struct blk_dma_iter *iter)
{
        unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
                        blk_rq_sectors(req));
        struct bio *bio = req->bio;

        iter->iter = (struct blk_map_iter) {
                .bio = bio,
                .iter = bio_integrity(bio)->bip_iter,
                .bvecs = bio_integrity(bio)->bip_vec,
                .is_integrity = true,
        };
        return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start(). See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
                struct device *dma_dev, struct blk_dma_iter *iter)
{
        struct phys_vec vec;

        if (!blk_map_iter_next(req, &iter->iter, &vec))
                return false;

        if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
                return blk_dma_map_bus(iter, &vec);
        return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
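
/*
 * Example usage (illustrative sketch), mirroring the payload iterator above;
 * note that unlike blk_rq_dma_map_iter_next() the integrity _next variant
 * does not take the dma_iova_state:
 *
 *	struct blk_dma_iter iter;
 *
 *	if (blk_rq_integrity_dma_map_iter_start(req, dma_dev, state, &iter)) {
 *		do {
 *			// program one metadata segment from iter.addr / iter.len
 *		} while (blk_rq_integrity_dma_map_iter_next(req, dma_dev,
 *				&iter));
 *	}
 *	if (iter.status != BLK_STS_OK)
 *		return iter.status;
 */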

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in request into a
 * scatterlist. The scatterlist must be big enough to hold all
 * elements. I.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
        struct request_queue *q = rq->q;
        struct scatterlist *sg = NULL;
        struct bio *bio = rq->bio;
        unsigned int segments = 0;
        struct phys_vec vec;

        struct blk_map_iter iter = {
                .bio = bio,
                .iter = bio_integrity(bio)->bip_iter,
                .bvecs = bio_integrity(bio)->bip_vec,
                .is_integrity = true,
        };

        while (blk_map_iter_next(rq, &iter, &vec)) {
                sg = blk_next_sg(&sg, sglist);
                sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
                                offset_in_page(vec.paddr));
                segments++;
        }

        if (sg)
                sg_mark_end(sg);

        /*
         * Something must have gone wrong if the computed number of segments
         * is bigger than the number of the request's physical integrity
         * segments.
         */
        BUG_ON(segments > rq->nr_integrity_segments);
        BUG_ON(segments > queue_max_integrity_segments(q));
        return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
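
/*
 * Example usage (illustrative sketch; "meta_sgl" is a hypothetical table sized
 * using rq->nr_integrity_segments and initialized at allocation time):
 *
 *	int nents = blk_rq_map_integrity_sg(rq, meta_sgl);
 *
 * The populated entries can then be DMA mapped like any other scatterlist.
 */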
#endif