GitHub Repository: torvalds/linux
Path: blob/master/drivers/block/brd.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Ram backed block device driver.
 *
 * Copyright (C) 2007 Nick Piggin
 * Copyright (C) 2007 Novell Inc.
 *
 * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
 * of their respective owners.
 */

#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/xarray.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/debugfs.h>

#include <linux/uaccess.h>

/*
 * Each block ramdisk device has an xarray, brd_pages, that stores the pages
 * containing the block device's contents.
 */
struct brd_device {
        int                     brd_number;
        struct gendisk          *brd_disk;
        struct list_head        brd_list;

        /*
         * Backing store of pages. This is the contents of the block device.
         */
        struct xarray           brd_pages;
        u64                     brd_nr_pages;
};
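
/*
 * Illustrative mapping, assuming 4K pages (PAGE_SECTORS == 8,
 * PAGE_SECTORS_SHIFT == 3): sector 19 lives in the page stored at xarray
 * index 19 >> 3 == 2, at byte offset (19 & 7) << SECTOR_SHIFT == 1536
 * within that page.
 */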

/*
 * Look up and return a brd's page for a given sector, with a reference
 * grabbed.
 */
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
        struct page *page;
        XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);

        rcu_read_lock();
repeat:
        page = xas_load(&xas);
        if (xas_retry(&xas, page)) {
                xas_reset(&xas);
                goto repeat;
        }

        if (!page)
                goto out;

        if (!get_page_unless_zero(page)) {
                xas_reset(&xas);
                goto repeat;
        }

        if (unlikely(page != xas_reload(&xas))) {
                put_page(page);
                xas_reset(&xas);
                goto repeat;
        }
out:
        rcu_read_unlock();

        return page;
}
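
/*
 * A note on the lockless lookup above: xas_retry() skips transient XArray
 * retry entries, get_page_unless_zero() refuses a page whose refcount has
 * already dropped to zero (one that is being freed), and xas_reload()
 * re-checks that the slot still holds the same page after the reference
 * was taken. Any failed check resets the walk and retries.
 */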

/*
 * Insert a new page for a given sector, if one does not already exist.
 * The page is returned with a reference grabbed.
 */
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
                blk_opf_t opf)
{
        gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
        struct page *page, *ret;

        page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
        if (!page)
                return ERR_PTR(-ENOMEM);

        xa_lock(&brd->brd_pages);
        ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
                        page, gfp);
        if (!ret) {
                brd->brd_nr_pages++;
                get_page(page);
                xa_unlock(&brd->brd_pages);
                return page;
        }

        if (!xa_is_err(ret)) {
                get_page(ret);
                xa_unlock(&brd->brd_pages);
                put_page(page);
                return ret;
        }

        xa_unlock(&brd->brd_pages);
        put_page(page);
        return ERR_PTR(xa_err(ret));
}
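
/*
 * If two writers race to populate the same slot, __xa_cmpxchg() lets
 * exactly one insertion succeed; the loser takes a reference on the
 * winner's page and frees its own freshly allocated one. Under REQ_NOWAIT
 * the allocation uses GFP_NOWAIT, so the caller can fail the bio with
 * BLK_STS_AGAIN instead of blocking on memory reclaim.
 */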

/*
 * Free all backing store pages and the xarray. This must only be called
 * when there are no other users of the device.
 */
static void brd_free_pages(struct brd_device *brd)
{
        struct page *page;
        pgoff_t idx;

        xa_for_each(&brd->brd_pages, idx, page) {
                put_page(page);
                cond_resched();
        }

        xa_destroy(&brd->brd_pages);
}
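
/*
 * The cond_resched() above provides a scheduling point while tearing down
 * a large ramdisk, where freeing many pages could otherwise run for a long
 * time without rescheduling.
 */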

/*
 * Process a single segment. The segment is capped so that it does not cross
 * page boundaries in either the bio or the brd backing memory.
 */
static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
{
        struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
        sector_t sector = bio->bi_iter.bi_sector;
        u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT;
        blk_opf_t opf = bio->bi_opf;
        struct page *page;
        void *kaddr;

        bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

        page = brd_lookup_page(brd, sector);
        if (!page && op_is_write(opf)) {
                page = brd_insert_page(brd, sector, opf);
                if (IS_ERR(page))
                        goto out_error;
        }

        kaddr = bvec_kmap_local(&bv);
        if (op_is_write(opf)) {
                memcpy_to_page(page, offset, kaddr, bv.bv_len);
        } else {
                if (page)
                        memcpy_from_page(kaddr, page, offset, bv.bv_len);
                else
                        memset(kaddr, 0, bv.bv_len);
        }
        kunmap_local(kaddr);

        bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
        if (page)
                put_page(page);
        return true;

out_error:
        if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
                bio_wouldblock_error(bio);
        else
                bio_io_error(bio);
        return false;
}
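
/*
 * For example, with 4K pages a 1 KiB segment starting at byte offset 3584
 * into a backing page is capped to PAGE_SIZE - 3584 == 512 bytes by the
 * min_t() above; the remaining 512 bytes are consumed by the next
 * iteration of the loop in brd_submit_bio(). Reads of sectors that were
 * never written find no backing page and return zeroes.
 */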

static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
{
        sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
        sector_t aligned_end = round_down(
                        sector + (size >> SECTOR_SHIFT), PAGE_SECTORS);
        struct page *page;

        if (aligned_end <= aligned_sector)
                return;

        xa_lock(&brd->brd_pages);
        while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
                page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
                if (page) {
                        put_page(page);
                        brd->brd_nr_pages--;
                }
                aligned_sector += PAGE_SECTORS;
        }
        xa_unlock(&brd->brd_pages);
}
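
/*
 * Only whole pages fully covered by the discard are freed: with 4K pages,
 * discarding sectors [3, 21) rounds inward to [8, 16) and frees just the
 * page at index 1. Partially covered pages at either end keep their data,
 * so discarded ranges are only guaranteed to read back as zeroes where the
 * whole backing page was dropped.
 */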

static void brd_submit_bio(struct bio *bio)
{
        struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;

        if (unlikely(op_is_discard(bio->bi_opf))) {
                brd_do_discard(brd, bio->bi_iter.bi_sector,
                                bio->bi_iter.bi_size);
                bio_endio(bio);
                return;
        }

        do {
                if (!brd_rw_bvec(brd, bio))
                        return;
        } while (bio->bi_iter.bi_size);

        bio_endio(bio);
}
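
/*
 * The loop above relies on brd_rw_bvec() advancing bi_iter by however many
 * bytes it processed, so each iteration consumes one page-bounded chunk
 * until bi_size reaches zero. On error brd_rw_bvec() has already ended the
 * bio, so the loop just returns.
 */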

static const struct block_device_operations brd_fops = {
        .owner =                THIS_MODULE,
        .submit_bio =           brd_submit_bio,
};

/*
 * And now the module code and kernel interface.
 */
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");

unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");

static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Number of minors to reserve between devices");

MODULE_DESCRIPTION("Ram backed block device driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
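
/*
 * Example usage (illustrative values):
 *
 *	modprobe brd rd_nr=4 rd_size=1048576 max_part=4
 *
 * creates /dev/ram0 through /dev/ram3, each a 1 GiB RAM disk (rd_size is
 * in KiB), with minor numbers reserved for up to four partitions per
 * device.
 */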

#ifndef MODULE
/* Legacy boot options - nonmodular */
static int __init ramdisk_size(char *str)
{
        rd_size = simple_strtol(str, NULL, 0);
        return 1;
}
__setup("ramdisk_size=", ramdisk_size);
#endif

/*
 * The device scheme is derived from loop.c. Keep them in sync where possible
 * (should share code eventually).
 */
static LIST_HEAD(brd_devices);
static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;

static struct brd_device *brd_find_or_alloc_device(int i)
{
        struct brd_device *brd;

        mutex_lock(&brd_devices_mutex);
        list_for_each_entry(brd, &brd_devices, brd_list) {
                if (brd->brd_number == i) {
                        mutex_unlock(&brd_devices_mutex);
                        return ERR_PTR(-EEXIST);
                }
        }

        brd = kzalloc(sizeof(*brd), GFP_KERNEL);
        if (!brd) {
                mutex_unlock(&brd_devices_mutex);
                return ERR_PTR(-ENOMEM);
        }
        brd->brd_number = i;
        list_add_tail(&brd->brd_list, &brd_devices);
        mutex_unlock(&brd_devices_mutex);
        return brd;
}

static void brd_free_device(struct brd_device *brd)
{
        mutex_lock(&brd_devices_mutex);
        list_del(&brd->brd_list);
        mutex_unlock(&brd_devices_mutex);
        kfree(brd);
}

static int brd_alloc(int i)
{
        struct brd_device *brd;
        struct gendisk *disk;
        char buf[DISK_NAME_LEN];
        int err = -ENOMEM;
        struct queue_limits lim = {
                /*
                 * This is so that fdisk will align partitions on 4k, because
                 * the direct_access API needs 4k alignment when returning a
                 * PFN. (This is only a problem on very small devices <= 4M;
                 * otherwise fdisk will align on 1M. Regardless, this setting
                 * is harmless.)
                 */
                .physical_block_size    = PAGE_SIZE,
                .max_hw_discard_sectors = UINT_MAX,
                .max_discard_segments   = 1,
                .discard_granularity    = PAGE_SIZE,
                .features               = BLK_FEAT_SYNCHRONOUS |
                                          BLK_FEAT_NOWAIT,
        };

        brd = brd_find_or_alloc_device(i);
        if (IS_ERR(brd))
                return PTR_ERR(brd);

        xa_init(&brd->brd_pages);

        snprintf(buf, DISK_NAME_LEN, "ram%d", i);
        if (!IS_ERR_OR_NULL(brd_debugfs_dir))
                debugfs_create_u64(buf, 0444, brd_debugfs_dir,
                                &brd->brd_nr_pages);

        disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
        if (IS_ERR(disk)) {
                err = PTR_ERR(disk);
                goto out_free_dev;
        }
        disk->major = RAMDISK_MAJOR;
        disk->first_minor = i * max_part;
        disk->minors = max_part;
        disk->fops = &brd_fops;
        disk->private_data = brd;
        strscpy(disk->disk_name, buf, DISK_NAME_LEN);
        set_capacity(disk, rd_size * 2);

        err = add_disk(disk);
        if (err)
                goto out_cleanup_disk;

        return 0;

out_cleanup_disk:
        put_disk(disk);
out_free_dev:
        brd_free_device(brd);
        return err;
}
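
/*
 * rd_size is in KiB while set_capacity() takes 512-byte sectors, hence the
 * rd_size * 2 above. BLK_FEAT_SYNCHRONOUS marks the device as
 * memory-backed, completing I/O synchronously at submission time, and
 * BLK_FEAT_NOWAIT allows REQ_NOWAIT bios to be submitted to it.
 */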

static void brd_probe(dev_t dev)
{
        brd_alloc(MINOR(dev) / max_part);
}

static void brd_cleanup(void)
{
        struct brd_device *brd, *next;

        debugfs_remove_recursive(brd_debugfs_dir);

        list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
                del_gendisk(brd->brd_disk);
                put_disk(brd->brd_disk);
                brd_free_pages(brd);
                brd_free_device(brd);
        }
}

static inline void brd_check_and_reset_par(void)
{
        if (unlikely(!max_part))
                max_part = 1;

        /*
         * Make sure (1U << MINORBITS) is exactly divisible by 'max_part';
         * otherwise it is possible to get the same dev_t when adding
         * partitions.
         */
        if ((1U << MINORBITS) % max_part != 0)
                max_part = 1UL << fls(max_part);

        if (max_part > DISK_MAX_PARTS) {
                pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
                        DISK_MAX_PARTS, DISK_MAX_PARTS);
                max_part = DISK_MAX_PARTS;
        }
}
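
/*
 * For example, max_part = 3 does not divide 1U << MINORBITS evenly, so it
 * is rounded up to 1UL << fls(3) == 4; power-of-two values always divide
 * 1U << MINORBITS and thus yield unique dev_t values.
 */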

static int __init brd_init(void)
{
        int err, i;

        /*
         * The brd module can instantiate the underlying device structure
         * on-demand, provided that its device node is accessed.
         *
         * (1) If rd_nr is specified, create that many devices upfront;
         *     otherwise it defaults to CONFIG_BLK_DEV_RAM_COUNT.
         * (2) Users can further extend the set of brd devices by creating
         *     device nodes themselves and having the kernel automatically
         *     instantiate the actual device on-demand. Example:
         *		mknod /path/devnod_name b 1 X	# 1 is the rd major
         *		fdisk -l /path/devnod_name
         *	If device (X / max_part) was not already created, it will
         *	be created dynamically.
         */

        brd_check_and_reset_par();

        brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);

        if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
                err = -EIO;
                goto out_free;
        }

        for (i = 0; i < rd_nr; i++)
                brd_alloc(i);

        pr_info("brd: module loaded\n");
        return 0;

out_free:
        brd_cleanup();

        pr_info("brd: module NOT loaded !!!\n");
        return err;
}

static void __exit brd_exit(void)
{

        unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
        brd_cleanup();

        pr_info("brd: module unloaded\n");
}

module_init(brd_init);
module_exit(brd_exit);