Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/fs/ext2/ialloc.c
29267 views
1
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext2/ialloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  BSD ufs-inspired inode and directory allocation by
 *  Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */
15
16
#include <linux/quotaops.h>
17
#include <linux/sched.h>
18
#include <linux/backing-dev.h>
19
#include <linux/buffer_head.h>
20
#include <linux/random.h>
21
#include "ext2.h"
22
#include "xattr.h"
23
#include "acl.h"
24
25
/*
 * ialloc.c contains the inode allocation and deallocation routines
 */
28
29
/*
 * The free inodes are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the group.
 */
38
39
40
/*
41
* Read the inode allocation bitmap for a given block_group, reading
42
* into the specified slot in the superblock's bitmap cache.
43
*
44
* Return buffer_head of bitmap on success or NULL.
45
*/
46
static struct buffer_head *
47
read_inode_bitmap(struct super_block * sb, unsigned long block_group)
48
{
49
struct ext2_group_desc *desc;
50
struct buffer_head *bh = NULL;
51
52
desc = ext2_get_group_desc(sb, block_group, NULL);
53
if (!desc)
54
goto error_out;
55
56
bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
57
if (!bh)
58
ext2_error(sb, "read_inode_bitmap",
59
"Cannot read inode bitmap - "
60
"block_group = %lu, inode_bitmap = %u",
61
block_group, le32_to_cpu(desc->bg_inode_bitmap));
62
error_out:
63
return bh;
64
}
65
66
static void ext2_release_inode(struct super_block *sb, int group, int dir)
67
{
68
struct ext2_group_desc * desc;
69
struct buffer_head *bh;
70
71
desc = ext2_get_group_desc(sb, group, &bh);
72
if (!desc) {
73
ext2_error(sb, "ext2_release_inode",
74
"can't get descriptor for group %d", group);
75
return;
76
}
77
78
spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
79
le16_add_cpu(&desc->bg_free_inodes_count, 1);
80
if (dir)
81
le16_add_cpu(&desc->bg_used_dirs_count, -1);
82
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
83
percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter);
84
if (dir)
85
percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
86
mark_buffer_dirty(bh);
87
}
88
89
/*
90
* NOTE! When we get the inode, we're the only people
91
* that have access to it, and as such there are no
92
* race conditions we have to worry about. The inode
93
* is not on the hash-lists, and it cannot be reached
94
* through the filesystem because the directory entry
95
* has been deleted earlier.
96
*
97
* HOWEVER: we must make sure that we get no aliases,
98
* which means that we have to call "clear_inode()"
99
* _before_ we mark the inode not in use in the inode
100
* bitmaps. Otherwise a newly created file might use
101
* the same inode number (not actually the same pointer
102
* though), and then we'd have two inodes sharing the
103
* same inode number and space on the harddisk.
104
*/
105
void ext2_free_inode (struct inode * inode)
106
{
107
struct super_block * sb = inode->i_sb;
108
int is_directory;
109
unsigned long ino;
110
struct buffer_head *bitmap_bh;
111
unsigned long block_group;
112
unsigned long bit;
113
struct ext2_super_block * es;
114
115
ino = inode->i_ino;
116
ext2_debug ("freeing inode %lu\n", ino);
117
118
/*
119
* Note: we must free any quota before locking the superblock,
120
* as writing the quota to disk may need the lock as well.
121
*/
122
/* Quota is already initialized in iput() */
123
dquot_free_inode(inode);
124
dquot_drop(inode);
125
126
es = EXT2_SB(sb)->s_es;
127
is_directory = S_ISDIR(inode->i_mode);
128
129
if (ino < EXT2_FIRST_INO(sb) ||
130
ino > le32_to_cpu(es->s_inodes_count)) {
131
ext2_error (sb, "ext2_free_inode",
132
"reserved or nonexistent inode %lu", ino);
133
return;
134
}
135
block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
136
bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
137
bitmap_bh = read_inode_bitmap(sb, block_group);
138
if (!bitmap_bh)
139
return;
140
141
/* Ok, now we can actually update the inode bitmaps.. */
142
if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
143
bit, (void *) bitmap_bh->b_data))
144
ext2_error (sb, "ext2_free_inode",
145
"bit already cleared for inode %lu", ino);
146
else
147
ext2_release_inode(sb, block_group, is_directory);
148
mark_buffer_dirty(bitmap_bh);
149
if (sb->s_flags & SB_SYNCHRONOUS)
150
sync_dirty_buffer(bitmap_bh);
151
152
brelse(bitmap_bh);
153
}
154
155
/*
156
* We perform asynchronous prereading of the new inode's inode block when
157
* we create the inode, in the expectation that the inode will be written
158
* back soon. There are two reasons:
159
*
160
* - When creating a large number of files, the async prereads will be
161
* nicely merged into large reads
162
* - When writing out a large number of inodes, we don't need to keep on
163
* stalling the writes while we read the inode block.
164
*
165
* FIXME: ext2_get_group_desc() needs to be simplified.
166
*/
167
static void ext2_preread_inode(struct inode *inode)
168
{
169
unsigned long block_group;
170
unsigned long offset;
171
unsigned long block;
172
struct ext2_group_desc * gdp;
173
174
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
175
gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
176
if (gdp == NULL)
177
return;
178
179
/*
180
* Figure out the offset within the block group inode table
181
*/
182
offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
183
EXT2_INODE_SIZE(inode->i_sb);
184
block = le32_to_cpu(gdp->bg_inode_table) +
185
(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
186
sb_breadahead(inode->i_sb, block);
187
}
188
189
/*
190
* There are two policies for allocating an inode. If the new inode is
191
* a directory, then a forward search is made for a block group with both
192
* free space and a low directory-to-inode ratio; if that fails, then of
193
* the groups with above-average free space, that group with the fewest
194
* directories already is chosen.
195
*
196
* For other inodes, search forward from the parent directory\'s block
197
* group to find a free inode.
198
*/
199
static int find_group_dir(struct super_block *sb, struct inode *parent)
200
{
201
int ngroups = EXT2_SB(sb)->s_groups_count;
202
int avefreei = ext2_count_free_inodes(sb) / ngroups;
203
struct ext2_group_desc *desc, *best_desc = NULL;
204
int group, best_group = -1;
205
206
for (group = 0; group < ngroups; group++) {
207
desc = ext2_get_group_desc (sb, group, NULL);
208
if (!desc || !desc->bg_free_inodes_count)
209
continue;
210
if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
211
continue;
212
if (!best_desc ||
213
(le16_to_cpu(desc->bg_free_blocks_count) >
214
le16_to_cpu(best_desc->bg_free_blocks_count))) {
215
best_group = group;
216
best_desc = desc;
217
}
218
}
219
220
return best_group;
221
}
222
223
/*
224
* Orlov's allocator for directories.
225
*
226
* We always try to spread first-level directories.
227
*
228
* If there are blockgroups with both free inodes and free blocks counts
229
* not worse than average we return one with smallest directory count.
230
* Otherwise we simply return a random group.
231
*
232
* For the rest rules look so:
233
*
234
* It's OK to put directory into a group unless
235
* it has too many directories already (max_dirs) or
236
* it has too few free inodes left (min_inodes) or
237
* it has too few free blocks left (min_blocks) or
238
* it's already running too large debt (max_debt).
239
* Parent's group is preferred, if it doesn't satisfy these
240
* conditions we search cyclically through the rest. If none
241
* of the groups look good we just look for a group with more
242
* free inodes than average (starting at parent's group).
243
*
244
* Debt is incremented each time we allocate a directory and decremented
245
* when we allocate an inode, within 0--255.
246
*/
247
248
#define INODE_COST 64
249
#define BLOCK_COST 256
250
251
static int find_group_orlov(struct super_block *sb, struct inode *parent)
252
{
253
int parent_group = EXT2_I(parent)->i_block_group;
254
struct ext2_sb_info *sbi = EXT2_SB(sb);
255
struct ext2_super_block *es = sbi->s_es;
256
int ngroups = sbi->s_groups_count;
257
int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
258
int freei;
259
int avefreei;
260
int free_blocks;
261
int avefreeb;
262
int blocks_per_dir;
263
int ndirs;
264
int max_debt, max_dirs, min_blocks, min_inodes;
265
int group = -1, i;
266
struct ext2_group_desc *desc;
267
268
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
269
avefreei = freei / ngroups;
270
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
271
avefreeb = free_blocks / ngroups;
272
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
273
274
if ((parent == d_inode(sb->s_root)) ||
275
(EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
276
int best_ndir = inodes_per_group;
277
int best_group = -1;
278
279
parent_group = get_random_u32_below(ngroups);
280
for (i = 0; i < ngroups; i++) {
281
group = (parent_group + i) % ngroups;
282
desc = ext2_get_group_desc (sb, group, NULL);
283
if (!desc || !desc->bg_free_inodes_count)
284
continue;
285
if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
286
continue;
287
if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
288
continue;
289
if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
290
continue;
291
best_group = group;
292
best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
293
}
294
if (best_group >= 0) {
295
group = best_group;
296
goto found;
297
}
298
goto fallback;
299
}
300
301
if (ndirs == 0)
302
ndirs = 1; /* percpu_counters are approximate... */
303
304
blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs;
305
306
max_dirs = ndirs / ngroups + inodes_per_group / 16;
307
min_inodes = avefreei - inodes_per_group / 4;
308
min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4;
309
310
max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
311
if (max_debt * INODE_COST > inodes_per_group)
312
max_debt = inodes_per_group / INODE_COST;
313
if (max_debt > 255)
314
max_debt = 255;
315
if (max_debt == 0)
316
max_debt = 1;
317
318
for (i = 0; i < ngroups; i++) {
319
group = (parent_group + i) % ngroups;
320
desc = ext2_get_group_desc (sb, group, NULL);
321
if (!desc || !desc->bg_free_inodes_count)
322
continue;
323
if (sbi->s_debts[group] >= max_debt)
324
continue;
325
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
326
continue;
327
if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
328
continue;
329
if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
330
continue;
331
goto found;
332
}
333
334
fallback:
335
for (i = 0; i < ngroups; i++) {
336
group = (parent_group + i) % ngroups;
337
desc = ext2_get_group_desc (sb, group, NULL);
338
if (!desc || !desc->bg_free_inodes_count)
339
continue;
340
if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
341
goto found;
342
}
343
344
if (avefreei) {
345
/*
346
* The free-inodes counter is approximate, and for really small
347
* filesystems the above test can fail to find any blockgroups
348
*/
349
avefreei = 0;
350
goto fallback;
351
}
352
353
return -1;
354
355
found:
356
return group;
357
}
358
359
static int find_group_other(struct super_block *sb, struct inode *parent)
360
{
361
int parent_group = EXT2_I(parent)->i_block_group;
362
int ngroups = EXT2_SB(sb)->s_groups_count;
363
struct ext2_group_desc *desc;
364
int group, i;
365
366
/*
367
* Try to place the inode in its parent directory
368
*/
369
group = parent_group;
370
desc = ext2_get_group_desc (sb, group, NULL);
371
if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
372
le16_to_cpu(desc->bg_free_blocks_count))
373
goto found;
374
375
/*
376
* We're going to place this inode in a different blockgroup from its
377
* parent. We want to cause files in a common directory to all land in
378
* the same blockgroup. But we want files which are in a different
379
* directory which shares a blockgroup with our parent to land in a
380
* different blockgroup.
381
*
382
* So add our directory's i_ino into the starting point for the hash.
383
*/
384
group = (group + parent->i_ino) % ngroups;
385
386
/*
387
* Use a quadratic hash to find a group with a free inode and some
388
* free blocks.
389
*/
390
for (i = 1; i < ngroups; i <<= 1) {
391
group += i;
392
if (group >= ngroups)
393
group -= ngroups;
394
desc = ext2_get_group_desc (sb, group, NULL);
395
if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
396
le16_to_cpu(desc->bg_free_blocks_count))
397
goto found;
398
}
399
400
/*
401
* That failed: try linear search for a free inode, even if that group
402
* has no free blocks.
403
*/
404
group = parent_group;
405
for (i = 0; i < ngroups; i++) {
406
if (++group >= ngroups)
407
group = 0;
408
desc = ext2_get_group_desc (sb, group, NULL);
409
if (desc && le16_to_cpu(desc->bg_free_inodes_count))
410
goto found;
411
}
412
413
return -1;
414
415
found:
416
return group;
417
}
418
419
/*
 * Allocate a new inode of type @mode for an object created in @dir.
 *
 * A starting block group is chosen by one of the find_group_*() policies,
 * then the inode bitmaps are searched, group after group, for a free bit.
 * Once a bit has been claimed, the group descriptor and the per-cpu
 * counters are updated, the in-core inode is initialised and hashed, and
 * quota, ACL and security attributes are set up.  @qstr is handed to
 * ext2_init_security().
 *
 * Returns the new inode, or an ERR_PTR():
 *   -ENOMEM  no in-core inode could be allocated,
 *   -ENOSPC  no block group has a free inode,
 *   -EIO     a bitmap could not be read, the claimed inode number is out
 *            of range, or the inode number is already hashed,
 *   or whatever error the quota, ACL or security setup returned.
 */
struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
			     const struct qstr *qstr)
{
	struct super_block *sb = dir->i_sb;
	struct ext2_sb_info *sbi;
	struct ext2_super_block *es;
	struct ext2_inode_info *ei;
	struct ext2_group_desc *gdp;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	struct inode *inode;
	ino_t ino = 0;
	int group, tries;
	int err;

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	ei = EXT2_I(inode);
	sbi = EXT2_SB(sb);
	es = sbi->s_es;

	/* Pick the block group in which to start looking. */
	if (!S_ISDIR(mode))
		group = find_group_other(sb, dir);
	else if (test_opt(sb, OLDALLOC))
		group = find_group_dir(sb, dir);
	else
		group = find_group_orlov(sb, dir);

	if (group == -1) {
		err = -ENOSPC;
		goto fail;
	}

	/* Visit each group at most once, wrapping around at the end. */
	for (tries = 0; tries < sbi->s_groups_count; tries++) {
		gdp = ext2_get_group_desc(sb, group, &gdp_bh);
		if (gdp) {
			brelse(bitmap_bh);
			bitmap_bh = read_inode_bitmap(sb, group);
			if (!bitmap_bh) {
				err = -EIO;
				goto fail;
			}

			ino = 0;
			do {
				ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,
							      EXT2_INODES_PER_GROUP(sb), ino);
				/*
				 * Rare race: find_group_xx() decided that there
				 * were free inodes in this group, but by the
				 * time we tried to allocate one, they're all
				 * gone.  This can also occur because the
				 * counters which find_group_orlov() uses are
				 * approximate.  So just go and search the next
				 * block group.
				 */
				if (ino >= EXT2_INODES_PER_GROUP(sb))
					break;

				if (!ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
							 ino, bitmap_bh->b_data))
					goto got;

				/*
				 * We lost this inode to someone else: try the
				 * following bits of the same group, if any
				 * remain.
				 */
			} while (++ino < EXT2_INODES_PER_GROUP(sb));
		}

		/* This group is unusable or exhausted, try the next one. */
		if (++group == sbi->s_groups_count)
			group = 0;
	}

	/*
	 * Scanned all blockgroups.
	 */
	brelse(bitmap_bh);
	err = -ENOSPC;
	goto fail;

got:
	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & SB_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);
	brelse(bitmap_bh);

	/* Turn the bit position within the group into an inode number. */
	ino += group * EXT2_INODES_PER_GROUP(sb) + 1;
	if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext2_error (sb, "ext2_new_inode",
			    "reserved inode or inode > inodes count - "
			    "block_group = %d,inode=%lu", group,
			    (unsigned long) ino);
		err = -EIO;
		goto fail;
	}

	/* Filesystem-wide per-cpu counters. */
	percpu_counter_dec(&sbi->s_freeinodes_counter);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);

	/* Per-group descriptor counters and the group's directory debt. */
	spin_lock(sb_bgl_lock(sbi, group));
	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
	if (!S_ISDIR(mode)) {
		if (sbi->s_debts[group])
			sbi->s_debts[group]--;
	} else {
		if (sbi->s_debts[group] < 255)
			sbi->s_debts[group]++;
		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
	}
	spin_unlock(sb_bgl_lock(sbi, group));

	mark_buffer_dirty(gdp_bh);

	/* With GRPID the new inode takes the directory's group. */
	if (test_opt(sb, GRPID)) {
		inode->i_mode = mode;
		inode->i_uid = current_fsuid();
		inode->i_gid = dir->i_gid;
	} else {
		inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	}

	inode->i_ino = ino;
	inode->i_blocks = 0;
	simple_inode_init_ts(inode);

	/* Start from a clean ext2-private state. */
	memset(ei->i_data, 0, sizeof(ei->i_data));
	ei->i_flags =
		ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
	ei->i_faddr = 0;
	ei->i_frag_no = 0;
	ei->i_frag_size = 0;
	ei->i_file_acl = 0;
	ei->i_dir_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_alloc_info = NULL;
	ei->i_block_group = group;
	ei->i_dir_start_lookup = 0;
	ei->i_state = EXT2_STATE_NEW;
	ext2_set_inode_flags(inode);

	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);

	if (insert_inode_locked(inode) < 0) {
		ext2_error(sb, "ext2_new_inode",
			   "inode number already in use - inode=%lu",
			   (unsigned long) ino);
		err = -EIO;
		goto fail;
	}

	err = dquot_initialize(inode);
	if (err)
		goto fail_drop;

	err = dquot_alloc_inode(inode);
	if (err)
		goto fail_drop;

	err = ext2_init_acl(inode, dir);
	if (err)
		goto fail_free_drop;

	err = ext2_init_security(inode, dir, qstr);
	if (err)
		goto fail_free_drop;

	mark_inode_dirty(inode);
	ext2_debug("allocating inode %lu\n", inode->i_ino);
	ext2_preread_inode(inode);
	return inode;

fail_free_drop:
	/* Give back the quota charged by dquot_alloc_inode(). */
	dquot_free_inode(inode);

fail_drop:
	dquot_drop(inode);
	inode->i_flags |= S_NOQUOTA;
	clear_nlink(inode);
	discard_new_inode(inode);
	return ERR_PTR(err);

fail:
	make_bad_inode(inode);
	iput(inode);
	return ERR_PTR(err);
}
611
612
unsigned long ext2_count_free_inodes (struct super_block * sb)
613
{
614
struct ext2_group_desc *desc;
615
unsigned long desc_count = 0;
616
int i;
617
618
#ifdef EXT2FS_DEBUG
619
struct ext2_super_block *es;
620
unsigned long bitmap_count = 0;
621
struct buffer_head *bitmap_bh = NULL;
622
623
es = EXT2_SB(sb)->s_es;
624
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
625
unsigned x;
626
627
desc = ext2_get_group_desc (sb, i, NULL);
628
if (!desc)
629
continue;
630
desc_count += le16_to_cpu(desc->bg_free_inodes_count);
631
brelse(bitmap_bh);
632
bitmap_bh = read_inode_bitmap(sb, i);
633
if (!bitmap_bh)
634
continue;
635
636
x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
637
printk("group %d: stored = %d, counted = %u\n",
638
i, le16_to_cpu(desc->bg_free_inodes_count), x);
639
bitmap_count += x;
640
}
641
brelse(bitmap_bh);
642
printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
643
(unsigned long)
644
percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter),
645
desc_count, bitmap_count);
646
return desc_count;
647
#else
648
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
649
desc = ext2_get_group_desc (sb, i, NULL);
650
if (!desc)
651
continue;
652
desc_count += le16_to_cpu(desc->bg_free_inodes_count);
653
}
654
return desc_count;
655
#endif
656
}
657
658
/* Called at mount-time, super-block is locked */
659
unsigned long ext2_count_dirs (struct super_block * sb)
660
{
661
unsigned long count = 0;
662
int i;
663
664
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
665
struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL);
666
if (!gdp)
667
continue;
668
count += le16_to_cpu(gdp->bg_used_dirs_count);
669
}
670
return count;
671
}
672
673
674