GitHub Repository: torvalds/linux
Path: blob/master/kernel/bpf/arraymap.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include <crypto/sha2.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
        (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
         BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
        int i;

        for (i = 0; i < array->map.max_entries; i++) {
                free_percpu(array->pptrs[i]);
                cond_resched();
        }
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
        void __percpu *ptr;
        int i;

        for (i = 0; i < array->map.max_entries; i++) {
                ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
                                           GFP_USER | __GFP_NOWARN);
                if (!ptr) {
                        bpf_array_free_percpu(array);
                        return -ENOMEM;
                }
                array->pptrs[i] = ptr;
                cond_resched();
        }

        return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        int numa_node = bpf_map_attr_numa_node(attr);

        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
            attr->value_size == 0 ||
            attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
            !bpf_map_flags_access_ok(attr->map_flags) ||
            (percpu && numa_node != NUMA_NO_NODE))
                return -EINVAL;

        if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
            attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
                return -EINVAL;

        if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
            attr->map_flags & BPF_F_PRESERVE_ELEMS)
                return -EINVAL;

        /* avoid overflow on round_up(map->value_size) */
        if (attr->value_size > INT_MAX)
                return -E2BIG;
        /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
        if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
                return -E2BIG;

        return 0;
}
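
/*
 * Illustrative sketch, not part of arraymap.c: creating an array map from
 * userspace so that the attributes satisfy array_map_alloc_check() above
 * (key_size must be 4, max_entries and value_size non-zero, only flags in
 * ARRAY_CREATE_FLAG_MASK). Assumes libbpf >= 0.7 is available; the map name
 * and sizes are made up.
 */
#include <bpf/bpf.h>
#include <linux/bpf.h>
#include <stdio.h>

int create_example_array(void)
{
        LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
        int fd;

        fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "example_arr",
                            sizeof(__u32),      /* key_size, must be 4 */
                            sizeof(__u64),      /* value_size */
                            256,                /* max_entries */
                            &opts);
        if (fd < 0)
                perror("bpf_map_create");
        return fd;
}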
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        int numa_node = bpf_map_attr_numa_node(attr);
        u32 elem_size, index_mask, max_entries;
        bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
        u64 array_size, mask64;
        struct bpf_array *array;

        elem_size = round_up(attr->value_size, 8);

        max_entries = attr->max_entries;

        /* On 32 bit archs roundup_pow_of_two() with max_entries that has
         * upper most bit set in u32 space is undefined behavior due to
         * resulting 1U << 32, so do it manually here in u64 space.
         */
        mask64 = fls_long(max_entries - 1);
        mask64 = 1ULL << mask64;
        mask64 -= 1;

        index_mask = mask64;
        if (!bypass_spec_v1) {
                /* round up array size to nearest power of 2,
                 * since cpu will speculate within index_mask limits
                 */
                max_entries = index_mask + 1;
                /* Check for overflows. */
                if (max_entries < attr->max_entries)
                        return ERR_PTR(-E2BIG);
        }

        array_size = sizeof(*array);
        if (percpu) {
                array_size += (u64) max_entries * sizeof(void *);
        } else {
                /* rely on vmalloc() to return page-aligned memory and
                 * ensure array->value is exactly page-aligned
                 */
                if (attr->map_flags & BPF_F_MMAPABLE) {
                        array_size = PAGE_ALIGN(array_size);
                        array_size += PAGE_ALIGN((u64) max_entries * elem_size);
                } else {
                        array_size += (u64) max_entries * elem_size;
                }
        }

        /* allocate all map elements and zero-initialize them */
        if (attr->map_flags & BPF_F_MMAPABLE) {
                void *data;

                /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
                data = bpf_map_area_mmapable_alloc(array_size, numa_node);
                if (!data)
                        return ERR_PTR(-ENOMEM);
                array = data + PAGE_ALIGN(sizeof(struct bpf_array))
                        - offsetof(struct bpf_array, value);
        } else {
                array = bpf_map_area_alloc(array_size, numa_node);
        }
        if (!array)
                return ERR_PTR(-ENOMEM);
        array->index_mask = index_mask;
        array->map.bypass_spec_v1 = bypass_spec_v1;

        /* copy mandatory map attributes */
        bpf_map_init_from_attr(&array->map, attr);
        array->elem_size = elem_size;

        if (percpu && bpf_array_alloc_percpu(array)) {
                bpf_map_area_free(array);
                return ERR_PTR(-ENOMEM);
        }

        return &array->map;
}

static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
{
        return array->value + (u64)array->elem_size * index;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;

        if (unlikely(index >= array->map.max_entries))
                return NULL;

        return array->value + (u64)array->elem_size * (index & array->index_mask);
}
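
/*
 * Illustrative sketch, not part of arraymap.c: the usual BPF-program-side
 * consumer of array_map_lookup_elem() above, via the bpf_map_lookup_elem()
 * helper (or the inlined sequence emitted by array_map_gen_lookup() below).
 * Assumes clang -target bpf with libbpf's BPF-side headers; the map, section
 * and function names are made up.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 256);
        __type(key, __u32);
        __type(value, __u64);
} example_counters SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_write")
int count_writes(void *ctx)
{
        __u32 key = 0;
        __u64 *val;

        /* NULL only when key >= max_entries; elements are preallocated */
        val = bpf_map_lookup_elem(&example_counters, &key);
        if (val)
                __sync_fetch_and_add(val, 1);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";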
static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size,
                              void *hash_buf)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);

        sha256(array->value, (u64)array->elem_size * array->map.max_entries,
               hash_buf);
        memcpy(array->map.sha, hash_buf, sizeof(array->map.sha));
        return 0;
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
                                       u32 off)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);

        if (map->max_entries != 1)
                return -ENOTSUPP;
        if (off >= map->value_size)
                return -EINVAL;

        *imm = (unsigned long)array->value;
        return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
                                       u32 *off)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u64 base = (unsigned long)array->value;
        u64 range = array->elem_size;

        if (map->max_entries != 1)
                return -ENOTSUPP;
        if (imm < base || imm >= base + range)
                return -ENOENT;

        *off = imm - base;
        return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_insn *insn = insn_buf;
        u32 elem_size = array->elem_size;
        const int ret = BPF_REG_0;
        const int map_ptr = BPF_REG_1;
        const int index = BPF_REG_2;

        if (map->map_flags & BPF_F_INNER_MAP)
                return -EOPNOTSUPP;

        *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
        *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
        if (!map->bypass_spec_v1) {
                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
                *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
        } else {
                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
        }

        if (is_power_of_2(elem_size)) {
                *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
        } else {
                *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
        }
        *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
        *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
        *insn++ = BPF_MOV64_IMM(ret, 0);
        return insn - insn_buf;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;

        if (unlikely(index >= array->map.max_entries))
                return NULL;

        return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_insn *insn = insn_buf;

        if (!bpf_jit_supports_percpu_insn())
                return -EOPNOTSUPP;

        if (map->map_flags & BPF_F_INNER_MAP)
                return -EOPNOTSUPP;

        BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0);
        *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));

        *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
        if (!map->bypass_spec_v1) {
                *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
                *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
        } else {
                *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
        }

        *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
        *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
        *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
        *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
        *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
        *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0);
        return insn - insn_buf;
}

static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;

        if (cpu >= nr_cpu_ids)
                return NULL;

        if (unlikely(index >= array->map.max_entries))
                return NULL;

        return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        void __percpu *pptr;
        int cpu, off = 0;
        u32 size;

        if (unlikely(index >= array->map.max_entries))
                return -ENOENT;

        /* per_cpu areas are zero-filled and bpf programs can only
         * access 'value_size' of them, so copying rounded areas
         * will not leak any kernel data
         */
        size = array->elem_size;
        rcu_read_lock();
        pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
                check_and_init_map_value(map, value + off);
                off += size;
        }
        rcu_read_unlock();
        return 0;
}
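
/*
 * Illustrative sketch, not part of arraymap.c: reading one key of a
 * BPF_MAP_TYPE_PERCPU_ARRAY from userspace. bpf_percpu_array_copy() above
 * writes round_up(value_size, 8) bytes per possible CPU into the user
 * buffer, so the buffer must be sized for all possible CPUs. Assumes libbpf;
 * the function name is made up and the value type is taken to be __u64.
 */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <errno.h>
#include <stdlib.h>

int sum_percpu_slot(int map_fd, __u32 key, __u64 *sum)
{
        int ncpus = libbpf_num_possible_cpus();
        __u64 *vals;
        int i, err;

        if (ncpus < 0)
                return ncpus;

        /* __u64 is already a multiple of 8, so no extra per-CPU padding */
        vals = calloc(ncpus, sizeof(*vals));
        if (!vals)
                return -ENOMEM;

        err = bpf_map_lookup_elem(map_fd, &key, vals);
        if (!err) {
                *sum = 0;
                for (i = 0; i < ncpus; i++)
                        *sum += vals[i];
        }
        free(vals);
        return err;
}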
/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = key ? *(u32 *)key : U32_MAX;
        u32 *next = (u32 *)next_key;

        if (index >= array->map.max_entries) {
                *next = 0;
                return 0;
        }

        if (index == array->map.max_entries - 1)
                return -ENOENT;

        *next = index + 1;
        return 0;
}

/* Called from syscall or from eBPF program */
static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
                                  u64 map_flags)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        char *val;

        if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;

        if (unlikely(index >= array->map.max_entries))
                /* all elements were pre-allocated, cannot insert a new one */
                return -E2BIG;

        if (unlikely(map_flags & BPF_NOEXIST))
                /* all elements already exist */
                return -EEXIST;

        if (unlikely((map_flags & BPF_F_LOCK) &&
                     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
                return -EINVAL;

        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
                copy_map_value(map, val, value);
                bpf_obj_free_fields(array->map.record, val);
        } else {
                val = array->value +
                        (u64)array->elem_size * (index & array->index_mask);
                if (map_flags & BPF_F_LOCK)
                        copy_map_value_locked(map, val, value, false);
                else
                        copy_map_value(map, val, value);
                bpf_obj_free_fields(array->map.record, val);
        }
        return 0;
}
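
/*
 * Illustrative sketch, not part of arraymap.c: update flags as seen from the
 * syscall side of array_map_update_elem() above. All slots are preallocated,
 * so BPF_NOEXIST always fails with -EEXIST, an out-of-range index fails with
 * -E2BIG, and BPF_ANY or BPF_EXIST simply overwrite the slot. Assumes libbpf;
 * the helper name is made up.
 */
#include <bpf/bpf.h>
#include <linux/bpf.h>

int set_array_slot(int map_fd, __u32 index, __u64 value)
{
        /* BPF_NOEXIST would be rejected for array maps */
        return bpf_map_update_elem(map_fd, &index, &value, BPF_ANY);
}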
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
                            u64 map_flags)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        void __percpu *pptr;
        int cpu, off = 0;
        u32 size;

        if (unlikely(map_flags > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;

        if (unlikely(index >= array->map.max_entries))
                /* all elements were pre-allocated, cannot insert a new one */
                return -E2BIG;

        if (unlikely(map_flags == BPF_NOEXIST))
                /* all elements already exist */
                return -EEXIST;

        /* the user space will provide round_up(value_size, 8) bytes that
         * will be copied into per-cpu area. bpf programs can only access
         * value_size of it. During lookup the same extra bytes will be
         * returned or zeros which were zero-filled by percpu_alloc,
         * so no kernel data leaks possible
         */
        size = array->elem_size;
        rcu_read_lock();
        pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
                bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
                off += size;
        }
        rcu_read_unlock();
        return 0;
}

/* Called from syscall or from eBPF program */
static long array_map_delete_elem(struct bpf_map *map, void *key)
{
        return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
        return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_internal_structs(struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;

        /* We don't reset or free fields other than timer and workqueue
         * on uref dropping to zero.
         */
        if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
                for (i = 0; i < array->map.max_entries; i++) {
                        if (btf_record_has_field(map->record, BPF_TIMER))
                                bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
                        if (btf_record_has_field(map->record, BPF_WORKQUEUE))
                                bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
                        if (btf_record_has_field(map->record, BPF_TASK_WORK))
                                bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
                }
        }
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;

        if (!IS_ERR_OR_NULL(map->record)) {
                if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                        for (i = 0; i < array->map.max_entries; i++) {
                                void __percpu *pptr = array->pptrs[i & array->index_mask];
                                int cpu;

                                for_each_possible_cpu(cpu) {
                                        bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
                                        cond_resched();
                                }
                        }
                } else {
                        for (i = 0; i < array->map.max_entries; i++)
                                bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
                }
        }

        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                bpf_array_free_percpu(array);

        if (array->map.map_flags & BPF_F_MMAPABLE)
                bpf_map_area_free(array_map_vmalloc_addr(array));
        else
                bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
                                    struct seq_file *m)
{
        void *value;

        rcu_read_lock();

        value = array_map_lookup_elem(map, key);
        if (!value) {
                rcu_read_unlock();
                return;
        }

        if (map->btf_key_type_id)
                seq_printf(m, "%u: ", *(u32 *)key);
        btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
        seq_putc(m, '\n');

        rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
                                           struct seq_file *m)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        void __percpu *pptr;
        int cpu;

        rcu_read_lock();

        seq_printf(m, "%u: {\n", *(u32 *)key);
        pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                seq_printf(m, "\tcpu%d: ", cpu);
                btf_type_seq_show(map->btf, map->btf_value_type_id,
                                  per_cpu_ptr(pptr, cpu), m);
                seq_putc(m, '\n');
        }
        seq_puts(m, "}\n");

        rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
                               const struct btf *btf,
                               const struct btf_type *key_type,
                               const struct btf_type *value_type)
{
        /* One exception for keyless BTF: .bss/.data/.rodata map */
        if (btf_type_is_void(key_type)) {
                if (map->map_type != BPF_MAP_TYPE_ARRAY ||
                    map->max_entries != 1)
                        return -EINVAL;

                if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
                        return -EINVAL;

                return 0;
        }

        /*
         * Bpf array can only take a u32 key. This check makes sure
         * that the btf matches the attr used during map_create.
         */
        if (!btf_type_is_i32(key_type))
                return -EINVAL;

        return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

        if (!(map->map_flags & BPF_F_MMAPABLE))
                return -EINVAL;

        if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
            PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
                return -EINVAL;

        return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
                                   vma->vm_pgoff + pgoff);
}
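
/*
 * Illustrative sketch, not part of arraymap.c: mapping a BPF_F_MMAPABLE
 * array into userspace, which ends up in array_map_mmap() above. The length
 * must stay within PAGE_ALIGN(max_entries * round_up(value_size, 8)).
 * Assumes a map created with BPF_F_MMAPABLE and __u64 values, as in the
 * earlier creation sketch; the function name is made up.
 */
#include <linux/types.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/mman.h>

__u64 *map_array_values(int map_fd, size_t max_entries)
{
        size_t len = max_entries * sizeof(__u64);
        void *p;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return NULL;
        }
        return p;       /* loads/stores here need no bpf() syscalls */
}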
static bool array_map_meta_equal(const struct bpf_map *meta0,
                                 const struct bpf_map *meta1)
{
        if (!bpf_map_meta_equal(meta0, meta1))
                return false;
        return meta0->map_flags & BPF_F_INNER_MAP ? true :
               meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
        struct bpf_map *map;
        void *percpu_value_buf;
        u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_array_map_info *info = seq->private;
        struct bpf_map *map = info->map;
        struct bpf_array *array;
        u32 index;

        if (info->index >= map->max_entries)
                return NULL;

        if (*pos == 0)
                ++*pos;
        array = container_of(map, struct bpf_array, map);
        index = info->index & array->index_mask;
        if (info->percpu_value_buf)
                return (void *)(uintptr_t)array->pptrs[index];
        return array_map_elem_ptr(array, index);
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_array_map_info *info = seq->private;
        struct bpf_map *map = info->map;
        struct bpf_array *array;
        u32 index;

        ++*pos;
        ++info->index;
        if (info->index >= map->max_entries)
                return NULL;

        array = container_of(map, struct bpf_array, map);
        index = info->index & array->index_mask;
        if (info->percpu_value_buf)
                return (void *)(uintptr_t)array->pptrs[index];
        return array_map_elem_ptr(array, index);
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
        struct bpf_iter_seq_array_map_info *info = seq->private;
        struct bpf_iter__bpf_map_elem ctx = {};
        struct bpf_map *map = info->map;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        int off = 0, cpu = 0;
        void __percpu *pptr;
        u32 size;

        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, v == NULL);
        if (!prog)
                return 0;

        ctx.meta = &meta;
        ctx.map = info->map;
        if (v) {
                ctx.key = &info->index;

                if (!info->percpu_value_buf) {
                        ctx.value = v;
                } else {
                        pptr = (void __percpu *)(uintptr_t)v;
                        size = array->elem_size;
                        for_each_possible_cpu(cpu) {
                                copy_map_value_long(map, info->percpu_value_buf + off,
                                                    per_cpu_ptr(pptr, cpu));
                                check_and_init_map_value(map, info->percpu_value_buf + off);
                                off += size;
                        }
                        ctx.value = info->percpu_value_buf;
                }
        }

        return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
        return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
        if (!v)
                (void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
                                   struct bpf_iter_aux_info *aux)
{
        struct bpf_iter_seq_array_map_info *seq_info = priv_data;
        struct bpf_map *map = aux->map;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        void *value_buf;
        u32 buf_size;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                buf_size = array->elem_size * num_possible_cpus();
                value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
                if (!value_buf)
                        return -ENOMEM;

                seq_info->percpu_value_buf = value_buf;
        }

        /* bpf_iter_attach_map() acquires a map uref, and the uref may be
         * released before or in the middle of iterating map elements, so
         * acquire an extra map uref for iterator.
         */
        bpf_map_inc_with_uref(map);
        seq_info->map = map;
        return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
        struct bpf_iter_seq_array_map_info *seq_info = priv_data;

        bpf_map_put_with_uref(seq_info->map);
        kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
        .start = bpf_array_map_seq_start,
        .next = bpf_array_map_seq_next,
        .stop = bpf_array_map_seq_stop,
        .show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
        .seq_ops = &bpf_array_map_seq_ops,
        .init_seq_private = bpf_iter_init_array_map,
        .fini_seq_private = bpf_iter_fini_array_map,
        .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
                                    void *callback_ctx, u64 flags)
{
        u32 i, key, num_elems = 0;
        struct bpf_array *array;
        bool is_percpu;
        u64 ret = 0;
        void *val;

        cant_migrate();

        if (flags != 0)
                return -EINVAL;

        is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        array = container_of(map, struct bpf_array, map);
        for (i = 0; i < map->max_entries; i++) {
                if (is_percpu)
                        val = this_cpu_ptr(array->pptrs[i]);
                else
                        val = array_map_elem_ptr(array, i);
                num_elems++;
                key = i;
                ret = callback_fn((u64)(long)map, (u64)(long)&key,
                                  (u64)(long)val, (u64)(long)callback_ctx, 0);
                /* return value: 0 - continue, 1 - stop and return */
                if (ret)
                        break;
        }

        return num_elems;
}
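
/*
 * Illustrative sketch, not part of arraymap.c: BPF-program-side use of the
 * bpf_for_each_map_elem() helper, which reaches bpf_for_each_array_elem()
 * above for array maps. Builds on the hypothetical example_counters map and
 * the BPF-side headers from the earlier sketch.
 */
static long zero_slot(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
{
        *val = 0;
        return 0;       /* 0 - continue, 1 - stop, mirroring the comment above */
}

SEC("syscall")
int reset_counters(void *ctx)
{
        /* the helper returns the number of elements visited */
        return bpf_for_each_map_elem(&example_counters, zero_slot, NULL, 0);
}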
static u64 array_map_mem_usage(const struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        u32 elem_size = array->elem_size;
        u64 entries = map->max_entries;
        u64 usage = sizeof(*array);

        if (percpu) {
                usage += entries * sizeof(void *);
                usage += entries * elem_size * num_possible_cpus();
        } else {
                if (map->map_flags & BPF_F_MMAPABLE) {
                        usage = PAGE_ALIGN(usage);
                        usage += PAGE_ALIGN(entries * elem_size);
                } else {
                        usage += entries * elem_size;
                }
        }
        return usage;
}

BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
        .map_meta_equal = array_map_meta_equal,
        .map_alloc_check = array_map_alloc_check,
        .map_alloc = array_map_alloc,
        .map_free = array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_release_uref = array_map_free_internal_structs,
        .map_lookup_elem = array_map_lookup_elem,
        .map_update_elem = array_map_update_elem,
        .map_delete_elem = array_map_delete_elem,
        .map_gen_lookup = array_map_gen_lookup,
        .map_direct_value_addr = array_map_direct_value_addr,
        .map_direct_value_meta = array_map_direct_value_meta,
        .map_mmap = array_map_mmap,
        .map_seq_show_elem = array_map_seq_show_elem,
        .map_check_btf = array_map_check_btf,
        .map_lookup_batch = generic_map_lookup_batch,
        .map_update_batch = generic_map_update_batch,
        .map_set_for_each_callback_args = map_set_for_each_callback_args,
        .map_for_each_callback = bpf_for_each_array_elem,
        .map_mem_usage = array_map_mem_usage,
        .map_btf_id = &array_map_btf_ids[0],
        .iter_seq_info = &iter_seq_info,
        .map_get_hash = &array_map_get_hash,
};

const struct bpf_map_ops percpu_array_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc_check = array_map_alloc_check,
        .map_alloc = array_map_alloc,
        .map_free = array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = percpu_array_map_lookup_elem,
        .map_gen_lookup = percpu_array_map_gen_lookup,
        .map_update_elem = array_map_update_elem,
        .map_delete_elem = array_map_delete_elem,
        .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
        .map_seq_show_elem = percpu_array_map_seq_show_elem,
        .map_check_btf = array_map_check_btf,
        .map_lookup_batch = generic_map_lookup_batch,
        .map_update_batch = generic_map_update_batch,
        .map_set_for_each_callback_args = map_set_for_each_callback_args,
        .map_for_each_callback = bpf_for_each_array_elem,
        .map_mem_usage = array_map_mem_usage,
        .map_btf_id = &array_map_btf_ids[0],
        .iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
        /* only file descriptors can be stored in this type of map */
        if (attr->value_size != sizeof(u32))
                return -EINVAL;
        /* Program read-only/write-only not supported for special maps yet. */
        if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
                return -EINVAL;
        return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;

        /* make sure it's empty */
        for (i = 0; i < array->map.max_entries; i++)
                BUG_ON(array->ptrs[i] != NULL);

        bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
        return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
        void **elem, *ptr;
        int ret = 0;

        if (!map->ops->map_fd_sys_lookup_elem)
                return -ENOTSUPP;

        rcu_read_lock();
        elem = array_map_lookup_elem(map, key);
        if (elem && (ptr = READ_ONCE(*elem)))
                *value = map->ops->map_fd_sys_lookup_elem(ptr);
        else
                ret = -ENOENT;
        rcu_read_unlock();

        return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
                                 void *key, void *value, u64 map_flags)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        void *new_ptr, *old_ptr;
        u32 index = *(u32 *)key, ufd;

        if (map_flags != BPF_ANY)
                return -EINVAL;

        if (index >= array->map.max_entries)
                return -E2BIG;

        ufd = *(u32 *)value;
        new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
        if (IS_ERR(new_ptr))
                return PTR_ERR(new_ptr);

        if (map->ops->map_poke_run) {
                mutex_lock(&array->aux->poke_mutex);
                old_ptr = xchg(array->ptrs + index, new_ptr);
                map->ops->map_poke_run(map, index, old_ptr, new_ptr);
                mutex_unlock(&array->aux->poke_mutex);
        } else {
                old_ptr = xchg(array->ptrs + index, new_ptr);
        }

        if (old_ptr)
                map->ops->map_fd_put_ptr(map, old_ptr, true);
        return 0;
}

static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        void *old_ptr;
        u32 index = *(u32 *)key;

        if (index >= array->map.max_entries)
                return -E2BIG;

        if (map->ops->map_poke_run) {
                mutex_lock(&array->aux->poke_mutex);
                old_ptr = xchg(array->ptrs + index, NULL);
                map->ops->map_poke_run(map, index, old_ptr, NULL);
                mutex_unlock(&array->aux->poke_mutex);
        } else {
                old_ptr = xchg(array->ptrs + index, NULL);
        }

        if (old_ptr) {
                map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
                return 0;
        } else {
                return -ENOENT;
        }
}

static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
        return __fd_array_map_delete_elem(map, key, true);
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
                                   struct file *map_file, int fd)
{
        struct bpf_prog *prog = bpf_prog_get(fd);
        bool is_extended;

        if (IS_ERR(prog))
                return prog;

        if (prog->type == BPF_PROG_TYPE_EXT ||
            !bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }

        mutex_lock(&prog->aux->ext_mutex);
        is_extended = prog->aux->is_extended;
        if (!is_extended)
                prog->aux->prog_array_member_cnt++;
        mutex_unlock(&prog->aux->ext_mutex);
        if (is_extended) {
                /* Extended prog can not be tail callee. It's to prevent a
                 * potential infinite loop like:
                 * tail callee prog entry -> tail callee prog subprog ->
                 * freplace prog entry --tailcall-> tail callee prog entry.
                 */
                bpf_prog_put(prog);
                return ERR_PTR(-EBUSY);
        }

        return prog;
}

static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
        struct bpf_prog *prog = ptr;

        mutex_lock(&prog->aux->ext_mutex);
        prog->aux->prog_array_member_cnt--;
        mutex_unlock(&prog->aux->ext_mutex);
        /* bpf_prog is freed after one RCU or tasks trace grace period */
        bpf_prog_put(prog);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
        return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;

        for (i = 0; i < array->map.max_entries; i++)
                __fd_array_map_delete_elem(map, &i, need_defer);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
                                         struct seq_file *m)
{
        void **elem, *ptr;
        u32 prog_id;

        rcu_read_lock();

        elem = array_map_lookup_elem(map, key);
        if (elem) {
                ptr = READ_ONCE(*elem);
                if (ptr) {
                        seq_printf(m, "%u: ", *(u32 *)key);
                        prog_id = prog_fd_array_sys_lookup_elem(ptr);
                        btf_type_seq_show(map->btf, map->btf_value_type_id,
                                          &prog_id, m);
                        seq_putc(m, '\n');
                }
        }

        rcu_read_unlock();
}

struct prog_poke_elem {
        struct list_head list;
        struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
                                     struct bpf_prog_aux *prog_aux)
{
        struct prog_poke_elem *elem;
        struct bpf_array_aux *aux;
        int ret = 0;

        aux = container_of(map, struct bpf_array, map)->aux;
        mutex_lock(&aux->poke_mutex);
        list_for_each_entry(elem, &aux->poke_progs, list) {
                if (elem->aux == prog_aux)
                        goto out;
        }

        elem = kmalloc(sizeof(*elem), GFP_KERNEL);
        if (!elem) {
                ret = -ENOMEM;
                goto out;
        }

        INIT_LIST_HEAD(&elem->list);
        /* We must track the program's aux info at this point in time
         * since the program pointer itself may not be stable yet, see
         * also comment in prog_array_map_poke_run().
         */
        elem->aux = prog_aux;

        list_add_tail(&elem->list, &aux->poke_progs);
out:
        mutex_unlock(&aux->poke_mutex);
        return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
                                        struct bpf_prog_aux *prog_aux)
{
        struct prog_poke_elem *elem, *tmp;
        struct bpf_array_aux *aux;

        aux = container_of(map, struct bpf_array, map)->aux;
        mutex_lock(&aux->poke_mutex);
        list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
                if (elem->aux == prog_aux) {
                        list_del_init(&elem->list);
                        kfree(elem);
                        break;
                }
        }
        mutex_unlock(&aux->poke_mutex);
}

void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
                                      struct bpf_prog *new, struct bpf_prog *old)
{
        WARN_ON_ONCE(1);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
                                    struct bpf_prog *old,
                                    struct bpf_prog *new)
{
        struct prog_poke_elem *elem;
        struct bpf_array_aux *aux;

        aux = container_of(map, struct bpf_array, map)->aux;
        WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

        list_for_each_entry(elem, &aux->poke_progs, list) {
                struct bpf_jit_poke_descriptor *poke;
                int i;

                for (i = 0; i < elem->aux->size_poke_tab; i++) {
                        poke = &elem->aux->poke_tab[i];

                        /* Few things to be aware of:
                         *
                         * 1) We can only ever access aux in this context, but
                         *    not aux->prog since it might not be stable yet and
                         *    there could be danger of use after free otherwise.
                         * 2) Initially when we start tracking aux, the program
                         *    is not JITed yet and also does not have a kallsyms
                         *    entry. We skip these as poke->tailcall_target_stable
                         *    is not active yet. The JIT will do the final fixup
                         *    before setting it stable. The various
                         *    poke->tailcall_target_stable are successively
                         *    activated, so tail call updates can arrive from here
                         *    while JIT is still finishing its final fixup for
                         *    non-activated poke entries.
                         * 3) Also programs reaching refcount of zero while patching
                         *    is in progress is okay since we're protected under
                         *    poke_mutex and untrack the programs before the JIT
                         *    buffer is freed.
                         */
                        if (!READ_ONCE(poke->tailcall_target_stable))
                                continue;
                        if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
                                continue;
                        if (poke->tail_call.map != map ||
                            poke->tail_call.key != key)
                                continue;

                        bpf_arch_poke_desc_update(poke, new, old);
                }
        }
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
        struct bpf_map *map = container_of(work, struct bpf_array_aux,
                                           work)->map;
        bpf_fd_array_map_clear(map, true);
        bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
        struct bpf_array_aux *aux = container_of(map, struct bpf_array,
                                                 map)->aux;
        bpf_map_inc(map);
        schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
        struct bpf_array_aux *aux;
        struct bpf_map *map;

        aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
        if (!aux)
                return ERR_PTR(-ENOMEM);

        INIT_WORK(&aux->work, prog_array_map_clear_deferred);
        INIT_LIST_HEAD(&aux->poke_progs);
        mutex_init(&aux->poke_mutex);

        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
                kfree(aux);
                return map;
        }

        container_of(map, struct bpf_array, map)->aux = aux;
        aux->map = map;

        return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
        struct prog_poke_elem *elem, *tmp;
        struct bpf_array_aux *aux;

        aux = container_of(map, struct bpf_array, map)->aux;
        list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
                list_del_init(&elem->list);
                kfree(elem);
        }
        kfree(aux);
        fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
const struct bpf_map_ops prog_array_map_ops = {
        .map_alloc_check = fd_array_map_alloc_check,
        .map_alloc = prog_array_map_alloc,
        .map_free = prog_array_map_free,
        .map_poke_track = prog_array_map_poke_track,
        .map_poke_untrack = prog_array_map_poke_untrack,
        .map_poke_run = prog_array_map_poke_run,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = fd_array_map_lookup_elem,
        .map_delete_elem = fd_array_map_delete_elem,
        .map_fd_get_ptr = prog_fd_array_get_ptr,
        .map_fd_put_ptr = prog_fd_array_put_ptr,
        .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
        .map_release_uref = prog_array_map_clear,
        .map_seq_show_elem = prog_array_map_seq_show_elem,
        .map_mem_usage = array_map_mem_usage,
        .map_btf_id = &array_map_btf_ids[0],
};
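
/*
 * Illustrative sketch, not part of arraymap.c: the main consumer of
 * prog_array_map_ops is bpf_tail_call(). Userspace stores program fds in a
 * BPF_MAP_TYPE_PROG_ARRAY (resolved by prog_fd_array_get_ptr() above), and a
 * running program jumps to the program in the chosen slot. Assumes clang
 * -target bpf with libbpf's BPF-side headers; names are made up.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
        __uint(max_entries, 4);
        __type(key, __u32);
        __type(value, __u32);
} example_jump_table SEC(".maps");

SEC("xdp")
int example_dispatcher(struct xdp_md *ctx)
{
        /* on success this never returns; slot 0 must have been populated */
        bpf_tail_call(ctx, &example_jump_table, 0);
        return XDP_PASS;        /* fall through when the slot is empty */
}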
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
                                                   struct file *map_file)
{
        struct bpf_event_entry *ee;

        ee = kzalloc(sizeof(*ee), GFP_KERNEL);
        if (ee) {
                ee->event = perf_file->private_data;
                ee->perf_file = perf_file;
                ee->map_file = map_file;
        }

        return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
        struct bpf_event_entry *ee;

        ee = container_of(rcu, struct bpf_event_entry, rcu);
        fput(ee->perf_file);
        kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
        call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
                                         struct file *map_file, int fd)
{
        struct bpf_event_entry *ee;
        struct perf_event *event;
        struct file *perf_file;
        u64 value;

        perf_file = perf_event_get(fd);
        if (IS_ERR(perf_file))
                return perf_file;

        ee = ERR_PTR(-EOPNOTSUPP);
        event = perf_file->private_data;
        if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
                goto err_out;

        ee = bpf_event_entry_gen(perf_file, map_file);
        if (ee)
                return ee;
        ee = ERR_PTR(-ENOMEM);
err_out:
        fput(perf_file);
        return ee;
}

static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
        /* bpf_perf_event is freed after one RCU grace period */
        bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
                                        struct file *map_file)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_event_entry *ee;
        int i;

        if (map->map_flags & BPF_F_PRESERVE_ELEMS)
                return;

        rcu_read_lock();
        for (i = 0; i < array->map.max_entries; i++) {
                ee = READ_ONCE(array->ptrs[i]);
                if (ee && ee->map_file == map_file)
                        __fd_array_map_delete_elem(map, &i, true);
        }
        rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
        if (map->map_flags & BPF_F_PRESERVE_ELEMS)
                bpf_fd_array_map_clear(map, false);
        fd_array_map_free(map);
}

const struct bpf_map_ops perf_event_array_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc_check = fd_array_map_alloc_check,
        .map_alloc = array_map_alloc,
        .map_free = perf_event_fd_array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = fd_array_map_lookup_elem,
        .map_delete_elem = fd_array_map_delete_elem,
        .map_fd_get_ptr = perf_event_fd_array_get_ptr,
        .map_fd_put_ptr = perf_event_fd_array_put_ptr,
        .map_release = perf_event_fd_array_release,
        .map_check_btf = map_check_no_btf,
        .map_mem_usage = array_map_mem_usage,
        .map_btf_id = &array_map_btf_ids[0],
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
                                     struct file *map_file /* not used */,
                                     int fd)
{
        return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
        /* cgroup_put free cgrp after a rcu grace period */
        cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
        bpf_fd_array_map_clear(map, false);
        fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc_check = fd_array_map_alloc_check,
        .map_alloc = array_map_alloc,
        .map_free = cgroup_fd_array_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = fd_array_map_lookup_elem,
        .map_delete_elem = fd_array_map_delete_elem,
        .map_fd_get_ptr = cgroup_fd_array_get_ptr,
        .map_fd_put_ptr = cgroup_fd_array_put_ptr,
        .map_check_btf = map_check_no_btf,
        .map_mem_usage = array_map_mem_usage,
        .map_btf_id = &array_map_btf_ids[0],
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
        struct bpf_map *map, *inner_map_meta;

        inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
        if (IS_ERR(inner_map_meta))
                return inner_map_meta;

        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
                bpf_map_meta_free(inner_map_meta);
                return map;
        }

        map->inner_map_meta = inner_map_meta;

        return map;
}

static void array_of_map_free(struct bpf_map *map)
{
        /* map->inner_map_meta is only accessed by syscall which
         * is protected by fdget/fdput.
         */
        bpf_map_meta_free(map->inner_map_meta);
        bpf_fd_array_map_clear(map, false);
        fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_map **inner_map = array_map_lookup_elem(map, key);

        if (!inner_map)
                return NULL;

        return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
                                   struct bpf_insn *insn_buf)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 elem_size = array->elem_size;
        struct bpf_insn *insn = insn_buf;
        const int ret = BPF_REG_0;
        const int map_ptr = BPF_REG_1;
        const int index = BPF_REG_2;

        *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
        *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
        if (!map->bypass_spec_v1) {
                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
                *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
        } else {
                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
        }
        if (is_power_of_2(elem_size))
                *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
        else
                *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
        *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
        *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
        *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
        *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
        *insn++ = BPF_MOV64_IMM(ret, 0);

        return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
        .map_alloc_check = fd_array_map_alloc_check,
        .map_alloc = array_of_map_alloc,
        .map_free = array_of_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = array_of_map_lookup_elem,
        .map_delete_elem = fd_array_map_delete_elem,
        .map_fd_get_ptr = bpf_map_fd_get_ptr,
        .map_fd_put_ptr = bpf_map_fd_put_ptr,
        .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
        .map_gen_lookup = array_of_map_gen_lookup,
        .map_lookup_batch = generic_map_lookup_batch,
        .map_update_batch = generic_map_update_batch,
        .map_check_btf = map_check_no_btf,
        .map_mem_usage = array_map_mem_usage,
        .map_btf_id = &array_map_btf_ids[0],
};
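
/*
 * Illustrative sketch, not part of arraymap.c: creating a
 * BPF_MAP_TYPE_ARRAY_OF_MAPS from userspace. array_of_map_alloc() above
 * snapshots the inner map's attributes from attr->inner_map_fd, and every
 * map inserted later must match that template. Assumes libbpf >= 0.7; the
 * names and sizes are made up.
 */
#include <bpf/bpf.h>

int create_example_outer_array(void)
{
        LIBBPF_OPTS(bpf_map_create_opts, opts);
        int inner_fd, outer_fd;

        /* template inner map: a plain array of 16 __u64 values */
        inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "example_inner",
                                  sizeof(__u32), sizeof(__u64), 16, NULL);
        if (inner_fd < 0)
                return inner_fd;

        opts.inner_map_fd = inner_fd;
        /* outer values are map fds, so value_size must be sizeof(u32) */
        outer_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "example_outer",
                                  sizeof(__u32), sizeof(__u32), 8, &opts);

        /* the template fd only serves as a prototype for the outer map */
        return outer_fd;
}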