// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <argp.h>
#include <unistd.h>
#include <stdint.h>
#include "bpf_util.h"
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"

#define MAX_TRIG_BATCH_ITERS 1000
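
/* Batched benchmarks re-run an in-kernel "driver" BPF program via
 * bpf_prog_test_run_opts(); the driver loops batch_iters times per test
 * run, so a single syscall produces many probe hits and per-syscall
 * overhead is amortized out of the measurement.
 */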

static struct {
	__u32 batch_iters;
} args = {
	.batch_iters = 100,
};
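
/* argp keys outside the printable-character range have no short-option
 * form; the value only needs to be unique among the benchmarks that share
 * the bench binary's argument parser.
 */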

enum {
	ARG_TRIG_BATCH_ITERS = 7000,
};

static const struct argp_option opts[] = {
	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
		"Number of in-kernel iterations per one driver test run"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_TRIG_BATCH_ITERS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
			fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
				1, MAX_TRIG_BATCH_ITERS);
			argp_usage(state);
		}
		args.batch_iters = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_trigger_batch_argp = {
	.options = opts,
	.parser = parse_arg,
};
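
/* Referenced from the .argp field of the batched bench definitions below,
 * so the shared bench harness picks up --trig-batch-iters for them.
 */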

/* adjust slot shift in inc_counter() if changing */
#define MAX_BUCKETS 256

#pragma GCC diagnostic ignored "-Wattributes"

/* BPF triggering benchmarks */
static struct trigger_ctx {
	struct trigger_bench *skel;
	bool usermode_counters;
	int driver_prog_fd;
} ctx;

static struct counter base_hits[MAX_BUCKETS];

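/* 2654435769 is 2^32 divided by the golden ratio (Knuth's multiplicative
 * hashing): multiplying the TID by it scrambles the bits, and keeping the
 * top byte (slot >>= 24) picks one of MAX_BUCKETS (256) counters, which
 * spreads producer threads across cache lines.
 */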
static __always_inline void inc_counter(struct counter *counters)
{
	static __thread int tid = 0;
	unsigned slot;

	if (unlikely(tid == 0))
		tid = sys_gettid();

	/* multiplicative hashing, it's fast */
	slot = 2654435769U * tid;
	slot >>= 24;

	atomic_inc(&counters[slot].value); /* use highest byte as an index */
}

static long sum_and_reset_counters(struct counter *counters)
{
	int i;
	long sum = 0;

	for (i = 0; i < MAX_BUCKETS; i++)
		sum += atomic_swap(&counters[i].value, 0);
	return sum;
}

static void trigger_validate(void)
{
	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer!\n");
		exit(1);
	}
}

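/* getpgid() is used as a cheap, always-available syscall to generate
 * syscall-entry events; its return value is ignored.
 */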
static void *trigger_producer(void *input)
{
	if (ctx.usermode_counters) {
		while (true) {
			(void)syscall(__NR_getpgid);
			inc_counter(base_hits);
		}
	} else {
		while (true)
			(void)syscall(__NR_getpgid);
	}
	return NULL;
}

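/* Batched producers do no per-iteration work in user space; they just keep
 * re-running the driver program. ctx.driver_prog_fd is only set by setups
 * that override the default driver, and the GNU "a ?: b" expression falls
 * back to trigger_driver's fd otherwise.
 */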
static void *trigger_producer_batch(void *input)
{
	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);

	while (true)
		bpf_prog_test_run_opts(fd, NULL);

	return NULL;
}

static void trigger_measure(struct bench_res *res)
{
	if (ctx.usermode_counters)
		res->hits = sum_and_reset_counters(base_hits);
	else
		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}

static void setup_ctx(void)
{
	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	/* default "driver" BPF program */
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);

	ctx.skel->rodata->batch_iters = args.batch_iters;
}

static void load_ctx(void)
{
	int err;

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}
}

static void attach_bpf(struct bpf_program *prog)
{
	struct bpf_link *link;

	link = bpf_program__attach(prog);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

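/* Most *_setup() functions below enable autoload only for the program(s)
 * under test, so the skeleton loads (and the verifier checks) just those
 * programs plus the chosen driver.
 */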
static void trigger_syscall_count_setup(void)
{
	ctx.usermode_counters = true;
}

/* Batched, staying mostly in-kernel triggering setups */
static void trigger_kernel_count_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}
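
/* Here trigger_count acts as both driver and counter: nothing is attached
 * to it, so this measures the bare cost of the in-kernel triggering loop
 * and serves as a baseline for the probe benchmarks below.
 */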

static void trigger_kprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}

static void trigger_kretprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}

static void trigger_kprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}

static void trigger_kretprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}

static void trigger_fentry_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
	char **syms = NULL;
	size_t cnt = 0;

	/* Some recursive functions will be skipped in
	 * bpf_get_ksyms -> skip_entry, as they can introduce significant
	 * overhead. However, it's difficult to skip all the recursive
	 * functions for a debug kernel.
	 *
	 * So, don't run the kprobe-multi-all and kretprobe-multi-all on
	 * a debug kernel.
	 */
	if (bpf_get_ksyms(&syms, &cnt, true)) {
		fprintf(stderr, "failed to get ksyms\n");
		exit(1);
	}

	opts.syms = (const char **) syms;
	opts.cnt = cnt;
	opts.retprobe = kretprobe;
	/* attach empty to all the kernel functions except bpf_get_numa_node_id. */
	if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
		fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
		exit(1);
	}
}

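/* The *-multi-all benchmarks attach the empty program to (nearly) every
 * kernel symbol while the regular multi program still counts hits on the
 * driver's target, so the score reflects how a huge attachment set affects
 * per-hit cost.
 */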
static void trigger_kprobe_multi_all_setup(void)
{
	struct bpf_program *prog, *empty;

	setup_ctx();
	empty = ctx.skel->progs.bench_kprobe_multi_empty;
	prog = ctx.skel->progs.bench_trigger_kprobe_multi;
	bpf_program__set_autoload(empty, true);
	bpf_program__set_autoload(prog, true);
	load_ctx();

	attach_ksyms_all(empty, false);
	attach_bpf(prog);
}

static void trigger_kretprobe_multi_all_setup(void)
{
	struct bpf_program *prog, *empty;

	setup_ctx();
	empty = ctx.skel->progs.bench_kretprobe_multi_empty;
	prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
	bpf_program__set_autoload(empty, true);
	bpf_program__set_autoload(prog, true);
	load_ctx();

	attach_ksyms_all(empty, true);
	attach_bpf(prog);
}

static void trigger_fexit_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}

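/* fmodret, tp, and rawtp can't hook the default driver's helper call, so
 * these setups switch to trigger_driver_kfunc, whose kfunc call provides
 * the attach/trigger point those program types need.
 */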
static void trigger_fmodret_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}

static void trigger_tp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_tp);
}

static void trigger_rawtp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}

/* make sure call is not inlined and not avoided by compiler, so __weak and
 * inline asm volatile in the body of the function
 *
 * There is a performance difference between uprobing at nop location vs other
 * instructions. So use two different targets, one of which starts with nop
 * and another doesn't.
 *
 * GCC doesn't generate stack setup preamble for these functions due to them
 * having no input arguments and doing nothing in the body.
 */
__nocf_check __weak void uprobe_target_nop(void)
{
	asm volatile ("nop");
}

__weak void opaque_noop_func(void)
{
}

__nocf_check __weak int uprobe_target_push(void)
{
	/* overhead of function call is negligible compared to uprobe
	 * triggering, so this shouldn't affect benchmark results much
	 */
	opaque_noop_func();
	return 1;
}

__nocf_check __weak void uprobe_target_ret(void)
{
	asm volatile ("");
}

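/* The target names describe the first instruction at the probed address:
 * a nop, a push (forced by the function call), or a bare ret. Uprobe cost
 * differs by instruction type, since some instructions can be emulated
 * in-kernel while others must be single-stepped out of line.
 */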
static void *uprobe_producer_count(void *input)
{
	while (true) {
		uprobe_target_nop();
		inc_counter(base_hits);
	}
	return NULL;
}

static void *uprobe_producer_nop(void *input)
{
	while (true)
		uprobe_target_nop();
	return NULL;
}

static void *uprobe_producer_push(void *input)
{
	while (true)
		uprobe_target_push();
	return NULL;
}

static void *uprobe_producer_ret(void *input)
{
	while (true)
		uprobe_target_ret();
	return NULL;
}

#ifdef __x86_64__
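/* 0x0f 0x1f 0x44 0x00 0x00 encodes nopl 0x0(%rax,%rax,1), the canonical
 * 5-byte x86-64 NOP, giving a probe site the size of a call instruction.
 */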
__nocf_check __weak void uprobe_target_nop5(void)
{
	asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
}

static void *uprobe_producer_nop5(void *input)
{
	while (true)
		uprobe_target_nop5();
	return NULL;
}
#endif

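/* usetup() attaches a uprobe (single or multi, entry or return) to one of
 * the targets above. get_uprobe_offset() translates the target's runtime
 * address into the file offset within /proc/self/exe that the uprobe
 * attach API expects.
 */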
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
	size_t uprobe_offset;
	struct bpf_link *link;
	int err;

	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	if (use_multi)
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
	else
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	uprobe_offset = get_uprobe_offset(target_addr);
	if (use_multi) {
		LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
			.retprobe = use_retprobe,
			.cnt = 1,
			.offsets = &uprobe_offset,
		);
		link = bpf_program__attach_uprobe_multi(
			ctx.skel->progs.bench_trigger_uprobe_multi,
			-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
		ctx.skel->links.bench_trigger_uprobe_multi = link;
	} else {
		link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
						  use_retprobe,
						  -1 /* all PIDs */,
						  "/proc/self/exe",
						  uprobe_offset);
		ctx.skel->links.bench_trigger_uprobe = link;
	}
	if (!link) {
		fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
		exit(1);
	}
}

static void usermode_count_setup(void)
{
	ctx.usermode_counters = true;
}

static void uprobe_nop_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_nop);
}

static void uretprobe_nop_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_nop);
}

static void uprobe_push_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_push);
}

static void uretprobe_push_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_push);
}

static void uprobe_ret_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_ret);
}

static void uretprobe_ret_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_ret);
}

static void uprobe_multi_nop_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_nop);
}

static void uretprobe_multi_nop_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_nop);
}

static void uprobe_multi_push_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_push);
}

static void uretprobe_multi_push_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_push);
}

static void uprobe_multi_ret_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_ret);
}

static void uretprobe_multi_ret_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_ret);
}

#ifdef __x86_64__
static void uprobe_nop5_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_nop5);
}

static void uretprobe_nop5_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_nop5);
}

static void uprobe_multi_nop5_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_nop5);
}

static void uretprobe_multi_nop5_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_nop5);
}
#endif

const struct bench bench_trig_syscall_count = {
	.name = "trig-syscall-count",
	.validate = trigger_validate,
	.setup = trigger_syscall_count_setup,
	.producer_thread = trigger_producer,
	.measure = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
#define BENCH_TRIG_KERNEL(KIND, NAME)				\
const struct bench bench_trig_##KIND = {			\
	.name = "trig-" NAME,					\
	.setup = trigger_##KIND##_setup,			\
	.producer_thread = trigger_producer_batch,		\
	.measure = trigger_measure,				\
	.report_progress = hits_drops_report_progress,		\
	.report_final = hits_drops_report_final,		\
	.argp = &bench_trigger_batch_argp,			\
}

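/* Each definition is registered with the bench harness under its "trig-..."
 * name, e.g.:
 *   ./bench trig-kprobe
 *   ./bench --trig-batch-iters 500 trig-fentry
 */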
BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
BENCH_TRIG_KERNEL(kprobe, "kprobe");
BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
BENCH_TRIG_KERNEL(rawtp, "rawtp");

/* uprobe benchmarks */
#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)		\
const struct bench bench_trig_##KIND = {			\
	.name = "trig-" NAME,					\
	.validate = trigger_validate,				\
	.setup = KIND##_setup,					\
	.producer_thread = uprobe_producer_##PRODUCER,		\
	.measure = trigger_measure,				\
	.report_progress = hits_drops_report_progress,		\
	.report_final = hits_drops_report_final,		\
}

BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
#ifdef __x86_64__
BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
#endif