Path: blob/master/tools/testing/selftests/bpf/benchs/bench_trigger.c
29270 views
// SPDX-License-Identifier: GPL-2.01/* Copyright (c) 2020 Facebook */2#define _GNU_SOURCE3#include <argp.h>4#include <unistd.h>5#include <stdint.h>6#include "bpf_util.h"7#include "bench.h"8#include "trigger_bench.skel.h"9#include "trace_helpers.h"1011#define MAX_TRIG_BATCH_ITERS 10001213static struct {14__u32 batch_iters;15} args = {16.batch_iters = 100,17};1819enum {20ARG_TRIG_BATCH_ITERS = 7000,21};2223static const struct argp_option opts[] = {24{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,25"Number of in-kernel iterations per one driver test run"},26{},27};2829static error_t parse_arg(int key, char *arg, struct argp_state *state)30{31long ret;3233switch (key) {34case ARG_TRIG_BATCH_ITERS:35ret = strtol(arg, NULL, 10);36if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {37fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",381, MAX_TRIG_BATCH_ITERS);39argp_usage(state);40}41args.batch_iters = ret;42break;43default:44return ARGP_ERR_UNKNOWN;45}4647return 0;48}4950const struct argp bench_trigger_batch_argp = {51.options = opts,52.parser = parse_arg,53};5455/* adjust slot shift in inc_hits() if changing */56#define MAX_BUCKETS 2565758#pragma GCC diagnostic ignored "-Wattributes"5960/* BPF triggering benchmarks */61static struct trigger_ctx {62struct trigger_bench *skel;63bool usermode_counters;64int driver_prog_fd;65} ctx;6667static struct counter base_hits[MAX_BUCKETS];6869static __always_inline void inc_counter(struct counter *counters)70{71static __thread int tid = 0;72unsigned slot;7374if (unlikely(tid == 0))75tid = sys_gettid();7677/* multiplicative hashing, it's fast */78slot = 2654435769U * tid;79slot >>= 24;8081atomic_inc(&base_hits[slot].value); /* use highest byte as an index */82}8384static long sum_and_reset_counters(struct counter *counters)85{86int i;87long sum = 0;8889for (i = 0; i < MAX_BUCKETS; i++)90sum += atomic_swap(&counters[i].value, 0);91return sum;92}9394static void trigger_validate(void)95{96if (env.consumer_cnt != 0) {97fprintf(stderr, "benchmark doesn't support consumer!\n");98exit(1);99}100}101102static void *trigger_producer(void *input)103{104if (ctx.usermode_counters) {105while (true) {106(void)syscall(__NR_getpgid);107inc_counter(base_hits);108}109} else {110while (true)111(void)syscall(__NR_getpgid);112}113return NULL;114}115116static void *trigger_producer_batch(void *input)117{118int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);119120while (true)121bpf_prog_test_run_opts(fd, NULL);122123return NULL;124}125126static void trigger_measure(struct bench_res *res)127{128if (ctx.usermode_counters)129res->hits = sum_and_reset_counters(base_hits);130else131res->hits = sum_and_reset_counters(ctx.skel->bss->hits);132}133134static void setup_ctx(void)135{136setup_libbpf();137138ctx.skel = trigger_bench__open();139if (!ctx.skel) {140fprintf(stderr, "failed to open skeleton\n");141exit(1);142}143144/* default "driver" BPF program */145bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);146147ctx.skel->rodata->batch_iters = args.batch_iters;148}149150static void load_ctx(void)151{152int err;153154err = trigger_bench__load(ctx.skel);155if (err) {156fprintf(stderr, "failed to open skeleton\n");157exit(1);158}159}160161static void attach_bpf(struct bpf_program *prog)162{163struct bpf_link *link;164165link = bpf_program__attach(prog);166if (!link) {167fprintf(stderr, "failed to attach program!\n");168exit(1);169}170}171172static void trigger_syscall_count_setup(void)173{174ctx.usermode_counters = true;175}176177/* Batched, staying mostly in-kernel triggering setups */178static void trigger_kernel_count_setup(void)179{180setup_ctx();181bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);182bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);183load_ctx();184/* override driver program */185ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);186}187188static void trigger_kprobe_setup(void)189{190setup_ctx();191bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);192load_ctx();193attach_bpf(ctx.skel->progs.bench_trigger_kprobe);194}195196static void trigger_kretprobe_setup(void)197{198setup_ctx();199bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);200load_ctx();201attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);202}203204static void trigger_kprobe_multi_setup(void)205{206setup_ctx();207bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);208load_ctx();209attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);210}211212static void trigger_kretprobe_multi_setup(void)213{214setup_ctx();215bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);216load_ctx();217attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);218}219220static void trigger_fentry_setup(void)221{222setup_ctx();223bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);224load_ctx();225attach_bpf(ctx.skel->progs.bench_trigger_fentry);226}227228static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)229{230LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);231char **syms = NULL;232size_t cnt = 0;233234/* Some recursive functions will be skipped in235* bpf_get_ksyms -> skip_entry, as they can introduce sufficient236* overhead. However, it's difficut to skip all the recursive237* functions for a debug kernel.238*239* So, don't run the kprobe-multi-all and kretprobe-multi-all on240* a debug kernel.241*/242if (bpf_get_ksyms(&syms, &cnt, true)) {243fprintf(stderr, "failed to get ksyms\n");244exit(1);245}246247opts.syms = (const char **) syms;248opts.cnt = cnt;249opts.retprobe = kretprobe;250/* attach empty to all the kernel functions except bpf_get_numa_node_id. */251if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {252fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");253exit(1);254}255}256257static void trigger_kprobe_multi_all_setup(void)258{259struct bpf_program *prog, *empty;260261setup_ctx();262empty = ctx.skel->progs.bench_kprobe_multi_empty;263prog = ctx.skel->progs.bench_trigger_kprobe_multi;264bpf_program__set_autoload(empty, true);265bpf_program__set_autoload(prog, true);266load_ctx();267268attach_ksyms_all(empty, false);269attach_bpf(prog);270}271272static void trigger_kretprobe_multi_all_setup(void)273{274struct bpf_program *prog, *empty;275276setup_ctx();277empty = ctx.skel->progs.bench_kretprobe_multi_empty;278prog = ctx.skel->progs.bench_trigger_kretprobe_multi;279bpf_program__set_autoload(empty, true);280bpf_program__set_autoload(prog, true);281load_ctx();282283attach_ksyms_all(empty, true);284attach_bpf(prog);285}286287static void trigger_fexit_setup(void)288{289setup_ctx();290bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);291load_ctx();292attach_bpf(ctx.skel->progs.bench_trigger_fexit);293}294295static void trigger_fmodret_setup(void)296{297setup_ctx();298bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);299bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);300bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);301load_ctx();302/* override driver program */303ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);304attach_bpf(ctx.skel->progs.bench_trigger_fmodret);305}306307static void trigger_tp_setup(void)308{309setup_ctx();310bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);311bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);312bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);313load_ctx();314/* override driver program */315ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);316attach_bpf(ctx.skel->progs.bench_trigger_tp);317}318319static void trigger_rawtp_setup(void)320{321setup_ctx();322bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);323bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);324bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);325load_ctx();326/* override driver program */327ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);328attach_bpf(ctx.skel->progs.bench_trigger_rawtp);329}330331/* make sure call is not inlined and not avoided by compiler, so __weak and332* inline asm volatile in the body of the function333*334* There is a performance difference between uprobing at nop location vs other335* instructions. So use two different targets, one of which starts with nop336* and another doesn't.337*338* GCC doesn't generate stack setup preamble for these functions due to them339* having no input arguments and doing nothing in the body.340*/341__nocf_check __weak void uprobe_target_nop(void)342{343asm volatile ("nop");344}345346__weak void opaque_noop_func(void)347{348}349350__nocf_check __weak int uprobe_target_push(void)351{352/* overhead of function call is negligible compared to uprobe353* triggering, so this shouldn't affect benchmark results much354*/355opaque_noop_func();356return 1;357}358359__nocf_check __weak void uprobe_target_ret(void)360{361asm volatile ("");362}363364static void *uprobe_producer_count(void *input)365{366while (true) {367uprobe_target_nop();368inc_counter(base_hits);369}370return NULL;371}372373static void *uprobe_producer_nop(void *input)374{375while (true)376uprobe_target_nop();377return NULL;378}379380static void *uprobe_producer_push(void *input)381{382while (true)383uprobe_target_push();384return NULL;385}386387static void *uprobe_producer_ret(void *input)388{389while (true)390uprobe_target_ret();391return NULL;392}393394#ifdef __x86_64__395__nocf_check __weak void uprobe_target_nop5(void)396{397asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");398}399400static void *uprobe_producer_nop5(void *input)401{402while (true)403uprobe_target_nop5();404return NULL;405}406#endif407408static void usetup(bool use_retprobe, bool use_multi, void *target_addr)409{410size_t uprobe_offset;411struct bpf_link *link;412int err;413414setup_libbpf();415416ctx.skel = trigger_bench__open();417if (!ctx.skel) {418fprintf(stderr, "failed to open skeleton\n");419exit(1);420}421422if (use_multi)423bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);424else425bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);426427err = trigger_bench__load(ctx.skel);428if (err) {429fprintf(stderr, "failed to load skeleton\n");430exit(1);431}432433uprobe_offset = get_uprobe_offset(target_addr);434if (use_multi) {435LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,436.retprobe = use_retprobe,437.cnt = 1,438.offsets = &uprobe_offset,439);440link = bpf_program__attach_uprobe_multi(441ctx.skel->progs.bench_trigger_uprobe_multi,442-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);443ctx.skel->links.bench_trigger_uprobe_multi = link;444} else {445link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,446use_retprobe,447-1 /* all PIDs */,448"/proc/self/exe",449uprobe_offset);450ctx.skel->links.bench_trigger_uprobe = link;451}452if (!link) {453fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");454exit(1);455}456}457458static void usermode_count_setup(void)459{460ctx.usermode_counters = true;461}462463static void uprobe_nop_setup(void)464{465usetup(false, false /* !use_multi */, &uprobe_target_nop);466}467468static void uretprobe_nop_setup(void)469{470usetup(true, false /* !use_multi */, &uprobe_target_nop);471}472473static void uprobe_push_setup(void)474{475usetup(false, false /* !use_multi */, &uprobe_target_push);476}477478static void uretprobe_push_setup(void)479{480usetup(true, false /* !use_multi */, &uprobe_target_push);481}482483static void uprobe_ret_setup(void)484{485usetup(false, false /* !use_multi */, &uprobe_target_ret);486}487488static void uretprobe_ret_setup(void)489{490usetup(true, false /* !use_multi */, &uprobe_target_ret);491}492493static void uprobe_multi_nop_setup(void)494{495usetup(false, true /* use_multi */, &uprobe_target_nop);496}497498static void uretprobe_multi_nop_setup(void)499{500usetup(true, true /* use_multi */, &uprobe_target_nop);501}502503static void uprobe_multi_push_setup(void)504{505usetup(false, true /* use_multi */, &uprobe_target_push);506}507508static void uretprobe_multi_push_setup(void)509{510usetup(true, true /* use_multi */, &uprobe_target_push);511}512513static void uprobe_multi_ret_setup(void)514{515usetup(false, true /* use_multi */, &uprobe_target_ret);516}517518static void uretprobe_multi_ret_setup(void)519{520usetup(true, true /* use_multi */, &uprobe_target_ret);521}522523#ifdef __x86_64__524static void uprobe_nop5_setup(void)525{526usetup(false, false /* !use_multi */, &uprobe_target_nop5);527}528529static void uretprobe_nop5_setup(void)530{531usetup(true, false /* !use_multi */, &uprobe_target_nop5);532}533534static void uprobe_multi_nop5_setup(void)535{536usetup(false, true /* use_multi */, &uprobe_target_nop5);537}538539static void uretprobe_multi_nop5_setup(void)540{541usetup(true, true /* use_multi */, &uprobe_target_nop5);542}543#endif544545const struct bench bench_trig_syscall_count = {546.name = "trig-syscall-count",547.validate = trigger_validate,548.setup = trigger_syscall_count_setup,549.producer_thread = trigger_producer,550.measure = trigger_measure,551.report_progress = hits_drops_report_progress,552.report_final = hits_drops_report_final,553};554555/* batched (staying mostly in kernel) kprobe/fentry benchmarks */556#define BENCH_TRIG_KERNEL(KIND, NAME) \557const struct bench bench_trig_##KIND = { \558.name = "trig-" NAME, \559.setup = trigger_##KIND##_setup, \560.producer_thread = trigger_producer_batch, \561.measure = trigger_measure, \562.report_progress = hits_drops_report_progress, \563.report_final = hits_drops_report_final, \564.argp = &bench_trigger_batch_argp, \565}566567BENCH_TRIG_KERNEL(kernel_count, "kernel-count");568BENCH_TRIG_KERNEL(kprobe, "kprobe");569BENCH_TRIG_KERNEL(kretprobe, "kretprobe");570BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");571BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");572BENCH_TRIG_KERNEL(fentry, "fentry");573BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");574BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");575BENCH_TRIG_KERNEL(fexit, "fexit");576BENCH_TRIG_KERNEL(fmodret, "fmodret");577BENCH_TRIG_KERNEL(tp, "tp");578BENCH_TRIG_KERNEL(rawtp, "rawtp");579580/* uprobe benchmarks */581#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \582const struct bench bench_trig_##KIND = { \583.name = "trig-" NAME, \584.validate = trigger_validate, \585.setup = KIND##_setup, \586.producer_thread = uprobe_producer_##PRODUCER, \587.measure = trigger_measure, \588.report_progress = hits_drops_report_progress, \589.report_final = hits_drops_report_final, \590}591592BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");593BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");594BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");595BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");596BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");597BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");598BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");599BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");600BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");601BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");602BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");603BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");604BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");605#ifdef __x86_64__606BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");607BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");608BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");609BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");610#endif611612613