Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/bpf/usdt.bpf.h
29278 views
1
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3
#ifndef __USDT_BPF_H__
4
#define __USDT_BPF_H__
5
6
#include <linux/errno.h>
7
#include "bpf_helpers.h"
8
#include "bpf_tracing.h"
9
10
/* Below types and maps are internal implementation details of libbpf's USDT
 * support and are subject to change. Also, bpf_usdt_xxx() API helpers should
 * be considered an unstable API as well and might be adjusted based on user
 * feedback from using libbpf's USDT support in production.
 */
15
16
/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal
 * map that keeps track of USDT argument specifications. This might be
 * necessary if there are a lot of USDT attachments.
 */
#ifndef BPF_USDT_MAX_SPEC_CNT
#define BPF_USDT_MAX_SPEC_CNT 256
#endif

/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
 * map that keeps track of IP (memory address) mapping to USDT argument
 * specification.
 * Note, if kernel supports BPF cookies, this map is not used and could be
 * resized all the way to 1 to save a bit of memory.
 * The default is derived from BPF_USDT_MAX_SPEC_CNT (four IPs per spec).
 */
#ifndef BPF_USDT_MAX_IP_CNT
#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
#endif
32
33
/* How a single USDT argument is located; selects the branch taken in
 * bpf_usdt_arg(). Values are stored in the 8-bit arg_type bitfield of
 * struct __bpf_usdt_arg_spec, so enumerator order must not change.
 */
enum __bpf_usdt_arg_type {
	/* constant value stored directly in val_off */
	BPF_USDT_ARG_CONST,
	/* value held in a register, read from struct pt_regs at reg_off */
	BPF_USDT_ARG_REG,
	/* value in user memory at (register + val_off) */
	BPF_USDT_ARG_REG_DEREF,
	/* value in user memory at (base + (index << scale_bitshift) + val_off) */
	BPF_USDT_ARG_SIB,
};
39
40
/*
41
* This struct layout is designed specifically to be backwards/forward
42
* compatible between libbpf versions for ARG_CONST, ARG_REG, and
43
* ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
44
*/
45
struct __bpf_usdt_arg_spec {
46
/* u64 scalar interpreted depending on arg_type, see below */
47
__u64 val_off;
48
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
49
/* arg location case, see bpf_usdt_arg() for details */
50
enum __bpf_usdt_arg_type arg_type: 8;
51
/* index register offset within struct pt_regs */
52
__u16 idx_reg_off: 12;
53
/* scale factor for index register (1, 2, 4, or 8) */
54
__u16 scale_bitshift: 4;
55
/* reserved for future use, keeps reg_off offset stable */
56
__u8 __reserved: 8;
57
#else
58
__u8 __reserved: 8;
59
__u16 idx_reg_off: 12;
60
__u16 scale_bitshift: 4;
61
enum __bpf_usdt_arg_type arg_type: 8;
62
#endif
63
/* offset of referenced register within struct pt_regs */
64
short reg_off;
65
/* whether arg should be interpreted as signed value */
66
bool arg_signed;
67
/* number of bits that need to be cleared and, optionally,
68
* sign-extended to cast arguments that are 1, 2, or 4 bytes
69
* long into final 8-byte u64/s64 value returned to user
70
*/
71
char arg_bitshift;
72
};
73
74
/* Maximum number of arguments recorded per USDT spec; also the upper bound
 * used to validate arg_num in bpf_usdt_arg() and bpf_usdt_arg_size().
 * Should match USDT_MAX_ARG_CNT in usdt.c exactly.
 */
#define BPF_USDT_MAX_ARG_CNT 12
76
struct __bpf_usdt_spec {
77
struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
78
__u64 usdt_cookie;
79
short arg_cnt;
80
};
81
82
struct {
83
__uint(type, BPF_MAP_TYPE_ARRAY);
84
__uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
85
__type(key, int);
86
__type(value, struct __bpf_usdt_spec);
87
} __bpf_usdt_specs SEC(".maps") __weak;
88
89
struct {
90
__uint(type, BPF_MAP_TYPE_HASH);
91
__uint(max_entries, BPF_USDT_MAX_IP_CNT);
92
__type(key, long);
93
__type(value, __u32);
94
} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
95
96
/* Feature flag read by __bpf_usdt_spec_id(): when false the spec ID is looked
 * up by instruction pointer in __bpf_usdt_ip_to_spec_id; when true it is taken
 * from bpf_get_attach_cookie(). Declared __kconfig, so the value is supplied
 * externally (presumably by libbpf at load time -- TODO confirm).
 */
extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig;
97
98
static __always_inline
99
int __bpf_usdt_spec_id(struct pt_regs *ctx)
100
{
101
if (!LINUX_HAS_BPF_COOKIE) {
102
long ip = PT_REGS_IP(ctx);
103
int *spec_id_ptr;
104
105
spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
106
return spec_id_ptr ? *spec_id_ptr : -ESRCH;
107
}
108
109
return bpf_get_attach_cookie(ctx);
110
}
111
112
/* Return number of USDT arguments defined for currently traced USDT. */
113
__weak __hidden
114
int bpf_usdt_arg_cnt(struct pt_regs *ctx)
115
{
116
struct __bpf_usdt_spec *spec;
117
int spec_id;
118
119
spec_id = __bpf_usdt_spec_id(ctx);
120
if (spec_id < 0)
121
return -ESRCH;
122
123
spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
124
if (!spec)
125
return -ESRCH;
126
127
return spec->arg_cnt;
128
}
129
130
/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument.
131
* Returns negative error if argument is not found or arg_num is invalid.
132
*/
133
static __always_inline
134
int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num)
135
{
136
struct __bpf_usdt_arg_spec *arg_spec;
137
struct __bpf_usdt_spec *spec;
138
int spec_id;
139
140
spec_id = __bpf_usdt_spec_id(ctx);
141
if (spec_id < 0)
142
return -ESRCH;
143
144
spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
145
if (!spec)
146
return -ESRCH;
147
148
if (arg_num >= BPF_USDT_MAX_ARG_CNT)
149
return -ENOENT;
150
barrier_var(arg_num);
151
if (arg_num >= spec->arg_cnt)
152
return -ENOENT;
153
154
arg_spec = &spec->args[arg_num];
155
156
/* arg_spec->arg_bitshift = 64 - arg_sz * 8
157
* so: arg_sz = (64 - arg_spec->arg_bitshift) / 8
158
*/
159
return (unsigned int)(64 - arg_spec->arg_bitshift) / 8;
160
}
161
162
/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
163
* Returns 0 on success; negative error, otherwise.
164
* On error *res is guaranteed to be set to zero.
165
*/
166
__weak __hidden
167
int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
168
{
169
struct __bpf_usdt_spec *spec;
170
struct __bpf_usdt_arg_spec *arg_spec;
171
unsigned long val, idx;
172
int err, spec_id;
173
174
*res = 0;
175
176
spec_id = __bpf_usdt_spec_id(ctx);
177
if (spec_id < 0)
178
return -ESRCH;
179
180
spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
181
if (!spec)
182
return -ESRCH;
183
184
if (arg_num >= BPF_USDT_MAX_ARG_CNT)
185
return -ENOENT;
186
barrier_var(arg_num);
187
if (arg_num >= spec->arg_cnt)
188
return -ENOENT;
189
190
arg_spec = &spec->args[arg_num];
191
switch (arg_spec->arg_type) {
192
case BPF_USDT_ARG_CONST:
193
/* Arg is just a constant ("-4@$-9" in USDT arg spec).
194
* value is recorded in arg_spec->val_off directly.
195
*/
196
val = arg_spec->val_off;
197
break;
198
case BPF_USDT_ARG_REG:
199
/* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
200
* so we read the contents of that register directly from
201
* struct pt_regs. To keep things simple user-space parts
202
* record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
203
*/
204
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
205
if (err)
206
return err;
207
break;
208
case BPF_USDT_ARG_REG_DEREF:
209
/* Arg is in memory addressed by register, plus some offset
210
* (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is
211
* identified like with BPF_USDT_ARG_REG case, and the offset
212
* is in arg_spec->val_off. We first fetch register contents
213
* from pt_regs, then do another user-space probe read to
214
* fetch argument value itself.
215
*/
216
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
217
if (err)
218
return err;
219
err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
220
if (err)
221
return err;
222
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
223
val >>= arg_spec->arg_bitshift;
224
#endif
225
break;
226
case BPF_USDT_ARG_SIB:
227
/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
228
* (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
229
* fetch the base register contents and the index register
230
* contents from pt_regs. Then we calculate the final address
231
* as base + (index * scale) + offset, and do a user-space
232
* probe read to fetch the argument value.
233
*/
234
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
235
if (err)
236
return err;
237
err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
238
if (err)
239
return err;
240
err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
241
if (err)
242
return err;
243
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
244
val >>= arg_spec->arg_bitshift;
245
#endif
246
break;
247
default:
248
return -EINVAL;
249
}
250
251
/* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing
252
* necessary upper arg_bitshift bits, with sign extension if argument
253
* is signed
254
*/
255
val <<= arg_spec->arg_bitshift;
256
if (arg_spec->arg_signed)
257
val = ((long)val) >> arg_spec->arg_bitshift;
258
else
259
val = val >> arg_spec->arg_bitshift;
260
*res = val;
261
return 0;
262
}
263
264
/* Retrieve user-specified cookie value provided during attach as
265
* bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie
266
* returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself
267
* utilizing BPF cookies internally, so user can't use BPF cookie directly
268
* for USDT programs and has to use bpf_usdt_cookie() API instead.
269
*/
270
__weak __hidden
271
long bpf_usdt_cookie(struct pt_regs *ctx)
272
{
273
struct __bpf_usdt_spec *spec;
274
int spec_id;
275
276
spec_id = __bpf_usdt_spec_id(ctx);
277
if (spec_id < 0)
278
return 0;
279
280
spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
281
if (!spec)
282
return 0;
283
284
return spec->usdt_cookie;
285
}
286
287
/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h
 *
 * ___bpf_usdt_argsN() expands to "ctx" followed by N statement expressions,
 * the i-th of which fetches USDT argument #i through bpf_usdt_arg() into a
 * local long and yields it. The return value of bpf_usdt_arg() is ignored;
 * on failure the yielded value is 0 because bpf_usdt_arg() zeroes its output
 * before doing anything else. A variable named 'ctx' must be in scope.
 */
#define ___bpf_usdt_args0() ctx
#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); _x; })
#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); _x; })
#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); _x; })
#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); _x; })
#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); _x; })
#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); _x; })
#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); _x; })
#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); _x; })
#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); _x; })
#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); _x; })
#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); _x; })
#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); _x; })
#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
302
303
/*
 * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
 * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
 * Original struct pt_regs * context is preserved as 'ctx' argument.
 *
 * The macro declares name(ctx), forward-declares an always-inline
 * ____name(ctx, args...), defines name() as a wrapper that fetches each
 * USDT argument via ___bpf_usdt_args() and passes it on, and finally leaves
 * the ____name() definition header open so the user's function body follows.
 * -Wint-conversion is silenced around the call because the fetched values
 * are plain longs while the declared argument types may be pointers.
 */
#define BPF_USDT(name, args...)						    \
name(struct pt_regs *ctx);						    \
static __always_inline typeof(name(0))					    \
____##name(struct pt_regs *ctx, ##args);				    \
typeof(name(0)) name(struct pt_regs *ctx)				    \
{									    \
	_Pragma("GCC diagnostic push")					    \
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
	return ____##name(___bpf_usdt_args(args));			    \
	_Pragma("GCC diagnostic pop")					    \
}									    \
static __always_inline typeof(name(0))					    \
____##name(struct pt_regs *ctx, ##args)
321
322
#endif /* __USDT_BPF_H__ */
323
324