Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/tracing/latency/latency-collector.c
29271 views
1
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH
 * Author: Viktor Rosendahl ([email protected])
 */

#define _GNU_SOURCE
#define _POSIX_C_SOURCE 200809L

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <sched.h>
#include <linux/unistd.h>
#include <signal.h>
#include <sys/inotify.h>
#include <unistd.h>
#include <pthread.h>
#include <tracefs.h>

/* Program name for messages; prg_unknown is the fallback. */
static const char *prg_name;
static const char *prg_unknown = "unknown program name";

/* File descriptor of stdout, used by write_or_die(). */
static int fd_stdout;

/* Scheduling policy/priority requested on the command line. */
static int sched_policy;
static bool sched_policy_set;

static int sched_pri;
static bool sched_pri_set;

static bool trace_enable = true;
static bool setup_ftrace = true;
static bool use_random_sleep;

/* X-macro list of the ftrace options that this program tracks. */
#define TRACE_OPTS \
	C(FUNC_TR, "function-trace"),	\
	C(DISP_GR, "display-graph"),	\
	C(NR, NULL)

#undef C
#define C(a, b) OPTIDX_##a

enum traceopt {
	TRACE_OPTS
};

#undef C
#define C(a, b) b

static const char *const optstr[] = {
	TRACE_OPTS
};

/* How a failed file operation should be handled; ERR_EXIT must be 0. */
enum errhandling {
	ERR_EXIT = 0,
	ERR_WARN,
	ERR_CLEANUP,
};

static bool use_options[OPTIDX_NR];

static char inotify_buffer[655360];

#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#define bool2str(x)	(x ? "true":"false")

#define DEFAULT_NR_PRINTER_THREADS (3)
static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS;

#define DEFAULT_TABLE_SIZE (2)
static unsigned int table_startsize = DEFAULT_TABLE_SIZE;

static int verbosity;

#define verbose_sizechange() (verbosity >= 1)
#define verbose_lostevent()  (verbosity >= 2)
#define verbose_ftrace()     (verbosity >= 1)

#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0)
#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0)

/* Paths of the trace files in use; *_dflt are the defaults. */
static const char *debug_tracefile;
static const char *debug_tracefile_dflt;
static const char *debug_maxlat;
static const char *debug_maxlat_dflt;
static const char * const DEBUG_NOFILE = "[file not found]";

/* Names of the tracefs control files that this program touches. */
static const char * const TR_MAXLAT  = "tracing_max_latency";
static const char * const TR_THRESH  = "tracing_thresh";
static const char * const TR_CURRENT = "current_tracer";
static const char * const TR_OPTIONS = "trace_options";

static const char * const NOP_TRACER = "nop";

static const char * const OPT_NO_PREFIX = "no";

#define DFLT_THRESHOLD_US "0"
static const char *threshold = DFLT_THRESHOLD_US;

#define DEV_URANDOM	"/dev/urandom"
#define RT_DEFAULT_PRI	(99)
#define DEFAULT_PRI	(0)

#define USEC_PER_MSEC (1000L)
#define NSEC_PER_USEC (1000L)
#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC)

#define MSEC_PER_SEC (1000L)
#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC)
#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC)

#define SLEEP_TIME_MS_DEFAULT (1000L)
#define TRY_PRINTMUTEX_MS (1000)

/* Sleep between latency checks, in microseconds. */
static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT);

static const char * const queue_full_warning =
"Could not queue trace for printing. It is likely that events happen faster\n"
"than what they can be printed. Probably partly because of random sleeping\n";

static const char * const no_tracer_msg =
"Could not find any tracers! Running this program as root may help!\n";

static const char * const no_latency_tr_msg =
"No latency tracers are supported by your kernel!\n";

/* One supported scheduling policy and its default priority. */
struct policy {
	const char *name;
	int policy;
	int default_pri;
};

static const struct policy policies[] = {
	{ "other", SCHED_OTHER, DEFAULT_PRI },
	{ "batch", SCHED_BATCH, DEFAULT_PRI },
	{ "idle",  SCHED_IDLE,  DEFAULT_PRI },
	{ "rr",    SCHED_RR,    RT_DEFAULT_PRI },
	{ "fifo",  SCHED_FIFO,  RT_DEFAULT_PRI },
	{ NULL,    0,           DEFAULT_PRI }
};

/*
 * The default tracer will be the first on this list that is supported by the
 * currently running Linux kernel.
 */
static const char * const relevant_tracers[] = {
	"preemptirqsoff",
	"preemptoff",
	"irqsoff",
	"wakeup",
	"wakeup_rt",
	"wakeup_dl",
	NULL
};

/* This is the list of tracers for which random sleep makes sense */
static const char * const random_tracers[] = {
	"preemptirqsoff",
	"preemptoff",
	"irqsoff",
	NULL
};

static const char *current_tracer;
static bool force_tracer;

/* Saved ftrace configuration, restored at exit. */
struct ftrace_state {
	char *tracer;
	char *thresh;
	bool opt[OPTIDX_NR];
	bool opt_valid[OPTIDX_NR];
	pthread_mutex_t mutex;
};

/* A queued print request, identified by its ticket number. */
struct entry {
	int ticket;
	int ticket_completed_ref;
};

/* State shared between the printer threads. */
struct print_state {
	int ticket_counter;
	int ticket_completed;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	int cnt;
	pthread_mutex_t cnt_mutex;
};

/* Small fixed-size message buffer, e.g. for table-resize notices. */
struct short_msg {
	char buf[160];
	int len;
};

static struct print_state printstate;
static struct ftrace_state save_state;
volatile sig_atomic_t signal_flag;

#define PROB_TABLE_MAX_SIZE (1000)

int probabilities[PROB_TABLE_MAX_SIZE];

/* Window into the tail of probabilities[]; grows with the print backlog. */
struct sleep_table {
	int *table;
	int size;
	pthread_mutex_t mutex;
};

static struct sleep_table sleeptable;

#define QUEUE_SIZE (10)

/* Fixed-size ring buffer of print requests. */
struct queue {
	struct entry entries[QUEUE_SIZE];
	int next_prod_idx;
	int next_cons_idx;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
};

#define MAX_THREADS (40)

struct queue printqueue;
pthread_t printthread[MAX_THREADS];
pthread_mutex_t print_mtx;
#define PRINT_BUFFER_SIZE (16 * 1024 * 1024)

static void cleanup_exit(int status);
static int set_trace_opt(const char *opt, bool value);
239
/*
 * Allocate memory or terminate the program, restoring the ftrace state
 * via cleanup_exit() first.
 */
static __always_inline void *malloc_or_die(size_t size)
{
	void *ptr = malloc(size);

	if (unlikely(ptr == NULL)) {
		warn("malloc() failed");
		cleanup_exit(EXIT_FAILURE);
	}
	return ptr;
}

/*
 * Allocate memory or exit immediately without touching the ftrace state.
 * Used from code that may itself run inside cleanup_exit(), where calling
 * cleanup_exit() again would recurse.
 */
static __always_inline void *malloc_or_die_nocleanup(size_t size)
{
	void *ptr = malloc(size);

	if (unlikely(ptr == NULL))
		/* Was err(0, ...): exit status 0 wrongly signaled success */
		err(EXIT_FAILURE, "malloc() failed");
	return ptr;
}
258
259
static __always_inline void write_or_die(int fd, const char *buf, size_t count)
260
{
261
ssize_t r;
262
263
do {
264
r = write(fd, buf, count);
265
if (unlikely(r < 0)) {
266
if (errno == EINTR)
267
continue;
268
warn("write() failed");
269
cleanup_exit(EXIT_FAILURE);
270
}
271
count -= r;
272
buf += r;
273
} while (count > 0);
274
}
275
276
static __always_inline void clock_gettime_or_die(clockid_t clk_id,
277
struct timespec *tp)
278
{
279
int r = clock_gettime(clk_id, tp);
280
281
if (unlikely(r != 0))
282
err(EXIT_FAILURE, "clock_gettime() failed");
283
}
284
285
static __always_inline void sigemptyset_or_die(sigset_t *s)
286
{
287
if (unlikely(sigemptyset(s) != 0)) {
288
warn("sigemptyset() failed");
289
cleanup_exit(EXIT_FAILURE);
290
}
291
}
292
293
static __always_inline void sigaddset_or_die(sigset_t *s, int signum)
294
{
295
if (unlikely(sigaddset(s, signum) != 0)) {
296
warn("sigemptyset() failed");
297
cleanup_exit(EXIT_FAILURE);
298
}
299
}
300
301
static __always_inline void sigaction_or_die(int signum,
302
const struct sigaction *act,
303
struct sigaction *oldact)
304
{
305
if (unlikely(sigaction(signum, act, oldact) != 0)) {
306
warn("sigaction() failed");
307
cleanup_exit(EXIT_FAILURE);
308
}
309
}
310
311
static void open_stdout(void)
312
{
313
if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
314
err(EXIT_FAILURE, "setvbuf() failed");
315
fd_stdout = fileno(stdout);
316
if (fd_stdout < 0)
317
err(EXIT_FAILURE, "fileno() failed");
318
}
319
320
/*
321
* It's not worth it to call cleanup_exit() from mutex functions because
322
* cleanup_exit() uses mutexes.
323
*/
324
static __always_inline void mutex_lock(pthread_mutex_t *mtx)
325
{
326
errno = pthread_mutex_lock(mtx);
327
if (unlikely(errno))
328
err(EXIT_FAILURE, "pthread_mutex_lock() failed");
329
}
330
331
332
static __always_inline void mutex_unlock(pthread_mutex_t *mtx)
333
{
334
errno = pthread_mutex_unlock(mtx);
335
if (unlikely(errno))
336
err(EXIT_FAILURE, "pthread_mutex_unlock() failed");
337
}
338
339
static __always_inline void cond_signal(pthread_cond_t *cond)
340
{
341
errno = pthread_cond_signal(cond);
342
if (unlikely(errno))
343
err(EXIT_FAILURE, "pthread_cond_signal() failed");
344
}
345
346
static __always_inline void cond_wait(pthread_cond_t *restrict cond,
347
pthread_mutex_t *restrict mutex)
348
{
349
errno = pthread_cond_wait(cond, mutex);
350
if (unlikely(errno))
351
err(EXIT_FAILURE, "pthread_cond_wait() failed");
352
}
353
354
static __always_inline void cond_broadcast(pthread_cond_t *cond)
355
{
356
errno = pthread_cond_broadcast(cond);
357
if (unlikely(errno))
358
err(EXIT_FAILURE, "pthread_cond_broadcast() failed");
359
}
360
361
static __always_inline void
362
mutex_init(pthread_mutex_t *mutex,
363
const pthread_mutexattr_t *attr)
364
{
365
errno = pthread_mutex_init(mutex, attr);
366
if (errno)
367
err(EXIT_FAILURE, "pthread_mutex_init() failed");
368
}
369
370
static __always_inline void mutexattr_init(pthread_mutexattr_t *attr)
371
{
372
errno = pthread_mutexattr_init(attr);
373
if (errno)
374
err(EXIT_FAILURE, "pthread_mutexattr_init() failed");
375
}
376
377
static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr)
378
{
379
errno = pthread_mutexattr_destroy(attr);
380
if (errno)
381
err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed");
382
}
383
384
static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr,
385
int type)
386
{
387
errno = pthread_mutexattr_settype(attr, type);
388
if (errno)
389
err(EXIT_FAILURE, "pthread_mutexattr_settype() failed");
390
}
391
392
static __always_inline void condattr_init(pthread_condattr_t *attr)
393
{
394
errno = pthread_condattr_init(attr);
395
if (errno)
396
err(EXIT_FAILURE, "pthread_condattr_init() failed");
397
}
398
399
static __always_inline void condattr_destroy(pthread_condattr_t *attr)
400
{
401
errno = pthread_condattr_destroy(attr);
402
if (errno)
403
err(EXIT_FAILURE, "pthread_condattr_destroy() failed");
404
}
405
406
static __always_inline void condattr_setclock(pthread_condattr_t *attr,
407
clockid_t clock_id)
408
{
409
errno = pthread_condattr_setclock(attr, clock_id);
410
if (unlikely(errno))
411
err(EXIT_FAILURE, "pthread_condattr_setclock() failed");
412
}
413
414
static __always_inline void cond_init(pthread_cond_t *cond,
415
const pthread_condattr_t *attr)
416
{
417
errno = pthread_cond_init(cond, attr);
418
if (errno)
419
err(EXIT_FAILURE, "pthread_cond_init() failed");
420
}
421
422
static __always_inline int
423
cond_timedwait(pthread_cond_t *restrict cond,
424
pthread_mutex_t *restrict mutex,
425
const struct timespec *restrict abstime)
426
{
427
errno = pthread_cond_timedwait(cond, mutex, abstime);
428
if (errno && errno != ETIMEDOUT)
429
err(EXIT_FAILURE, "pthread_cond_timedwait() failed");
430
return errno;
431
}
432
433
static void init_printstate(void)
434
{
435
pthread_condattr_t cattr;
436
437
printstate.ticket_counter = 0;
438
printstate.ticket_completed = 0;
439
printstate.cnt = 0;
440
441
mutex_init(&printstate.mutex, NULL);
442
443
condattr_init(&cattr);
444
condattr_setclock(&cattr, CLOCK_MONOTONIC);
445
cond_init(&printstate.cond, &cattr);
446
condattr_destroy(&cattr);
447
}
448
449
static void init_print_mtx(void)
450
{
451
pthread_mutexattr_t mattr;
452
453
mutexattr_init(&mattr);
454
mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
455
mutex_init(&print_mtx, &mattr);
456
mutexattr_destroy(&mattr);
457
458
}
459
460
static void signal_blocking(int how)
461
{
462
sigset_t s;
463
464
sigemptyset_or_die(&s);
465
sigaddset_or_die(&s, SIGHUP);
466
sigaddset_or_die(&s, SIGTERM);
467
sigaddset_or_die(&s, SIGINT);
468
469
errno = pthread_sigmask(how, &s, NULL);
470
if (unlikely(errno)) {
471
warn("pthread_sigmask() failed");
472
cleanup_exit(EXIT_FAILURE);
473
}
474
}
475
476
static void signal_handler(int num)
477
{
478
signal_flag = num;
479
}
480
481
static void setup_sig_handler(void)
482
{
483
struct sigaction sa;
484
485
memset(&sa, 0, sizeof(sa));
486
sa.sa_handler = signal_handler;
487
488
sigaction_or_die(SIGHUP, &sa, NULL);
489
sigaction_or_die(SIGTERM, &sa, NULL);
490
sigaction_or_die(SIGINT, &sa, NULL);
491
}
492
493
static void process_signal(int signal)
494
{
495
char *name;
496
497
name = strsignal(signal);
498
if (name == NULL)
499
printf("Received signal %d\n", signal);
500
else
501
printf("Received signal %d (%s)\n", signal, name);
502
cleanup_exit(EXIT_SUCCESS);
503
}
504
505
static __always_inline void check_signals(void)
506
{
507
int signal = signal_flag;
508
509
if (unlikely(signal))
510
process_signal(signal);
511
}
512
513
static __always_inline void get_time_in_future(struct timespec *future,
514
long time_us)
515
{
516
long nsec;
517
518
clock_gettime_or_die(CLOCK_MONOTONIC, future);
519
future->tv_sec += time_us / USEC_PER_SEC;
520
nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC;
521
if (nsec >= NSEC_PER_SEC) {
522
future->tv_nsec = nsec % NSEC_PER_SEC;
523
future->tv_sec += 1;
524
}
525
}
526
527
static __always_inline bool time_has_passed(const struct timespec *time)
528
{
529
struct timespec now;
530
531
clock_gettime_or_die(CLOCK_MONOTONIC, &now);
532
if (now.tv_sec > time->tv_sec)
533
return true;
534
if (now.tv_sec < time->tv_sec)
535
return false;
536
return (now.tv_nsec >= time->tv_nsec);
537
}
538
539
static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms)
540
{
541
long time_us = time_ms * USEC_PER_MSEC;
542
struct timespec limit;
543
544
get_time_in_future(&limit, time_us);
545
do {
546
errno = pthread_mutex_trylock(mutex);
547
if (errno && errno != EBUSY)
548
err(EXIT_FAILURE, "pthread_mutex_trylock() failed");
549
} while (errno && !time_has_passed(&limit));
550
return errno == 0;
551
}
552
553
static void restore_trace_opts(const struct ftrace_state *state,
554
const bool *cur)
555
{
556
int i;
557
int r;
558
559
for (i = 0; i < OPTIDX_NR; i++)
560
if (state->opt_valid[i] && state->opt[i] != cur[i]) {
561
r = set_trace_opt(optstr[i], state->opt[i]);
562
if (r < 0)
563
warnx("Failed to restore the %s option to %s",
564
optstr[i], bool2str(state->opt[i]));
565
else if (verbose_ftrace())
566
printf("Restored the %s option in %s to %s\n",
567
optstr[i], TR_OPTIONS,
568
bool2str(state->opt[i]));
569
}
570
}
571
572
static char *read_file(const char *file, enum errhandling h)
573
{
574
int psize;
575
char *r;
576
static const char *emsg = "Failed to read the %s file";
577
578
r = tracefs_instance_file_read(NULL, file, &psize);
579
if (!r) {
580
if (h) {
581
warn(emsg, file);
582
if (h == ERR_CLEANUP)
583
cleanup_exit(EXIT_FAILURE);
584
} else
585
errx(EXIT_FAILURE, emsg, file);
586
}
587
588
if (r && r[psize - 1] == '\n')
589
r[psize - 1] = '\0';
590
return r;
591
}
592
593
static void restore_file(const char *file, char **saved, const char *cur)
594
{
595
if (*saved && was_changed(*saved, cur)) {
596
if (tracefs_instance_file_write(NULL, file, *saved) < 0)
597
warnx("Failed to restore %s to %s!", file, *saved);
598
else if (verbose_ftrace())
599
printf("Restored %s to %s\n", file, *saved);
600
free(*saved);
601
*saved = NULL;
602
}
603
}
604
605
static void restore_ftrace(void)
606
{
607
mutex_lock(&save_state.mutex);
608
609
restore_file(TR_CURRENT, &save_state.tracer, current_tracer);
610
restore_file(TR_THRESH, &save_state.thresh, threshold);
611
restore_trace_opts(&save_state, use_options);
612
613
mutex_unlock(&save_state.mutex);
614
}
615
616
/*
 * Terminate with the given status after printing the maximum detected
 * latency and restoring the original ftrace configuration. Called from
 * nearly every error path, so it must be safe from any thread.
 */
static void cleanup_exit(int status)
{
	char *maxlat;

	/* Nothing was modified, so there is nothing to restore. */
	if (!setup_ftrace)
		exit(status);

	/*
	 * We try the print_mtx for 1 sec in order to avoid garbled
	 * output if possible, but if it cannot be obtained we proceed anyway.
	 */
	mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS);

	maxlat = read_file(TR_MAXLAT, ERR_WARN);
	if (maxlat) {
		printf("The maximum detected latency was: %sus\n", maxlat);
		free(maxlat);
	}

	restore_ftrace();
	/*
	 * We do not need to unlock the print_mtx here because we will exit at
	 * the end of this function. Unlocking print_mtx causes problems if a
	 * print thread happens to be waiting for the mutex because we have
	 * just changed the ftrace settings to the original and thus the
	 * print thread would output incorrect data from ftrace.
	 */
	exit(status);
}
645
646
static void init_save_state(void)
647
{
648
pthread_mutexattr_t mattr;
649
650
mutexattr_init(&mattr);
651
mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
652
mutex_init(&save_state.mutex, &mattr);
653
mutexattr_destroy(&mattr);
654
655
save_state.tracer = NULL;
656
save_state.thresh = NULL;
657
save_state.opt_valid[OPTIDX_FUNC_TR] = false;
658
save_state.opt_valid[OPTIDX_DISP_GR] = false;
659
}
660
661
static int printstate_next_ticket(struct entry *req)
662
{
663
int r;
664
665
r = ++(printstate.ticket_counter);
666
req->ticket = r;
667
req->ticket_completed_ref = printstate.ticket_completed;
668
cond_broadcast(&printstate.cond);
669
return r;
670
}
671
672
static __always_inline
673
void printstate_mark_req_completed(const struct entry *req)
674
{
675
if (req->ticket > printstate.ticket_completed)
676
printstate.ticket_completed = req->ticket;
677
}
678
679
static __always_inline
680
bool printstate_has_new_req_arrived(const struct entry *req)
681
{
682
return (printstate.ticket_counter != req->ticket);
683
}
684
685
static __always_inline int printstate_cnt_inc(void)
686
{
687
int value;
688
689
mutex_lock(&printstate.cnt_mutex);
690
value = ++printstate.cnt;
691
mutex_unlock(&printstate.cnt_mutex);
692
return value;
693
}
694
695
static __always_inline int printstate_cnt_dec(void)
696
{
697
int value;
698
699
mutex_lock(&printstate.cnt_mutex);
700
value = --printstate.cnt;
701
mutex_unlock(&printstate.cnt_mutex);
702
return value;
703
}
704
705
static __always_inline int printstate_cnt_read(void)
706
{
707
int value;
708
709
mutex_lock(&printstate.cnt_mutex);
710
value = printstate.cnt;
711
mutex_unlock(&printstate.cnt_mutex);
712
return value;
713
}
714
715
static __always_inline
716
bool prev_req_won_race(const struct entry *req)
717
{
718
return (printstate.ticket_completed != req->ticket_completed_ref);
719
}
720
721
static void sleeptable_resize(int size, bool printout, struct short_msg *msg)
722
{
723
int bytes;
724
725
if (printout) {
726
msg->len = 0;
727
if (unlikely(size > PROB_TABLE_MAX_SIZE))
728
bytes = snprintf(msg->buf, sizeof(msg->buf),
729
"Cannot increase probability table to %d (maximum size reached)\n", size);
730
else
731
bytes = snprintf(msg->buf, sizeof(msg->buf),
732
"Increasing probability table to %d\n", size);
733
if (bytes < 0)
734
warn("snprintf() failed");
735
else
736
msg->len = bytes;
737
}
738
739
if (unlikely(size < 0)) {
740
/* Should never happen */
741
warnx("Bad program state at %s:%d", __FILE__, __LINE__);
742
cleanup_exit(EXIT_FAILURE);
743
return;
744
}
745
sleeptable.size = size;
746
sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size];
747
}
748
749
static void init_probabilities(void)
750
{
751
int i;
752
int j = 1000;
753
754
for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) {
755
probabilities[i] = 1000 / j;
756
j--;
757
}
758
mutex_init(&sleeptable.mutex, NULL);
759
}
760
761
static int table_get_probability(const struct entry *req,
762
struct short_msg *msg)
763
{
764
int diff = req->ticket - req->ticket_completed_ref;
765
int rval = 0;
766
767
msg->len = 0;
768
diff--;
769
/* Should never happen...*/
770
if (unlikely(diff < 0)) {
771
warnx("Programmer assumption error at %s:%d\n", __FILE__,
772
__LINE__);
773
cleanup_exit(EXIT_FAILURE);
774
}
775
mutex_lock(&sleeptable.mutex);
776
if (diff >= (sleeptable.size - 1)) {
777
rval = sleeptable.table[sleeptable.size - 1];
778
sleeptable_resize(sleeptable.size + 1, verbose_sizechange(),
779
msg);
780
} else {
781
rval = sleeptable.table[diff];
782
}
783
mutex_unlock(&sleeptable.mutex);
784
return rval;
785
}
786
787
static void init_queue(struct queue *q)
788
{
789
q->next_prod_idx = 0;
790
q->next_cons_idx = 0;
791
mutex_init(&q->mutex, NULL);
792
errno = pthread_cond_init(&q->cond, NULL);
793
if (errno)
794
err(EXIT_FAILURE, "pthread_cond_init() failed");
795
}
796
797
static __always_inline int queue_len(const struct queue *q)
798
{
799
if (q->next_prod_idx >= q->next_cons_idx)
800
return q->next_prod_idx - q->next_cons_idx;
801
else
802
return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx;
803
}
804
805
static __always_inline int queue_nr_free(const struct queue *q)
806
{
807
int nr_free = QUEUE_SIZE - queue_len(q);
808
809
/*
810
* If there is only one slot left we will anyway lie and claim that the
811
* queue is full because adding an element will make it appear empty
812
*/
813
if (nr_free == 1)
814
nr_free = 0;
815
return nr_free;
816
}
817
818
static __always_inline void queue_idx_inc(int *idx)
819
{
820
*idx = (*idx + 1) % QUEUE_SIZE;
821
}
822
823
static __always_inline void queue_push_to_back(struct queue *q,
824
const struct entry *e)
825
{
826
q->entries[q->next_prod_idx] = *e;
827
queue_idx_inc(&q->next_prod_idx);
828
}
829
830
static __always_inline struct entry queue_pop_from_front(struct queue *q)
831
{
832
struct entry e = q->entries[q->next_cons_idx];
833
834
queue_idx_inc(&q->next_cons_idx);
835
return e;
836
}
837
838
static __always_inline void queue_cond_signal(struct queue *q)
839
{
840
cond_signal(&q->cond);
841
}
842
843
static __always_inline void queue_cond_wait(struct queue *q)
844
{
845
cond_wait(&q->cond, &q->mutex);
846
}
847
848
static __always_inline int queue_try_to_add_entry(struct queue *q,
849
const struct entry *e)
850
{
851
int r = 0;
852
853
mutex_lock(&q->mutex);
854
if (queue_nr_free(q) > 0) {
855
queue_push_to_back(q, e);
856
cond_signal(&q->cond);
857
} else
858
r = -1;
859
mutex_unlock(&q->mutex);
860
return r;
861
}
862
863
static struct entry queue_wait_for_entry(struct queue *q)
864
{
865
struct entry e;
866
867
mutex_lock(&q->mutex);
868
while (true) {
869
if (queue_len(&printqueue) > 0) {
870
e = queue_pop_from_front(q);
871
break;
872
}
873
queue_cond_wait(q);
874
}
875
mutex_unlock(&q->mutex);
876
877
return e;
878
}
879
880
static const struct policy *policy_from_name(const char *name)
881
{
882
const struct policy *p = &policies[0];
883
884
while (p->name != NULL) {
885
if (!strcmp(name, p->name))
886
return p;
887
p++;
888
}
889
return NULL;
890
}
891
892
static const char *policy_name(int policy)
893
{
894
const struct policy *p = &policies[0];
895
static const char *rval = "unknown";
896
897
while (p->name != NULL) {
898
if (p->policy == policy)
899
return p->name;
900
p++;
901
}
902
return rval;
903
}
904
905
static bool is_relevant_tracer(const char *name)
906
{
907
unsigned int i;
908
909
for (i = 0; relevant_tracers[i]; i++)
910
if (!strcmp(name, relevant_tracers[i]))
911
return true;
912
return false;
913
}
914
915
static bool random_makes_sense(const char *name)
916
{
917
unsigned int i;
918
919
for (i = 0; random_tracers[i]; i++)
920
if (!strcmp(name, random_tracers[i]))
921
return true;
922
return false;
923
}
924
925
static void show_available(void)
926
{
927
char **tracers;
928
int found = 0;
929
int i;
930
931
tracers = tracefs_tracers(NULL);
932
for (i = 0; tracers && tracers[i]; i++) {
933
if (is_relevant_tracer(tracers[i]))
934
found++;
935
}
936
937
if (!tracers) {
938
warnx("%s", no_tracer_msg);
939
return;
940
}
941
942
if (!found) {
943
warnx("%s", no_latency_tr_msg);
944
tracefs_list_free(tracers);
945
return;
946
}
947
948
printf("The following latency tracers are available on your system:\n");
949
for (i = 0; tracers[i]; i++) {
950
if (is_relevant_tracer(tracers[i]))
951
printf("%s\n", tracers[i]);
952
}
953
tracefs_list_free(tracers);
954
}
955
956
static bool tracer_valid(const char *name, bool *notracer)
957
{
958
char **tracers;
959
int i;
960
bool rval = false;
961
962
*notracer = false;
963
tracers = tracefs_tracers(NULL);
964
if (!tracers) {
965
*notracer = true;
966
return false;
967
}
968
for (i = 0; tracers[i]; i++)
969
if (!strcmp(tracers[i], name)) {
970
rval = true;
971
break;
972
}
973
tracefs_list_free(tracers);
974
return rval;
975
}
976
977
static const char *find_default_tracer(void)
978
{
979
int i;
980
bool notracer;
981
bool valid;
982
983
for (i = 0; relevant_tracers[i]; i++) {
984
valid = tracer_valid(relevant_tracers[i], &notracer);
985
if (notracer)
986
errx(EXIT_FAILURE, "%s", no_tracer_msg);
987
if (valid)
988
return relevant_tracers[i];
989
}
990
return NULL;
991
}
992
993
/*
 * Return true with probability prob/1000, using the caller's thread-local
 * PRNG state in *buffer.
 */
static bool toss_coin(struct drand48_data *buffer, unsigned int prob)
{
	long r;

	if (unlikely(lrand48_r(buffer, &r))) {
		warnx("lrand48_r() failed");
		cleanup_exit(EXIT_FAILURE);
	}
	/* r is in [0, 999]; non-negative, so the signed/unsigned compare is safe */
	r = r % 1000L;
	if (r < prob)
		return true;
	else
		return false;
}


/*
 * Sleep for up to sleep_time us on printstate.cond, waking early if a new
 * print request arrives. Returns the planned delay, or -1 when a new
 * request cut the sleep short.
 */
static long go_to_sleep(const struct entry *req)
{
	struct timespec future;
	long delay = sleep_time;

	get_time_in_future(&future, delay);

	mutex_lock(&printstate.mutex);
	while (!printstate_has_new_req_arrived(req)) {
		cond_timedwait(&printstate.cond, &printstate.mutex, &future);
		if (time_has_passed(&future))
			break;
	}

	if (printstate_has_new_req_arrived(req))
		delay = -1;
	mutex_unlock(&printstate.mutex);

	return delay;
}
1029
1030
1031
static void set_priority(void)
1032
{
1033
int r;
1034
pid_t pid;
1035
struct sched_param param;
1036
1037
memset(&param, 0, sizeof(param));
1038
param.sched_priority = sched_pri;
1039
1040
pid = getpid();
1041
r = sched_setscheduler(pid, sched_policy, &param);
1042
1043
if (r != 0)
1044
err(EXIT_FAILURE, "sched_setscheduler() failed");
1045
}
1046
1047
pid_t latency_collector_gettid(void)
1048
{
1049
return (pid_t) syscall(__NR_gettid);
1050
}
1051
1052
static void print_priority(void)
1053
{
1054
pid_t tid;
1055
int policy;
1056
int r;
1057
struct sched_param param;
1058
1059
tid = latency_collector_gettid();
1060
r = pthread_getschedparam(pthread_self(), &policy, &param);
1061
if (r != 0) {
1062
warn("pthread_getschedparam() failed");
1063
cleanup_exit(EXIT_FAILURE);
1064
}
1065
mutex_lock(&print_mtx);
1066
printf("Thread %d runs with scheduling policy %s and priority %d\n",
1067
tid, policy_name(policy), param.sched_priority);
1068
mutex_unlock(&print_mtx);
1069
}
1070
1071
static __always_inline
1072
void __print_skipmessage(const struct short_msg *resize_msg,
1073
const struct timespec *timestamp, char *buffer,
1074
size_t bufspace, const struct entry *req, bool excuse,
1075
const char *str)
1076
{
1077
ssize_t bytes = 0;
1078
char *p = &buffer[0];
1079
long us, sec;
1080
int r;
1081
1082
sec = timestamp->tv_sec;
1083
us = timestamp->tv_nsec / 1000;
1084
1085
if (resize_msg != NULL && resize_msg->len > 0) {
1086
strncpy(p, resize_msg->buf, resize_msg->len);
1087
bytes += resize_msg->len;
1088
p += resize_msg->len;
1089
bufspace -= resize_msg->len;
1090
}
1091
1092
if (excuse)
1093
r = snprintf(p, bufspace,
1094
"%ld.%06ld Latency %d printout skipped due to %s\n",
1095
sec, us, req->ticket, str);
1096
else
1097
r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n",
1098
sec, us, req->ticket);
1099
1100
if (r < 0)
1101
warn("snprintf() failed");
1102
else
1103
bytes += r;
1104
1105
/* These prints could happen concurrently */
1106
mutex_lock(&print_mtx);
1107
write_or_die(fd_stdout, buffer, bytes);
1108
mutex_unlock(&print_mtx);
1109
}
1110
1111
/* Report that the printout for req was skipped because of a random delay. */
static void print_skipmessage(const struct short_msg *resize_msg,
			      const struct timespec *timestamp, char *buffer,
			      size_t bufspace, const struct entry *req,
			      bool excuse)
{
	__print_skipmessage(resize_msg, timestamp, buffer, bufspace, req,
			    excuse, "random delay");
}

/* Report that the trace for req was lost, with the given reason. */
static void print_lostmessage(const struct timespec *timestamp, char *buffer,
			      size_t bufspace, const struct entry *req,
			      const char *reason)
{
	__print_skipmessage(NULL, timestamp, buffer, bufspace, req, true,
			    reason);
}
1127
1128
static void print_tracefile(const struct short_msg *resize_msg,
1129
const struct timespec *timestamp, char *buffer,
1130
size_t bufspace, long slept,
1131
const struct entry *req)
1132
{
1133
static const int reserve = 256;
1134
char *p = &buffer[0];
1135
ssize_t bytes = 0;
1136
ssize_t bytes_tot = 0;
1137
long us, sec;
1138
long slept_ms;
1139
int trace_fd;
1140
1141
/* Save some space for the final string and final null char */
1142
bufspace = bufspace - reserve - 1;
1143
1144
if (resize_msg != NULL && resize_msg->len > 0) {
1145
bytes = resize_msg->len;
1146
strncpy(p, resize_msg->buf, bytes);
1147
bytes_tot += bytes;
1148
p += bytes;
1149
bufspace -= bytes;
1150
}
1151
1152
trace_fd = open(debug_tracefile, O_RDONLY);
1153
1154
if (trace_fd < 0) {
1155
warn("open() failed on %s", debug_tracefile);
1156
return;
1157
}
1158
1159
sec = timestamp->tv_sec;
1160
us = timestamp->tv_nsec / 1000;
1161
1162
if (slept != 0) {
1163
slept_ms = slept / 1000;
1164
bytes = snprintf(p, bufspace,
1165
"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n",
1166
sec, us, req->ticket, slept_ms);
1167
} else {
1168
bytes = snprintf(p, bufspace,
1169
"%ld.%06ld Latency %d immediate print\n", sec,
1170
us, req->ticket);
1171
}
1172
1173
if (bytes < 0) {
1174
warn("snprintf() failed");
1175
return;
1176
}
1177
p += bytes;
1178
bufspace -= bytes;
1179
bytes_tot += bytes;
1180
1181
bytes = snprintf(p, bufspace,
1182
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1183
);
1184
1185
if (bytes < 0) {
1186
warn("snprintf() failed");
1187
return;
1188
}
1189
1190
p += bytes;
1191
bufspace -= bytes;
1192
bytes_tot += bytes;
1193
1194
do {
1195
bytes = read(trace_fd, p, bufspace);
1196
if (bytes < 0) {
1197
if (errno == EINTR)
1198
continue;
1199
warn("read() failed on %s", debug_tracefile);
1200
if (unlikely(close(trace_fd) != 0))
1201
warn("close() failed on %s", debug_tracefile);
1202
return;
1203
}
1204
if (bytes == 0)
1205
break;
1206
p += bytes;
1207
bufspace -= bytes;
1208
bytes_tot += bytes;
1209
} while (true);
1210
1211
if (unlikely(close(trace_fd) != 0))
1212
warn("close() failed on %s", debug_tracefile);
1213
1214
printstate_cnt_dec();
1215
/* Add the reserve space back to the budget for the final string */
1216
bufspace += reserve;
1217
1218
bytes = snprintf(p, bufspace,
1219
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n");
1220
1221
if (bytes < 0) {
1222
warn("snprintf() failed");
1223
return;
1224
}
1225
1226
bytes_tot += bytes;
1227
1228
/* These prints could happen concurrently */
1229
mutex_lock(&print_mtx);
1230
write_or_die(fd_stdout, buffer, bytes_tot);
1231
mutex_unlock(&print_mtx);
1232
}
1233
1234
static char *get_no_opt(const char *opt)
1235
{
1236
char *no_opt;
1237
int s;
1238
1239
s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1;
1240
/* We may be called from cleanup_exit() via set_trace_opt() */
1241
no_opt = malloc_or_die_nocleanup(s);
1242
strcpy(no_opt, OPT_NO_PREFIX);
1243
strcat(no_opt, opt);
1244
return no_opt;
1245
}
1246
1247
/*
 * Return the next whitespace-delimited token from allopt as a newly
 * allocated string, or NULL when no token remains. On success, *next is
 * advanced past the returned token so the caller can iterate.
 */
static char *find_next_optstr(const char *allopt, const char **next)
{
	const char *begin;
	const char *end;
	char *r;
	int s = 0;

	if (allopt == NULL)
		return NULL;

	/*
	 * Skip leading separators. The cast is required because passing a
	 * negative plain char to the <ctype.h> functions is undefined
	 * behavior (CERT STR37-C).
	 */
	for (begin = allopt; *begin != '\0'; begin++) {
		if (isgraph((unsigned char)*begin))
			break;
	}

	if (*begin == '\0')
		return NULL;

	/* Measure the token: a run of printable, non-space characters */
	for (end = begin; *end != '\0' && isgraph((unsigned char)*end); end++)
		s++;

	r = malloc_or_die_nocleanup(s + 1);
	/* memcpy + explicit terminator; strncpy adds nothing here */
	memcpy(r, begin, s);
	r[s] = '\0';
	*next = begin + s;
	return r;
}
1274
1275
/*
 * Scan the token list in allopt for opt or its "no"-prefixed negation.
 * *found reports whether either form occurred; the return value is true
 * only when the positive form was encountered.
 */
static bool get_trace_opt(const char *allopt, const char *opt, bool *found)
{
	const char *cursor = allopt;
	char *no_opt = get_no_opt(opt);
	bool rval = false;
	char *token;

	*found = false;

	while ((token = find_next_optstr(cursor, &cursor)) != NULL) {
		bool is_set = !strcmp(token, opt);
		bool is_cleared = !strcmp(token, no_opt);

		free(token);
		if (is_set || is_cleared) {
			*found = true;
			rval = is_set;
			break;
		}
	}

	free(no_opt);
	return rval;
}
1307
1308
static int set_trace_opt(const char *opt, bool value)
1309
{
1310
char *str;
1311
int r;
1312
1313
if (value)
1314
str = strdup(opt);
1315
else
1316
str = get_no_opt(opt);
1317
1318
r = tracefs_instance_file_write(NULL, TR_OPTIONS, str);
1319
free(str);
1320
return r;
1321
}
1322
1323
void save_trace_opts(struct ftrace_state *state)
1324
{
1325
char *allopt;
1326
int psize;
1327
int i;
1328
1329
allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize);
1330
if (!allopt)
1331
errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS);
1332
1333
for (i = 0; i < OPTIDX_NR; i++)
1334
state->opt[i] = get_trace_opt(allopt, optstr[i],
1335
&state->opt_valid[i]);
1336
1337
free(allopt);
1338
}
1339
1340
/*
 * Write a value to a tracing control file, honoring the error handling
 * policy in h: ERR_EXIT (0) exits immediately via errx(), ERR_WARN only
 * warns, and ERR_CLEANUP warns and then exits through cleanup_exit().
 * If cur is non-NULL and already equals new, the write is skipped.
 */
static void write_file(const char *file, const char *cur, const char *new,
		       enum errhandling h)
{
	int r;
	static const char *emsg = "Failed to write to the %s file!";

	/* Do nothing if we know that the current and new value are equal */
	if (cur && !needs_change(cur, new))
		return;

	r = tracefs_instance_file_write(NULL, file, new);
	if (r < 0) {
		/* ERR_EXIT is 0, so any nonzero h means warn instead of errx */
		if (h) {
			warnx(emsg, file);
			if (h == ERR_CLEANUP)
				cleanup_exit(EXIT_FAILURE);
		} else
			errx(EXIT_FAILURE, emsg, file);
	}
	if (verbose_ftrace()) {
		/* Serialize with the printer threads' output */
		mutex_lock(&print_mtx);
		printf("%s was set to %s\n", file, new);
		mutex_unlock(&print_mtx);
	}
}
1365
1366
/*
 * Reset the tracing_max_latency file to 0. Passing NULL as the current
 * value forces the write regardless of the file's present content.
 */
static void reset_max_latency(void)
{
	write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP);
}
1370
1371
/*
 * Save the current tracer, threshold and trace options into save_state,
 * and switch to the nop tracer if some other tracer is active. Unless
 * the -F [--force] option was given, refuse to proceed (exit) when a
 * tracer other than nop is already running.
 */
static void save_and_disable_tracer(void)
{
	char *orig_th;
	char *tracer;
	bool need_nop = false;

	mutex_lock(&save_state.mutex);

	/* Snapshot options first, then the tracer and threshold */
	save_trace_opts(&save_state);
	tracer = read_file(TR_CURRENT, ERR_EXIT);
	orig_th = read_file(TR_THRESH, ERR_EXIT);

	if (needs_change(tracer, NOP_TRACER)) {
		/* Some tracer other than nop is already active */
		mutex_lock(&print_mtx);
		if (force_tracer) {
			printf(
				"The %s tracer is already in use but proceeding anyway!\n",
				tracer);
		} else {
			printf(
				"The %s tracer is already in use, cowardly bailing out!\n"
				"This could indicate that another program or instance is tracing.\n"
				"Use the -F [--force] option to disregard the current tracer.\n", tracer);
			/* exit() rather than cleanup_exit(): nothing was changed yet */
			exit(0);
		}
		mutex_unlock(&print_mtx);
		need_nop = true;
	}

	save_state.tracer = tracer;
	save_state.thresh = orig_th;

	/* Disable the foreign tracer before we touch anything else */
	if (need_nop)
		write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT);

	mutex_unlock(&save_state.mutex);
}
1408
1409
void set_trace_opts(struct ftrace_state *state, bool *new)
1410
{
1411
int i;
1412
int r;
1413
1414
/*
1415
* We only set options if we earlier detected that the option exists in
1416
* the trace_options file and that the wanted setting is different from
1417
* the one we saw in save_and_disable_tracer()
1418
*/
1419
for (i = 0; i < OPTIDX_NR; i++)
1420
if (state->opt_valid[i] &&
1421
state->opt[i] != new[i]) {
1422
r = set_trace_opt(optstr[i], new[i]);
1423
if (r < 0) {
1424
warnx("Failed to set the %s option to %s",
1425
optstr[i], bool2str(new[i]));
1426
cleanup_exit(EXIT_FAILURE);
1427
}
1428
if (verbose_ftrace()) {
1429
mutex_lock(&print_mtx);
1430
printf("%s in %s was set to %s\n", optstr[i],
1431
TR_OPTIONS, bool2str(new[i]));
1432
mutex_unlock(&print_mtx);
1433
}
1434
}
1435
}
1436
1437
/*
 * Enable tracing: apply the wanted trace options, write the configured
 * threshold, and finally install the chosen tracer. All of it is done
 * under save_state.mutex, using the values saved earlier to skip
 * writes that would not change anything.
 */
static void enable_tracer(void)
{
	mutex_lock(&save_state.mutex);
	set_trace_opts(&save_state, use_options);

	write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP);
	write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP);

	mutex_unlock(&save_state.mutex);
}
1447
1448
/*
 * Main event loop: watch the tracing_max_latency file with inotify and,
 * for every modification (i.e. every newly recorded max latency), hand a
 * ticketed print request to the printer threads via printqueue. Never
 * returns; exits via cleanup_exit() on errors or signals.
 */
static void tracing_loop(void)
{
	int ifd = inotify_init();
	int wd;
	const ssize_t bufsize = sizeof(inotify_buffer);
	const ssize_t istructsize = sizeof(struct inotify_event);
	char *buf = &inotify_buffer[0];
	ssize_t nr_read;
	char *p;
	int modified;
	struct inotify_event *event;
	struct entry req;
	char *buffer;
	const size_t bufspace = PRINT_BUFFER_SIZE;
	struct timespec timestamp;

	print_priority();

	buffer = malloc_or_die(bufspace);

	if (ifd < 0)
		err(EXIT_FAILURE, "inotify_init() failed!");


	if (setup_ftrace) {
		/*
		 * We must disable the tracer before resetting the max_latency
		 */
		save_and_disable_tracer();
		/*
		 * We must reset the max_latency before the inotify_add_watch()
		 * call.
		 */
		reset_max_latency();
	}

	wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY);
	if (wd < 0)
		err(EXIT_FAILURE, "inotify_add_watch() failed!");

	if (setup_ftrace)
		enable_tracer();

	/* Signals were blocked in main(); accept them from here on */
	signal_blocking(SIG_UNBLOCK);

	while (true) {
		modified = 0;
		check_signals();
		nr_read = read(ifd, buf, bufsize);
		check_signals();
		if (nr_read < 0) {
			if (errno == EINTR)
				continue;
			warn("read() failed on inotify fd!");
			cleanup_exit(EXIT_FAILURE);
		}
		if (nr_read == bufsize)
			warnx("inotify() buffer filled, skipping events");
		if (nr_read < istructsize) {
			warnx("read() returned too few bytes on inotify fd");
			cleanup_exit(EXIT_FAILURE);
		}

		/* Count the IN_MODIFY events in the batch we just read */
		for (p = buf; p < buf + nr_read;) {
			event = (struct inotify_event *) p;
			if ((event->mask & IN_MODIFY) != 0)
				modified++;
			p += istructsize + event->len;
		}
		while (modified > 0) {
			check_signals();
			mutex_lock(&printstate.mutex);
			check_signals();
			printstate_next_ticket(&req);
			if (printstate_cnt_read() > 0) {
				/*
				 * A printout is already in flight; mark this
				 * request completed and drop the remaining
				 * events of the batch.
				 */
				printstate_mark_req_completed(&req);
				mutex_unlock(&printstate.mutex);
				if (verbose_lostevent()) {
					clock_gettime_or_die(CLOCK_MONOTONIC,
							     &timestamp);
					print_lostmessage(&timestamp, buffer,
							  bufspace, &req,
							  "inotify loop");
				}
				break;
			}
			mutex_unlock(&printstate.mutex);
			if (queue_try_to_add_entry(&printqueue, &req) != 0) {
				/* These prints could happen concurrently */
				check_signals();
				mutex_lock(&print_mtx);
				check_signals();
				write_or_die(fd_stdout, queue_full_warning,
					     strlen(queue_full_warning));
				mutex_unlock(&print_mtx);
			}
			modified--;
		}
	}
}
1548
1549
/*
 * Printer thread body. Waits for print requests from tracing_loop() on
 * printqueue and prints the trace file (optionally after a random
 * sleep), or a skip/lost message, for each request. arg is a pointer to
 * this thread's random seed; runs forever.
 */
static void *do_printloop(void *arg)
{
	const size_t bufspace = PRINT_BUFFER_SIZE;
	char *buffer;
	long *rseed = (long *) arg;
	struct drand48_data drandbuf;
	long slept = 0;
	struct entry req;
	int prob = 0;
	struct timespec timestamp;
	struct short_msg resize_msg;

	print_priority();

	/* Per-thread PRNG state for the sleep coin toss below */
	if (srand48_r(*rseed, &drandbuf) != 0) {
		warn("srand48_r() failed!\n");
		cleanup_exit(EXIT_FAILURE);
	}

	buffer = malloc_or_die(bufspace);

	while (true) {
		req = queue_wait_for_entry(&printqueue);
		clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp);
		mutex_lock(&printstate.mutex);
		if (prev_req_won_race(&req)) {
			/* A newer request superseded this one; drop it */
			printstate_mark_req_completed(&req);
			mutex_unlock(&printstate.mutex);
			if (verbose_lostevent())
				print_lostmessage(&timestamp, buffer, bufspace,
						  &req, "print loop");
			continue;
		}
		mutex_unlock(&printstate.mutex);

		/*
		 * Toss a coin to decide if we want to sleep before printing
		 * out the backtrace. The reason for this is that opening
		 * /sys/kernel/tracing/trace will cause a blackout of
		 * hundreds of ms, where no latencies will be noted by the
		 * latency tracer. Thus by randomly sleeping we try to avoid
		 * missing traces systematically due to this. With this option
		 * we will sometimes get the first latency, some other times
		 * some of the later ones, in case of closely spaced traces.
		 */
		if (trace_enable && use_random_sleep) {
			slept = 0;
			prob = table_get_probability(&req, &resize_msg);
			if (!toss_coin(&drandbuf, prob))
				slept = go_to_sleep(&req);
			if (slept >= 0) {
				/* A print is ongoing */
				printstate_cnt_inc();
				/*
				 * We will do the printout below so we have to
				 * mark it as completed while we still have the
				 * mutex.
				 */
				mutex_lock(&printstate.mutex);
				printstate_mark_req_completed(&req);
				mutex_unlock(&printstate.mutex);
			}
		}
		if (trace_enable) {
			/*
			 * slept < 0 means that we detected another
			 * notification in go_to_sleep() above
			 */
			if (slept >= 0)
				/*
				 * N.B. printstate_cnt_dec(); will be called
				 * inside print_tracefile()
				 */
				print_tracefile(&resize_msg, &timestamp, buffer,
						bufspace, slept, &req);
			else
				print_skipmessage(&resize_msg, &timestamp,
						  buffer, bufspace, &req, true);
		} else {
			print_skipmessage(&resize_msg, &timestamp, buffer,
					  bufspace, &req, false);
		}
	}
	return NULL;
}
1634
1635
static void start_printthread(void)
1636
{
1637
unsigned int i;
1638
long *seed;
1639
int ufd;
1640
1641
ufd = open(DEV_URANDOM, O_RDONLY);
1642
if (nr_threads > MAX_THREADS) {
1643
warnx(
1644
"Number of requested print threads was %d, max number is %d\n",
1645
nr_threads, MAX_THREADS);
1646
nr_threads = MAX_THREADS;
1647
}
1648
for (i = 0; i < nr_threads; i++) {
1649
seed = malloc_or_die(sizeof(*seed));
1650
if (ufd < 0 ||
1651
read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
1652
printf(
1653
"Warning! Using trivial random number seed, since %s not available\n",
1654
DEV_URANDOM);
1655
fflush(stdout);
1656
*seed = i;
1657
}
1658
errno = pthread_create(&printthread[i], NULL, do_printloop,
1659
seed);
1660
if (errno)
1661
err(EXIT_FAILURE, "pthread_create()");
1662
}
1663
if (ufd > 0 && close(ufd) != 0)
1664
warn("close() failed");
1665
}
1666
1667
static void show_usage(void)
1668
{
1669
printf(
1670
"Usage: %s [OPTION]...\n\n"
1671
"Collect closely occurring latencies from %s\n"
1672
"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, "
1673
"wakeup,\nwakeup_dl, or wakeup_rt.\n\n"
1674
1675
"The occurrence of a latency is detected by monitoring the file\n"
1676
"%s with inotify.\n\n"
1677
1678
"The following options are supported:\n\n"
1679
1680
"-l, --list\t\tList the latency tracers that are supported by the\n"
1681
"\t\t\tcurrently running Linux kernel. If you don't see the\n"
1682
"\t\t\ttracer that you want, you will probably need to\n"
1683
"\t\t\tchange your kernel config and build a new kernel.\n\n"
1684
1685
"-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n"
1686
"\t\t\ttracer that is supported by the kernel in the following\n"
1687
"\t\t\torder of precedence:\n\n"
1688
"\t\t\tpreemptirqsoff\n"
1689
"\t\t\tpreemptoff\n"
1690
"\t\t\tirqsoff\n"
1691
"\t\t\twakeup\n"
1692
"\t\t\twakeup_rt\n"
1693
"\t\t\twakeup_dl\n"
1694
"\n"
1695
"\t\t\tIf TR is not on the list above, then a warning will be\n"
1696
"\t\t\tprinted.\n\n"
1697
1698
"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n"
1699
"\t\t\tthis option, the program will refuse to start tracing if\n"
1700
"\t\t\tany other tracer than the nop tracer is active.\n\n"
1701
1702
"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n"
1703
"\t\t\tfor the tracer. The default is 0, which means that\n"
1704
"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n"
1705
"\t\t\tset to 0 when the program is started and contains the\n"
1706
"\t\t\tmaximum of the latencies that have been encountered.\n\n"
1707
1708
"-f, --function\t\tEnable the function-trace option in trace_options. With\n"
1709
"\t\t\tthis option, ftrace will trace the functions that are\n"
1710
"\t\t\texecuted during a latency, without it we only get the\n"
1711
"\t\t\tbeginning, end, and backtrace.\n\n"
1712
1713
"-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
1714
"\t\t\toption causes ftrace to show the graph of how functions\n"
1715
"\t\t\tare calling other functions.\n\n"
1716
1717
"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
1718
"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
1719
"\t\t\tusing rr or fifo, remember that these policies may cause\n"
1720
"\t\t\tother tasks to experience latencies.\n\n"
1721
1722
"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n"
1723
"\t\t\tof PRI depends on the scheduling policy.\n\n"
1724
1725
"-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
1726
"\t\t\tthe trace file to standard output\n\n"
1727
1728
"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
1729
1730
"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
1731
"\t\t\t%ld ms, before reading the trace file. The\n"
1732
"\t\t\tprobabilities for sleep are chosen so that the\n"
1733
"\t\t\tprobability of obtaining any of a cluster of closely\n"
1734
"\t\t\toccurring latencies are equal, i.e. we will randomly\n"
1735
"\t\t\tchoose which one we collect from the trace file.\n\n"
1736
"\t\t\tThis option is probably only useful with the irqsoff,\n"
1737
"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n"
1738
1739
"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n"
1740
"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n"
1741
"\t\t\tlatencies are detected during a run, this value will\n"
1742
"\t\t\tautomatically be increased to NRLAT + 1 and then to\n"
1743
"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n"
1744
"\t\t\timplies -r. We need to know this number in order to\n"
1745
"\t\t\tbe able to calculate the probabilities of sleeping.\n"
1746
"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n"
1747
"\t\t\tdo an immediate printout will be:\n\n"
1748
"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n"
1749
"\t\t\tThe probability of sleeping will be:\n\n"
1750
"\t\t\t1 - P, where P is from the series above\n\n"
1751
"\t\t\tThis descending probability will cause us to choose\n"
1752
"\t\t\tan occurrence at random. Observe that the final\n"
1753
"\t\t\tprobability is 0, it is when we reach this probability\n"
1754
"\t\t\tthat we increase NRLAT automatically. As an example,\n"
1755
"\t\t\twith the default value of 2, the probabilities will be:\n\n"
1756
"\t\t\t1/2 0\n\n"
1757
"\t\t\tThis means, when a latency is detected we will sleep\n"
1758
"\t\t\twith 50%% probability. If we ever detect another latency\n"
1759
"\t\t\tduring the sleep period, then the probability of sleep\n"
1760
"\t\t\twill be 0%% and the table will be expanded to:\n\n"
1761
"\t\t\t1/3 1/2 0\n\n"
1762
1763
"-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n"
1764
"\t\t\tthen print a message every time that the NRLAT value\n"
1765
"\t\t\tis automatically increased. It also causes a message to\n"
1766
"\t\t\tbe printed when the ftrace settings are changed. If this\n"
1767
"\t\t\toption is given at least twice, then also print a\n"
1768
"\t\t\twarning for lost events.\n\n"
1769
1770
"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n"
1771
"\t\t\tprinting out the trace from the trace file. The default\n"
1772
"\t\t\tis %ld ms. This option implies -r.\n\n"
1773
1774
"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n"
1775
"\t\t\tconfigures the ftrace files in sysfs such as\n"
1776
"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n"
1777
1778
"-i, --tracefile FILE\tUse FILE as trace file. The default is\n"
1779
"\t\t\t%s.\n"
1780
"\t\t\tThis options implies -x\n\n"
1781
1782
"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n"
1783
"\t\t\t%s.\n"
1784
"\t\t\tThis options implies -x\n\n"
1785
,
1786
prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS,
1787
SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT,
1788
debug_tracefile_dflt, debug_maxlat_dflt);
1789
}
1790
1791
static void find_tracefiles(void)
1792
{
1793
debug_tracefile_dflt = tracefs_get_tracing_file("trace");
1794
if (debug_tracefile_dflt == NULL) {
1795
/* This is needed in show_usage() */
1796
debug_tracefile_dflt = DEBUG_NOFILE;
1797
}
1798
1799
debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency");
1800
if (debug_maxlat_dflt == NULL) {
1801
/* This is needed in show_usage() */
1802
debug_maxlat_dflt = DEBUG_NOFILE;
1803
}
1804
1805
debug_tracefile = debug_tracefile_dflt;
1806
debug_maxlat = debug_maxlat_dflt;
1807
}
1808
1809
/*
 * Return true if the string s consists solely of decimal digits. The
 * empty string counts as all digits.
 */
bool alldigits(const char *s)
{
	for (; *s != '\0'; s++)
		/*
		 * Cast to unsigned char: passing a negative plain char to
		 * isdigit() is undefined behavior (CERT STR37-C).
		 */
		if (!isdigit((unsigned char)*s))
			return false;
	return true;
}
1816
1817
/*
 * Exit with an error message naming argname unless optarg is a purely
 * decimal string.
 */
void check_alldigits(const char *optarg, const char *argname)
{
	if (alldigits(optarg))
		return;

	errx(EXIT_FAILURE,
	     "The %s parameter expects a decimal argument\n", argname);
}
1823
1824
/*
 * Parse the command line and set up the corresponding globals. After
 * option parsing, choose a default tracer when ftrace setup is enabled
 * and none was given, warn if random sleep does not fit the tracer,
 * ensure the trace files were found, and pick/clamp the scheduling
 * policy and priority.
 */
static void scan_arguments(int argc, char *argv[])
{
	int c;
	int i;
	int option_idx = 0;

	static struct option long_options[] = {
		{ "list",       no_argument,       0, 'l' },
		{ "tracer",     required_argument, 0, 't' },
		{ "force",      no_argument,       0, 'F' },
		{ "threshold",  required_argument, 0, 's' },
		{ "function",   no_argument,       0, 'f' },
		{ "graph",      no_argument,       0, 'g' },
		{ "policy",     required_argument, 0, 'c' },
		{ "priority",   required_argument, 0, 'p' },
		{ "help",       no_argument,       0, 'h' },
		{ "notrace",    no_argument,       0, 'n' },
		{ "random",     no_argument,       0, 'r' },
		{ "nrlat",      required_argument, 0, 'a' },
		{ "threads",    required_argument, 0, 'e' },
		{ "time",       required_argument, 0, 'u' },
		{ "verbose",    no_argument,       0, 'v' },
		{ "no-ftrace",  no_argument,       0, 'x' },
		{ "tracefile",  required_argument, 0, 'i' },
		{ "max-lat",    required_argument, 0, 'm' },
		{ 0,            0,                 0,  0  }
	};
	const struct policy *p;
	int max, min;
	int value;
	bool notracer, valid;

	/*
	 * We must do this before parsing the arguments because show_usage()
	 * needs to display these.
	 */
	find_tracefiles();

	while (true) {
		c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:",
				long_options, &option_idx);
		if (c == -1)
			break;

		switch (c) {
		case 'l':
			show_available();
			exit(0);
			break;
		case 't':
			/* A tracer outside the known list only warns ... */
			current_tracer = strdup(optarg);
			if (!is_relevant_tracer(current_tracer)) {
				warnx("%s is not a known latency tracer!\n",
				      current_tracer);
			}
			/* ... but one the kernel cannot provide is fatal */
			valid = tracer_valid(current_tracer, &notracer);
			if (notracer)
				errx(EXIT_FAILURE, "%s", no_tracer_msg);
			if (!valid)
				errx(EXIT_FAILURE,
"The tracer %s is not supported by your kernel!\n", current_tracer);
			break;
		case 'F':
			force_tracer = true;
			break;
		case 's':
			check_alldigits(optarg, "-s [--threshold]");
			threshold = strdup(optarg);
			break;
		case 'f':
			use_options[OPTIDX_FUNC_TR] = true;
			break;
		case 'g':
			use_options[OPTIDX_DISP_GR] = true;
			break;
		case 'c':
			p = policy_from_name(optarg);
			if (p != NULL) {
				sched_policy = p->policy;
				sched_policy_set = true;
				/* -p given earlier takes precedence */
				if (!sched_pri_set) {
					sched_pri = p->default_pri;
					sched_pri_set = true;
				}
			} else {
				warnx("Unknown scheduling %s\n", optarg);
				show_usage();
				exit(0);
			}
			break;
		case 'p':
			check_alldigits(optarg, "-p [--priority]");
			sched_pri = atoi(optarg);
			sched_pri_set = true;
			break;
		case 'h':
			show_usage();
			exit(0);
			break;
		case 'n':
			trace_enable = false;
			use_random_sleep = false;
			break;
		case 'e':
			check_alldigits(optarg, "-e [--threads]");
			value = atoi(optarg);
			if (value > 0)
				nr_threads = value;
			else {
				warnx("NRTHR must be > 0\n");
				show_usage();
				exit(0);
			}
			break;
		case 'u':
			/* -u implies -r (random sleep) */
			check_alldigits(optarg, "-u [--time]");
			value = atoi(optarg);
			if (value < 0) {
				warnx("TIME must be >= 0\n");
				show_usage();
				exit(0);
			}
			trace_enable = true;
			use_random_sleep = true;
			sleep_time = value * USEC_PER_MSEC;
			break;
		case 'v':
			verbosity++;
			break;
		case 'r':
			trace_enable = true;
			use_random_sleep = true;
			break;
		case 'a':
			/* -a implies -r (random sleep) */
			check_alldigits(optarg, "-a [--nrlat]");
			value = atoi(optarg);
			if (value <= 0) {
				warnx("NRLAT must be > 0\n");
				show_usage();
				exit(0);
			}
			trace_enable = true;
			use_random_sleep = true;
			table_startsize = value;
			break;
		case 'x':
			setup_ftrace = false;
			break;
		case 'i':
			/* -i implies -x (no ftrace configuration) */
			setup_ftrace = false;
			debug_tracefile = strdup(optarg);
			break;
		case 'm':
			/* -m implies -x (no ftrace configuration) */
			setup_ftrace = false;
			debug_maxlat = strdup(optarg);
			break;
		default:
			show_usage();
			exit(0);
			break;
		}
	}

	if (setup_ftrace) {
		if (!current_tracer) {
			current_tracer = find_default_tracer();
			if (!current_tracer)
				errx(EXIT_FAILURE,
"No default tracer found and tracer not specified\n");
		}

		if (use_random_sleep && !random_makes_sense(current_tracer)) {
			warnx("WARNING: The tracer is %s and random sleep has",
			      current_tracer);
			fprintf(stderr,
"been enabled. Random sleep is intended for the following tracers:\n");
			for (i = 0; random_tracers[i]; i++)
				fprintf(stderr, "%s\n", random_tracers[i]);
			fprintf(stderr, "\n");
		}
	}

	if (debug_tracefile == DEBUG_NOFILE ||
	    debug_maxlat == DEBUG_NOFILE)
		errx(EXIT_FAILURE,
"Could not find tracing directory e.g. /sys/kernel/tracing\n");

	/* Fall back to round-robin with the default RT priority */
	if (!sched_policy_set) {
		sched_policy = SCHED_RR;
		sched_policy_set = true;
		if (!sched_pri_set) {
			sched_pri = RT_DEFAULT_PRI;
			sched_pri_set = true;
		}
	}

	/* Clamp the priority to what the chosen policy allows */
	max = sched_get_priority_max(sched_policy);
	min = sched_get_priority_min(sched_policy);

	if (sched_pri < min) {
		printf(
"ATTENTION: Increasing priority to minimum, which is %d\n", min);
		sched_pri = min;
	}
	if (sched_pri > max) {
		printf(
"ATTENTION: Reducing priority to maximum, which is %d\n", max);
		sched_pri = max;
	}
}
2034
2035
static void show_params(void)
2036
{
2037
printf(
2038
"\n"
2039
"Running with scheduling policy %s and priority %d. Using %d print threads.\n",
2040
policy_name(sched_policy), sched_pri, nr_threads);
2041
if (trace_enable) {
2042
if (use_random_sleep) {
2043
printf(
2044
"%s will be printed with random delay\n"
2045
"Start size of the probability table:\t\t\t%d\n"
2046
"Print a message when the prob. table changes size:\t%s\n"
2047
"Print a warning when an event has been lost:\t\t%s\n"
2048
"Sleep time is:\t\t\t\t\t\t%ld ms\n",
2049
debug_tracefile,
2050
table_startsize,
2051
bool2str(verbose_sizechange()),
2052
bool2str(verbose_lostevent()),
2053
sleep_time / USEC_PER_MSEC);
2054
} else {
2055
printf("%s will be printed immediately\n",
2056
debug_tracefile);
2057
}
2058
} else {
2059
printf("%s will not be printed\n",
2060
debug_tracefile);
2061
}
2062
if (setup_ftrace) {
2063
printf("Tracer:\t\t\t\t\t\t\t%s\n"
2064
"%s option:\t\t\t\t\t%s\n"
2065
"%s option:\t\t\t\t\t%s\n",
2066
current_tracer,
2067
optstr[OPTIDX_FUNC_TR],
2068
bool2str(use_options[OPTIDX_FUNC_TR]),
2069
optstr[OPTIDX_DISP_GR],
2070
bool2str(use_options[OPTIDX_DISP_GR]));
2071
if (!strcmp(threshold, "0"))
2072
printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n");
2073
else
2074
printf("Threshold:\t\t\t\t\t\t%s\n", threshold);
2075
}
2076
printf("\n");
2077
}
2078
2079
/*
 * Entry point: initialize state, signal handling and output, parse the
 * command line, set up the probability table and the printer threads,
 * and finally enter the inotify tracing loop (which never returns).
 */
int main(int argc, char *argv[])
{
	/* Block signals until tracing_loop() is ready to accept them */
	init_save_state();
	signal_blocking(SIG_BLOCK);
	setup_sig_handler();
	open_stdout();

	if (argc >= 1)
		prg_name = argv[0];
	else
		prg_name = prg_unknown;

	scan_arguments(argc, argv);
	show_params();

	init_printstate();
	init_print_mtx();
	if (use_random_sleep) {
		init_probabilities();
		if (verbose_sizechange())
			printf("Initializing probability table to %d\n",
			       table_startsize);
		sleeptable_resize(table_startsize, false, NULL);
	}
	set_priority();
	init_queue(&printqueue);
	start_printthread();
	tracing_loop();
	return 0;
}
2109
2110