Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/accounting/delaytop.c
29265 views
1
// SPDX-License-Identifier: GPL-2.0
/*
 * delaytop.c - system-wide delay monitoring tool.
 *
 * This tool provides real-time monitoring and statistics of
 * system, container, and task-level delays, including CPU,
 * memory, IO, and IRQ. It supports an interactive (top-like)
 * mode as well as one-shot output, and can report delay
 * information for the whole system, specific containers
 * (cgroups), or individual tasks (PIDs).
 *
 * Key features:
 *  - Collects per-task delay accounting statistics via taskstats.
 *  - Collects system-wide PSI information.
 *  - Supports sorting and filtering.
 *  - Supports an interactive mode (periodic screen refresh).
 *
 * Copyright (C) Fan Yu, ZTE Corp. 2025
 * Copyright (C) Wang Yaxin, ZTE Corp. 2025
 *
 * Compile with
 *	gcc -I/usr/src/linux/include delaytop.c -o delaytop
 */

#include <stdio.h>
25
#include <stdlib.h>
26
#include <string.h>
27
#include <errno.h>
28
#include <unistd.h>
29
#include <fcntl.h>
30
#include <getopt.h>
31
#include <signal.h>
32
#include <time.h>
33
#include <dirent.h>
34
#include <ctype.h>
35
#include <stdbool.h>
36
#include <sys/types.h>
37
#include <sys/stat.h>
38
#include <sys/socket.h>
39
#include <sys/select.h>
40
#include <termios.h>
41
#include <limits.h>
42
#include <linux/genetlink.h>
43
#include <linux/taskstats.h>
44
#include <linux/cgroupstats.h>
45
#include <stddef.h>
46
47
/* procfs files exposing Pressure Stall Information (PSI) */
#define PSI_PATH "/proc/pressure"
#define PSI_CPU_PATH "/proc/pressure/cpu"
#define PSI_MEMORY_PATH "/proc/pressure/memory"
#define PSI_IO_PATH "/proc/pressure/io"
#define PSI_IRQ_PATH "/proc/pressure/irq"

/*
 * Netlink attribute helpers.
 * Every macro argument is parenthesized so that expressions such as
 * NLA_PAYLOAD(a << b) evaluate as intended.
 */
#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)

/*
 * Generic netlink helpers. The pointer is converted to char * before the
 * offset is added: arithmetic on void * is a GNU extension, not ISO C.
 */
#define GENLMSG_DATA(glh) ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)

#define TASK_COMM_LEN 16
#define MAX_MSG_SIZE 1024
#define MAX_TASKS 1000
#define MAX_BUF_LEN 256

/*
 * Copy one same-named member from a local 'struct taskstats stats' into
 * tasks[task_count]. Relies on 'tasks' and 'stats' being in scope.
 */
#define SET_TASK_STAT(task_count, field) \
	(tasks[(task_count)].field = stats.field)

/* fprintf() wrapper that evaluates to non-zero on success, zero on error */
#define BOOL_FPRINT(stream, fmt, ...) \
({ \
	int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
	ret >= 0; \
})

/* Average delay in ms for the <field>_delay_total / <field>_count pair */
#define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count)

#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
#define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
#define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"

/*
 * Build one struct field_desc initializer: field name, interactive command
 * key, offsets of the total/count members in struct task_info, and the
 * display modes in which the field may be used for sorting.
 */
#define SORT_FIELD(name, cmd, modes) \
	{#name, #cmd, \
	offsetof(struct task_info, name##_delay_total), \
	offsetof(struct task_info, name##_count), \
	modes}
/* Table terminator: lookups stop at the entry whose name is NULL */
#define END_FIELD {NULL, NULL, 0, 0, 0}

/* Display mode types */
#define MODE_TYPE_ALL (0xFFFFFFFF)
#define MODE_DEFAULT (1 << 0)
#define MODE_MEMVERBOSE (1 << 1)

/*
 * Snapshot of the values parsed from the /proc/pressure files.
 *
 * For each resource there is a "some" and/or "full" line; each line
 * carries three averages (avg10/avg60/avg300, printed as percentages)
 * and a cumulative total (divided by 1000 and labelled ms when shown).
 * IRQ pressure is only read from its "full" line.
 */
struct psi_stats {
	/* CPU */
	double cpu_some_avg10;
	double cpu_some_avg60;
	double cpu_some_avg300;
	unsigned long long cpu_some_total;
	double cpu_full_avg10;
	double cpu_full_avg60;
	double cpu_full_avg300;
	unsigned long long cpu_full_total;

	/* Memory */
	double memory_some_avg10;
	double memory_some_avg60;
	double memory_some_avg300;
	unsigned long long memory_some_total;
	double memory_full_avg10;
	double memory_full_avg60;
	double memory_full_avg300;
	unsigned long long memory_full_total;

	/* IO */
	double io_some_avg10;
	double io_some_avg60;
	double io_some_avg300;
	unsigned long long io_some_total;
	double io_full_avg10;
	double io_full_avg60;
	double io_full_avg300;
	unsigned long long io_full_total;

	/* IRQ ("full" only) */
	double irq_full_avg10;
	double irq_full_avg60;
	double irq_full_avg300;
	unsigned long long irq_full_total;
};

/* Task delay information structure */
105
struct task_info {
106
int pid;
107
int tgid;
108
char command[TASK_COMM_LEN];
109
unsigned long long cpu_count;
110
unsigned long long cpu_delay_total;
111
unsigned long long blkio_count;
112
unsigned long long blkio_delay_total;
113
unsigned long long swapin_count;
114
unsigned long long swapin_delay_total;
115
unsigned long long freepages_count;
116
unsigned long long freepages_delay_total;
117
unsigned long long thrashing_count;
118
unsigned long long thrashing_delay_total;
119
unsigned long long compact_count;
120
unsigned long long compact_delay_total;
121
unsigned long long wpcopy_count;
122
unsigned long long wpcopy_delay_total;
123
unsigned long long irq_count;
124
unsigned long long irq_delay_total;
125
unsigned long long mem_count;
126
unsigned long long mem_delay_total;
127
};
128
129
/*
 * Process-state counters for the monitored container, copied from the
 * same-named members of the cgroupstats netlink reply.
 */
struct container_stats {
	int nr_sleeping;		/* sleeping processes */
	int nr_running;			/* running processes */
	int nr_stopped;			/* stopped processes */
	int nr_uninterruptible;		/* uninterruptible processes */
	int nr_io_wait;			/* processes waiting on IO */
};

/*
 * Description of one sortable delay field.
 *
 * Instances are generated by SORT_FIELD(); the two offsets locate the
 * field's <name>_delay_total and <name>_count members inside
 * struct task_info so the sort comparator can read them generically.
 */
struct field_desc {
	/* Name accepted by the --sort command line option */
	const char *name;
	/* Key that selects this field in the interactive sort prompt */
	const char *cmd_char;
	/* offsetof(struct task_info, <name>_delay_total) */
	unsigned long total_offset;
	/* offsetof(struct task_info, <name>_count) */
	unsigned long count_offset;
	/* Bitmask of MODE_* display modes that may sort by this field */
	size_t supported_modes;
};

/*
 * Run-time settings. parse_args() fills in the defaults and the values
 * given on the command line; display_mode and sort_field can also be
 * changed later by interactive key presses.
 */
struct config {
	/* Refresh interval in seconds */
	int delay;
	/* Number of refreshes to perform; 0 means no limit */
	int iterations;
	/* Upper bound on the number of task rows printed */
	int max_processes;
	/* Non-zero: print a single snapshot and exit */
	int output_one_time;
	/* PID to monitor; 0 means all tasks */
	int monitor_pid;
	/* cgroup path of the container to report on, or NULL */
	char *container_path;
	/* Field the task list is currently sorted by */
	const struct field_desc *sort_field;
	/* Active MODE_* display mode */
	size_t display_mode;
};

/* Global variables */
160
static struct config cfg;
161
static struct psi_stats psi;
162
static struct task_info tasks[MAX_TASKS];
163
static int task_count;
164
static int running = 1;
165
static struct container_stats container_stats;
166
static const struct field_desc sort_fields[] = {
167
SORT_FIELD(cpu, c, MODE_DEFAULT),
168
SORT_FIELD(blkio, i, MODE_DEFAULT),
169
SORT_FIELD(irq, q, MODE_DEFAULT),
170
SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE),
171
SORT_FIELD(swapin, s, MODE_MEMVERBOSE),
172
SORT_FIELD(freepages, r, MODE_MEMVERBOSE),
173
SORT_FIELD(thrashing, t, MODE_MEMVERBOSE),
174
SORT_FIELD(compact, p, MODE_MEMVERBOSE),
175
SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE),
176
END_FIELD
177
};
178
static int sort_selected;
179
180
/* Netlink socket variables */
181
static int nl_sd = -1;
182
static int family_id;
183
184
/*
 * Terminal handling for interactive use.
 *
 * enable_raw_mode() switches stdin to non-canonical, no-echo mode so single
 * key presses (q-to-quit, sort selection, ...) are delivered immediately.
 * disable_raw_mode() restores the settings captured by enable_raw_mode().
 *
 * The saved settings are only restored when they were actually captured
 * and applied: if stdin is not a terminal, tcgetattr() fails and both
 * functions become no-ops instead of pushing a never-filled termios
 * structure back to the terminal.
 */
static struct termios orig_termios;
/* Non-zero while the terminal is in raw mode; also read by the handler */
static volatile sig_atomic_t raw_mode_enabled;

/*
 * SIGINT/SIGTERM handler: put the terminal back before dying, otherwise
 * the shell is left without echo. The default disposition is reinstated
 * and the signal re-raised so the process still terminates by that signal.
 */
static void restore_terminal_on_signal(int sig)
{
	if (raw_mode_enabled)
		tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
	signal(sig, SIG_DFL);
	raise(sig);
}

static void enable_raw_mode(void)
{
	struct termios raw;

	/* Fails (e.g. ENOTTY) when stdin is not a terminal: nothing to do */
	if (tcgetattr(STDIN_FILENO, &orig_termios) != 0)
		return;

	raw = orig_termios;
	raw.c_lflag &= ~(ICANON | ECHO);
	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw) != 0)
		return;

	raw_mode_enabled = 1;
	signal(SIGINT, restore_terminal_on_signal);
	signal(SIGTERM, restore_terminal_on_signal);
}

static void disable_raw_mode(void)
{
	if (!raw_mode_enabled)
		return;

	tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
	raw_mode_enabled = 0;
}

/* Find field descriptor by command line */
201
static const struct field_desc *get_field_by_cmd_char(char ch)
202
{
203
const struct field_desc *field;
204
205
for (field = sort_fields; field->name != NULL; field++) {
206
if (field->cmd_char[0] == ch)
207
return field;
208
}
209
210
return NULL;
211
}
212
213
/* Find field descriptor by name with string comparison */
214
static const struct field_desc *get_field_by_name(const char *name)
215
{
216
const struct field_desc *field;
217
size_t field_len;
218
219
for (field = sort_fields; field->name != NULL; field++) {
220
field_len = strlen(field->name);
221
if (field_len != strlen(name))
222
continue;
223
if (strncmp(field->name, name, field_len) == 0)
224
return field;
225
}
226
227
return NULL;
228
}
229
230
/* Find display name for a field descriptor */
231
static const char *get_name_by_field(const struct field_desc *field)
232
{
233
return field ? field->name : "UNKNOWN";
234
}
235
236
/* Generate string of available field names */
237
static void display_available_fields(size_t mode)
238
{
239
const struct field_desc *field;
240
char buf[MAX_BUF_LEN];
241
242
buf[0] = '\0';
243
244
for (field = sort_fields; field->name != NULL; field++) {
245
if (!(field->supported_modes & mode))
246
continue;
247
strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
248
strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
249
buf[MAX_BUF_LEN - 1] = '\0';
250
}
251
252
fprintf(stderr, "Available fields: %s\n", buf);
253
}
254
255
/* Print the command line help on stdout and terminate with status 0 */
static void usage(void)
{
	static const char help_text[] =
		"Usage: delaytop [Options]\n"
		"Options:\n"
		" -h, --help Show this help message and exit\n"
		" -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
		" -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
		" -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
		" -o, --once Display once and exit\n"
		" -p, --pid=PID Monitor only the specified PID\n"
		" -C, --container=PATH Monitor the container at specified cgroup path\n"
		" -s, --sort=FIELD Sort by delay field (default: cpu)\n"
		" -M, --memverbose Display memory detailed information\n";

	/* The text contains no conversion specifiers, so fputs() is equivalent */
	fputs(help_text, stdout);
	exit(0);
}

/* Parse command line arguments and set configuration */
273
static void parse_args(int argc, char **argv)
274
{
275
int c;
276
const struct field_desc *field;
277
struct option long_options[] = {
278
{"help", no_argument, 0, 'h'},
279
{"delay", required_argument, 0, 'd'},
280
{"iterations", required_argument, 0, 'n'},
281
{"pid", required_argument, 0, 'p'},
282
{"once", no_argument, 0, 'o'},
283
{"processes", required_argument, 0, 'P'},
284
{"sort", required_argument, 0, 's'},
285
{"container", required_argument, 0, 'C'},
286
{"memverbose", no_argument, 0, 'M'},
287
{0, 0, 0, 0}
288
};
289
290
/* Set defaults */
291
cfg.delay = 2;
292
cfg.iterations = 0;
293
cfg.max_processes = 20;
294
cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */
295
cfg.output_one_time = 0;
296
cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
297
cfg.container_path = NULL;
298
cfg.display_mode = MODE_DEFAULT;
299
300
while (1) {
301
int option_index = 0;
302
303
c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index);
304
if (c == -1)
305
break;
306
307
switch (c) {
308
case 'h':
309
usage();
310
break;
311
case 'd':
312
cfg.delay = atoi(optarg);
313
if (cfg.delay < 1) {
314
fprintf(stderr, "Error: delay must be >= 1.\n");
315
exit(1);
316
}
317
break;
318
case 'n':
319
cfg.iterations = atoi(optarg);
320
if (cfg.iterations < 0) {
321
fprintf(stderr, "Error: iterations must be >= 0.\n");
322
exit(1);
323
}
324
break;
325
case 'p':
326
cfg.monitor_pid = atoi(optarg);
327
if (cfg.monitor_pid < 1) {
328
fprintf(stderr, "Error: pid must be >= 1.\n");
329
exit(1);
330
}
331
break;
332
case 'o':
333
cfg.output_one_time = 1;
334
break;
335
case 'P':
336
cfg.max_processes = atoi(optarg);
337
if (cfg.max_processes < 1) {
338
fprintf(stderr, "Error: processes must be >= 1.\n");
339
exit(1);
340
}
341
if (cfg.max_processes > MAX_TASKS) {
342
fprintf(stderr, "Warning: processes capped to %d.\n",
343
MAX_TASKS);
344
cfg.max_processes = MAX_TASKS;
345
}
346
break;
347
case 'C':
348
cfg.container_path = strdup(optarg);
349
break;
350
case 's':
351
if (strlen(optarg) == 0) {
352
fprintf(stderr, "Error: empty sort field\n");
353
exit(1);
354
}
355
356
field = get_field_by_name(optarg);
357
/* Show available fields if invalid option provided */
358
if (!field) {
359
fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
360
display_available_fields(MODE_TYPE_ALL);
361
exit(1);
362
}
363
364
cfg.sort_field = field;
365
break;
366
case 'M':
367
cfg.display_mode = MODE_MEMVERBOSE;
368
cfg.sort_field = get_field_by_name("mem");
369
break;
370
default:
371
fprintf(stderr, "Try 'delaytop --help' for more information.\n");
372
exit(1);
373
}
374
}
375
}
376
377
/* Calculate average delay in milliseconds for overall memory */
378
static void set_mem_delay_total(struct task_info *t)
379
{
380
t->mem_delay_total = t->swapin_delay_total +
381
t->freepages_delay_total +
382
t->thrashing_delay_total +
383
t->compact_delay_total +
384
t->wpcopy_delay_total;
385
}
386
387
static void set_mem_count(struct task_info *t)
388
{
389
t->mem_count = t->swapin_count +
390
t->freepages_count +
391
t->thrashing_count +
392
t->compact_count +
393
t->wpcopy_count;
394
}
395
396
/*
 * Open a raw NETLINK_GENERIC socket and bind it to a zeroed netlink
 * address. Returns the descriptor, or -1 when socket() or bind() fails
 * (a message is printed only for the bind failure).
 */
static int create_nl_socket(void)
{
	struct sockaddr_nl addr;
	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);

	if (sock < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;

	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
		return sock;

	fprintf(stderr, "Failed to bind socket when create nl_socket\n");
	close(sock);
	return -1;
}

/* Send a command via netlink */
419
static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
420
__u8 genl_cmd, __u16 nla_type,
421
void *nla_data, int nla_len)
422
{
423
struct sockaddr_nl nladdr;
424
struct nlattr *na;
425
int r, buflen;
426
char *buf;
427
428
struct {
429
struct nlmsghdr n;
430
struct genlmsghdr g;
431
char buf[MAX_MSG_SIZE];
432
} msg;
433
434
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
435
msg.n.nlmsg_type = nlmsg_type;
436
msg.n.nlmsg_flags = NLM_F_REQUEST;
437
msg.n.nlmsg_seq = 0;
438
msg.n.nlmsg_pid = nlmsg_pid;
439
msg.g.cmd = genl_cmd;
440
msg.g.version = 0x1;
441
na = (struct nlattr *) GENLMSG_DATA(&msg);
442
na->nla_type = nla_type;
443
na->nla_len = nla_len + NLA_HDRLEN;
444
memcpy(NLA_DATA(na), nla_data, nla_len);
445
msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
446
447
buf = (char *) &msg;
448
buflen = msg.n.nlmsg_len;
449
memset(&nladdr, 0, sizeof(nladdr));
450
nladdr.nl_family = AF_NETLINK;
451
while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
452
sizeof(nladdr))) < buflen) {
453
if (r > 0) {
454
buf += r;
455
buflen -= r;
456
} else if (errno != EAGAIN)
457
return -1;
458
}
459
return 0;
460
}
461
462
/*
 * Get family ID for taskstats via netlink.
 *
 * Sends CTRL_CMD_GETFAMILY for TASKSTATS_GENL_NAME to the generic netlink
 * controller on @sd and searches the reply for CTRL_ATTR_FAMILY_ID.
 *
 * Returns the family id, or 0 when the request could not be sent, the
 * reply is missing/invalid/an error, or the attribute is not present.
 */
static int get_family_id(int sd)
{
	struct {
		struct nlmsghdr n;
		struct genlmsghdr g;
		char buf[256];
	} ans;
	/* send_cmd() takes a non-const payload pointer, so use a local copy */
	char name[] = TASKSTATS_GENL_NAME;
	struct nlattr *na;
	__u16 id = 0;
	int rep_len;
	int len;

	if (send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
		     CTRL_ATTR_FAMILY_NAME, (void *)name, sizeof(name)) < 0) {
		fprintf(stderr, "Failed to send cmd for family id\n");
		return 0;
	}

	/* Validate the receive result before looking at the header */
	rep_len = recv(sd, &ans, sizeof(ans), 0);
	if (rep_len < 0 || !NLMSG_OK((&ans.n), rep_len) ||
	    ans.n.nlmsg_type == NLMSG_ERROR) {
		fprintf(stderr, "Failed to receive response for family id\n");
		return 0;
	}

	/* Walk the attributes, never stepping outside the received payload */
	len = GENLMSG_PAYLOAD(&ans.n);
	na = (struct nlattr *) GENLMSG_DATA(&ans);
	while (len >= NLA_HDRLEN) {
		if (na->nla_len < NLA_HDRLEN || na->nla_len > len)
			break;
		if (na->nla_type == CTRL_ATTR_FAMILY_ID &&
		    na->nla_len >= NLA_HDRLEN + (int)sizeof(id)) {
			memcpy(&id, NLA_DATA(na), sizeof(id));
			break;
		}
		len -= NLA_ALIGN(na->nla_len);
		na = NLA_NEXT(na);
	}

	return id;
}

static int read_psi_stats(void)
501
{
502
FILE *fp;
503
char line[256];
504
int ret = 0;
505
int error_count = 0;
506
507
/* Check if PSI path exists */
508
if (access(PSI_PATH, F_OK) != 0) {
509
fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH);
510
fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n");
511
return -1;
512
}
513
514
/* Zero all fields */
515
memset(&psi, 0, sizeof(psi));
516
517
/* CPU pressure */
518
fp = fopen(PSI_CPU_PATH, "r");
519
if (fp) {
520
while (fgets(line, sizeof(line), fp)) {
521
if (strncmp(line, "some", 4) == 0) {
522
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
523
&psi.cpu_some_avg10, &psi.cpu_some_avg60,
524
&psi.cpu_some_avg300, &psi.cpu_some_total);
525
if (ret != 4) {
526
fprintf(stderr, "Failed to parse CPU some PSI data\n");
527
error_count++;
528
}
529
} else if (strncmp(line, "full", 4) == 0) {
530
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
531
&psi.cpu_full_avg10, &psi.cpu_full_avg60,
532
&psi.cpu_full_avg300, &psi.cpu_full_total);
533
if (ret != 4) {
534
fprintf(stderr, "Failed to parse CPU full PSI data\n");
535
error_count++;
536
}
537
}
538
}
539
fclose(fp);
540
} else {
541
fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH);
542
error_count++;
543
}
544
545
/* Memory pressure */
546
fp = fopen(PSI_MEMORY_PATH, "r");
547
if (fp) {
548
while (fgets(line, sizeof(line), fp)) {
549
if (strncmp(line, "some", 4) == 0) {
550
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
551
&psi.memory_some_avg10, &psi.memory_some_avg60,
552
&psi.memory_some_avg300, &psi.memory_some_total);
553
if (ret != 4) {
554
fprintf(stderr, "Failed to parse Memory some PSI data\n");
555
error_count++;
556
}
557
} else if (strncmp(line, "full", 4) == 0) {
558
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
559
&psi.memory_full_avg10, &psi.memory_full_avg60,
560
&psi.memory_full_avg300, &psi.memory_full_total);
561
if (ret != 4) {
562
fprintf(stderr, "Failed to parse Memory full PSI data\n");
563
error_count++;
564
}
565
}
566
}
567
fclose(fp);
568
} else {
569
fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH);
570
error_count++;
571
}
572
573
/* IO pressure */
574
fp = fopen(PSI_IO_PATH, "r");
575
if (fp) {
576
while (fgets(line, sizeof(line), fp)) {
577
if (strncmp(line, "some", 4) == 0) {
578
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
579
&psi.io_some_avg10, &psi.io_some_avg60,
580
&psi.io_some_avg300, &psi.io_some_total);
581
if (ret != 4) {
582
fprintf(stderr, "Failed to parse IO some PSI data\n");
583
error_count++;
584
}
585
} else if (strncmp(line, "full", 4) == 0) {
586
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
587
&psi.io_full_avg10, &psi.io_full_avg60,
588
&psi.io_full_avg300, &psi.io_full_total);
589
if (ret != 4) {
590
fprintf(stderr, "Failed to parse IO full PSI data\n");
591
error_count++;
592
}
593
}
594
}
595
fclose(fp);
596
} else {
597
fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH);
598
error_count++;
599
}
600
601
/* IRQ pressure (only full) */
602
fp = fopen(PSI_IRQ_PATH, "r");
603
if (fp) {
604
while (fgets(line, sizeof(line), fp)) {
605
if (strncmp(line, "full", 4) == 0) {
606
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
607
&psi.irq_full_avg10, &psi.irq_full_avg60,
608
&psi.irq_full_avg300, &psi.irq_full_total);
609
if (ret != 4) {
610
fprintf(stderr, "Failed to parse IRQ full PSI data\n");
611
error_count++;
612
}
613
}
614
}
615
fclose(fp);
616
} else {
617
fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH);
618
error_count++;
619
}
620
621
/* Return error count: 0 means success, >0 means warnings, -1 means fatal error */
622
if (error_count > 0) {
623
fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count);
624
return error_count;
625
}
626
627
return 0;
628
}
629
630
/*
 * Read the command name of @pid from /proc/<pid>/comm.
 *
 * @comm_buf: destination buffer
 * @buf_size: size of @comm_buf in bytes
 *
 * On success the first line of the file, without its trailing newline and
 * truncated to fit, is stored NUL-terminated in @comm_buf and 0 is
 * returned. Returns -1 when the buffer is unusable, the file cannot be
 * opened (a message is printed) or nothing could be read.
 */
static int read_comm(int pid, char *comm_buf, size_t buf_size)
{
	char path[64];
	int ret = -1;
	size_t len;
	FILE *fp;
	int max;

	/* fgets() needs room for at least the terminator */
	if (!comm_buf || buf_size == 0)
		return -1;
	/* fgets() takes an int size: clamp instead of truncating the value */
	max = buf_size > INT_MAX ? INT_MAX : (int)buf_size;

	snprintf(path, sizeof(path), "/proc/%d/comm", pid);
	fp = fopen(path, "r");
	if (!fp) {
		fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid);
		return ret;
	}

	if (fgets(comm_buf, max, fp)) {
		len = strlen(comm_buf);
		if (len > 0 && comm_buf[len - 1] == '\n')
			comm_buf[len - 1] = '\0';
		ret = 0;
	}

	fclose(fp);

	return ret;
}

static void fetch_and_fill_task_info(int pid, const char *comm)
657
{
658
struct {
659
struct nlmsghdr n;
660
struct genlmsghdr g;
661
char buf[MAX_MSG_SIZE];
662
} resp;
663
struct taskstats stats;
664
struct nlattr *nested;
665
struct nlattr *na;
666
int nested_len;
667
int nl_len;
668
int rc;
669
670
/* Send request for task stats */
671
if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET,
672
TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) {
673
fprintf(stderr, "Failed to send request for task stats\n");
674
return;
675
}
676
677
/* Receive response */
678
rc = recv(nl_sd, &resp, sizeof(resp), 0);
679
if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
680
fprintf(stderr, "Failed to receive response for task stats\n");
681
return;
682
}
683
684
/* Parse response */
685
nl_len = GENLMSG_PAYLOAD(&resp.n);
686
na = (struct nlattr *) GENLMSG_DATA(&resp);
687
while (nl_len > 0) {
688
if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) {
689
nested = (struct nlattr *) NLA_DATA(na);
690
nested_len = NLA_PAYLOAD(na->nla_len);
691
while (nested_len > 0) {
692
if (nested->nla_type == TASKSTATS_TYPE_STATS) {
693
memcpy(&stats, NLA_DATA(nested), sizeof(stats));
694
if (task_count < MAX_TASKS) {
695
tasks[task_count].pid = pid;
696
tasks[task_count].tgid = pid;
697
strncpy(tasks[task_count].command, comm,
698
TASK_COMM_LEN - 1);
699
tasks[task_count].command[TASK_COMM_LEN - 1] = '\0';
700
SET_TASK_STAT(task_count, cpu_count);
701
SET_TASK_STAT(task_count, cpu_delay_total);
702
SET_TASK_STAT(task_count, blkio_count);
703
SET_TASK_STAT(task_count, blkio_delay_total);
704
SET_TASK_STAT(task_count, swapin_count);
705
SET_TASK_STAT(task_count, swapin_delay_total);
706
SET_TASK_STAT(task_count, freepages_count);
707
SET_TASK_STAT(task_count, freepages_delay_total);
708
SET_TASK_STAT(task_count, thrashing_count);
709
SET_TASK_STAT(task_count, thrashing_delay_total);
710
SET_TASK_STAT(task_count, compact_count);
711
SET_TASK_STAT(task_count, compact_delay_total);
712
SET_TASK_STAT(task_count, wpcopy_count);
713
SET_TASK_STAT(task_count, wpcopy_delay_total);
714
SET_TASK_STAT(task_count, irq_count);
715
SET_TASK_STAT(task_count, irq_delay_total);
716
set_mem_count(&tasks[task_count]);
717
set_mem_delay_total(&tasks[task_count]);
718
task_count++;
719
}
720
break;
721
}
722
nested_len -= NLA_ALIGN(nested->nla_len);
723
nested = NLA_NEXT(nested);
724
}
725
}
726
nl_len -= NLA_ALIGN(na->nla_len);
727
na = NLA_NEXT(na);
728
}
729
return;
730
}
731
732
static void get_task_delays(void)
733
{
734
char comm[TASK_COMM_LEN];
735
struct dirent *entry;
736
DIR *dir;
737
int pid;
738
739
task_count = 0;
740
if (cfg.monitor_pid > 0) {
741
if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0)
742
fetch_and_fill_task_info(cfg.monitor_pid, comm);
743
return;
744
}
745
746
dir = opendir("/proc");
747
if (!dir) {
748
fprintf(stderr, "Error opening /proc directory\n");
749
return;
750
}
751
752
while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) {
753
if (!isdigit(entry->d_name[0]))
754
continue;
755
pid = atoi(entry->d_name);
756
if (pid == 0)
757
continue;
758
if (read_comm(pid, comm, sizeof(comm)) != 0)
759
continue;
760
fetch_and_fill_task_info(pid, comm);
761
}
762
closedir(dir);
763
}
764
765
/*
 * Average delay per event in milliseconds: @total is scaled down by
 * 1000000.0 and divided by @count. Yields 0 when @count is 0.
 */
static double average_ms(unsigned long long total, unsigned long long count)
{
	return count ? (double)total / 1000000.0 / count : 0;
}

/* Comparison function for sorting tasks */
774
static int compare_tasks(const void *a, const void *b)
775
{
776
const struct task_info *t1 = (const struct task_info *)a;
777
const struct task_info *t2 = (const struct task_info *)b;
778
unsigned long long total1;
779
unsigned long long total2;
780
unsigned long count1;
781
unsigned long count2;
782
double avg1, avg2;
783
784
total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
785
total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
786
count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
787
count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);
788
789
avg1 = average_ms(total1, count1);
790
avg2 = average_ms(total2, count2);
791
if (avg1 != avg2)
792
return avg2 > avg1 ? 1 : -1;
793
794
return 0;
795
}
796
797
/* Sort tasks by selected field */
798
static void sort_tasks(void)
799
{
800
if (task_count > 0)
801
qsort(tasks, task_count, sizeof(struct task_info), compare_tasks);
802
}
803
804
/* Get container statistics via cgroupstats */
805
static void get_container_stats(void)
806
{
807
int rc, cfd;
808
struct {
809
struct nlmsghdr n;
810
struct genlmsghdr g;
811
char buf[MAX_MSG_SIZE];
812
} req, resp;
813
struct nlattr *na;
814
int nl_len;
815
struct cgroupstats stats;
816
817
/* Check if container path is set */
818
if (!cfg.container_path)
819
return;
820
821
/* Open container cgroup */
822
cfd = open(cfg.container_path, O_RDONLY);
823
if (cfd < 0) {
824
fprintf(stderr, "Error opening container path: %s\n", cfg.container_path);
825
return;
826
}
827
828
/* Send request for container stats */
829
if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET,
830
CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) {
831
fprintf(stderr, "Failed to send request for container stats\n");
832
close(cfd);
833
return;
834
}
835
836
/* Receive response */
837
rc = recv(nl_sd, &resp, sizeof(resp), 0);
838
if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
839
fprintf(stderr, "Failed to receive response for container stats\n");
840
close(cfd);
841
return;
842
}
843
844
/* Parse response */
845
nl_len = GENLMSG_PAYLOAD(&resp.n);
846
na = (struct nlattr *) GENLMSG_DATA(&resp);
847
while (nl_len > 0) {
848
if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) {
849
/* Get the cgroupstats structure */
850
memcpy(&stats, NLA_DATA(na), sizeof(stats));
851
852
/* Fill container stats */
853
container_stats.nr_sleeping = stats.nr_sleeping;
854
container_stats.nr_running = stats.nr_running;
855
container_stats.nr_stopped = stats.nr_stopped;
856
container_stats.nr_uninterruptible = stats.nr_uninterruptible;
857
container_stats.nr_io_wait = stats.nr_io_wait;
858
break;
859
}
860
nl_len -= NLA_ALIGN(na->nla_len);
861
na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
862
}
863
864
close(cfd);
865
}
866
867
/* Display results to stdout or log file */
868
static void display_results(int psi_ret)
869
{
870
time_t now = time(NULL);
871
struct tm *tm_now = localtime(&now);
872
FILE *out = stdout;
873
char timestamp[32];
874
bool suc = true;
875
int i, count;
876
877
/* Clear terminal screen */
878
suc &= BOOL_FPRINT(out, "\033[H\033[J");
879
880
/* PSI output (one-line, no cat style) */
881
suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60vg300/total)\n");
882
if (psi_ret) {
883
suc &= BOOL_FPRINT(out, " PSI not found: check if psi=1 enabled in cmdline\n");
884
} else {
885
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
886
"CPU some:",
887
psi.cpu_some_avg10,
888
psi.cpu_some_avg60,
889
psi.cpu_some_avg300,
890
psi.cpu_some_total / 1000);
891
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
892
"CPU full:",
893
psi.cpu_full_avg10,
894
psi.cpu_full_avg60,
895
psi.cpu_full_avg300,
896
psi.cpu_full_total / 1000);
897
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
898
"Memory full:",
899
psi.memory_full_avg10,
900
psi.memory_full_avg60,
901
psi.memory_full_avg300,
902
psi.memory_full_total / 1000);
903
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
904
"Memory some:",
905
psi.memory_some_avg10,
906
psi.memory_some_avg60,
907
psi.memory_some_avg300,
908
psi.memory_some_total / 1000);
909
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
910
"IO full:",
911
psi.io_full_avg10,
912
psi.io_full_avg60,
913
psi.io_full_avg300,
914
psi.io_full_total / 1000);
915
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
916
"IO some:",
917
psi.io_some_avg10,
918
psi.io_some_avg60,
919
psi.io_some_avg300,
920
psi.io_some_total / 1000);
921
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
922
"IRQ full:",
923
psi.irq_full_avg10,
924
psi.irq_full_avg60,
925
psi.irq_full_avg300,
926
psi.irq_full_total / 1000);
927
}
928
929
if (cfg.container_path) {
930
suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
931
suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ",
932
container_stats.nr_running, container_stats.nr_sleeping);
933
suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n",
934
container_stats.nr_stopped, container_stats.nr_uninterruptible,
935
container_stats.nr_io_wait);
936
}
937
938
/* Interacive command */
939
suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n");
940
if (sort_selected) {
941
if (cfg.display_mode == MODE_MEMVERBOSE)
942
suc &= BOOL_FPRINT(out,
943
"sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n");
944
else
945
suc &= BOOL_FPRINT(out,
946
"sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n");
947
}
948
949
/* Task delay output */
950
suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
951
cfg.max_processes, get_name_by_field(cfg.sort_field));
952
953
suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND");
954
if (cfg.display_mode == MODE_MEMVERBOSE) {
955
suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n",
956
"MEM(ms)", "SWAP(ms)", "RCL(ms)",
957
"THR(ms)", "CMP(ms)", "WP(ms)");
958
suc &= BOOL_FPRINT(out, "-----------------------");
959
suc &= BOOL_FPRINT(out, "-----------------------");
960
suc &= BOOL_FPRINT(out, "-----------------------");
961
suc &= BOOL_FPRINT(out, "---------------------\n");
962
} else {
963
suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n",
964
"CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)");
965
suc &= BOOL_FPRINT(out, "-----------------------");
966
suc &= BOOL_FPRINT(out, "-----------------------");
967
suc &= BOOL_FPRINT(out, "--------------------------\n");
968
}
969
970
count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
971
972
for (i = 0; i < count; i++) {
973
suc &= BOOL_FPRINT(out, "%8d %8d %-15s",
974
tasks[i].pid, tasks[i].tgid, tasks[i].command);
975
if (cfg.display_mode == MODE_MEMVERBOSE) {
976
suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE,
977
TASK_AVG(tasks[i], mem),
978
TASK_AVG(tasks[i], swapin),
979
TASK_AVG(tasks[i], freepages),
980
TASK_AVG(tasks[i], thrashing),
981
TASK_AVG(tasks[i], compact),
982
TASK_AVG(tasks[i], wpcopy));
983
} else {
984
suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT,
985
TASK_AVG(tasks[i], cpu),
986
TASK_AVG(tasks[i], blkio),
987
TASK_AVG(tasks[i], irq),
988
TASK_AVG(tasks[i], mem));
989
}
990
}
991
992
suc &= BOOL_FPRINT(out, "\n");
993
994
if (!suc)
995
perror("Error writing to output");
996
}
997
998
/* Check for keyboard input with timeout based on cfg.delay */
999
static char check_for_keypress(void)
1000
{
1001
struct timeval tv = {cfg.delay, 0};
1002
fd_set readfds;
1003
char ch = 0;
1004
1005
FD_ZERO(&readfds);
1006
FD_SET(STDIN_FILENO, &readfds);
1007
int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);
1008
1009
if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
1010
read(STDIN_FILENO, &ch, 1);
1011
return ch;
1012
}
1013
1014
return 0;
1015
}
1016
1017
#define MAX_MODE_SIZE 2
1018
static void toggle_display_mode(void)
1019
{
1020
static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE};
1021
static size_t cur_index;
1022
1023
cur_index = (cur_index + 1) % MAX_MODE_SIZE;
1024
cfg.display_mode = modes[cur_index];
1025
}
1026
1027
/* Handle keyboard input: sorting selection, mode toggle, or quit */
1028
static void handle_keypress(char ch, int *running)
1029
{
1030
const struct field_desc *field;
1031
1032
/* Change sort field */
1033
if (sort_selected) {
1034
field = get_field_by_cmd_char(ch);
1035
if (field && (field->supported_modes & cfg.display_mode))
1036
cfg.sort_field = field;
1037
1038
sort_selected = 0;
1039
/* Handle mode changes or quit */
1040
} else {
1041
switch (ch) {
1042
case 'o':
1043
sort_selected = 1;
1044
break;
1045
case 'M':
1046
toggle_display_mode();
1047
for (field = sort_fields; field->name != NULL; field++) {
1048
if (field->supported_modes & cfg.display_mode) {
1049
cfg.sort_field = field;
1050
break;
1051
}
1052
}
1053
break;
1054
case 'q':
1055
case 'Q':
1056
*running = 0;
1057
break;
1058
default:
1059
break;
1060
}
1061
}
1062
}
1063
1064
/* Main function */
1065
int main(int argc, char **argv)
1066
{
1067
const struct field_desc *field;
1068
int iterations = 0;
1069
int psi_ret = 0;
1070
char keypress;
1071
1072
/* Parse command line arguments */
1073
parse_args(argc, argv);
1074
1075
/* Setup netlink socket */
1076
nl_sd = create_nl_socket();
1077
if (nl_sd < 0) {
1078
fprintf(stderr, "Error creating netlink socket\n");
1079
exit(1);
1080
}
1081
1082
/* Get family ID for taskstats via netlink */
1083
family_id = get_family_id(nl_sd);
1084
if (!family_id) {
1085
fprintf(stderr, "Error getting taskstats family ID\n");
1086
close(nl_sd);
1087
exit(1);
1088
}
1089
1090
/* Set terminal to non-canonical mode for interaction */
1091
enable_raw_mode();
1092
1093
/* Main loop */
1094
while (running) {
1095
/* Auto-switch sort field when not matching display mode */
1096
if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
1097
for (field = sort_fields; field->name != NULL; field++) {
1098
if (field->supported_modes & cfg.display_mode) {
1099
cfg.sort_field = field;
1100
printf("Auto-switched sort field to: %s\n", field->name);
1101
break;
1102
}
1103
}
1104
}
1105
1106
/* Read PSI statistics */
1107
psi_ret = read_psi_stats();
1108
1109
/* Get container stats if container path provided */
1110
if (cfg.container_path)
1111
get_container_stats();
1112
1113
/* Get task delays */
1114
get_task_delays();
1115
1116
/* Sort tasks */
1117
sort_tasks();
1118
1119
/* Display results to stdout or log file */
1120
display_results(psi_ret);
1121
1122
/* Check for iterations */
1123
if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
1124
break;
1125
1126
/* Exit if output_one_time is set */
1127
if (cfg.output_one_time)
1128
break;
1129
1130
/* Keypress for interactive usage */
1131
keypress = check_for_keypress();
1132
if (keypress)
1133
handle_keypress(keypress, &running);
1134
}
1135
1136
/* Restore terminal mode */
1137
disable_raw_mode();
1138
1139
/* Cleanup */
1140
close(nl_sd);
1141
if (cfg.container_path)
1142
free(cfg.container_path);
1143
1144
return 0;
1145
}
1146
1147