/* Path: blob/master/tools/testing/selftests/bpf/cgroup_helpers.c */
/* 29270 views */
// SPDX-License-Identifier: GPL-2.01#define _GNU_SOURCE2#include <sched.h>3#include <sys/mount.h>4#include <sys/stat.h>5#include <sys/types.h>6#include <sys/xattr.h>7#include <linux/limits.h>8#include <stdio.h>9#include <stdlib.h>10#include <linux/sched.h>11#include <fcntl.h>12#include <unistd.h>13#include <ftw.h>1415#include "cgroup_helpers.h"16#include "bpf_util.h"1718/*19* To avoid relying on the system setup, when setup_cgroup_env is called20* we create a new mount namespace, and cgroup namespace. The cgroupv221* root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't22* have cgroupv2 enabled at this point in time. It's easier to create our23* own mount namespace and manage it ourselves. We assume /mnt exists.24*25* Related cgroupv1 helpers are named *classid*(), since we only use the26* net_cls controller for tagging net_cls.classid. We assume the default27* mount under /sys/fs/cgroup/net_cls, which should be the case for the28* vast majority of users.29*/3031#define WALK_FD_LIMIT 163233#define CGROUP_MOUNT_PATH "/mnt"34#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"35#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"36#define CGROUP_WORK_DIR "/cgroup-test-work-dir"3738#define format_cgroup_path_pid(buf, path, pid) \39snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \40CGROUP_WORK_DIR, pid, path)4142#define format_cgroup_path(buf, path) \43format_cgroup_path_pid(buf, path, getpid())4445#define format_parent_cgroup_path(buf, path) \46format_cgroup_path_pid(buf, path, getppid())4748#define format_classid_path_pid(buf, pid) \49snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \50CGROUP_WORK_DIR, pid)5152#define format_classid_path(buf) \53format_classid_path_pid(buf, getpid())5455static __thread bool cgroup_workdir_mounted;5657static void __cleanup_cgroup_environment(void);5859static int __enable_controllers(const char *cgroup_path, const char *controllers)60{61char path[PATH_MAX + 1];62char enable[PATH_MAX + 1];63char *c, *c2;64int 
fd, cfd;65ssize_t len;6667/* If not controllers are passed, enable all available controllers */68if (!controllers) {69snprintf(path, sizeof(path), "%s/cgroup.controllers",70cgroup_path);71fd = open(path, O_RDONLY);72if (fd < 0) {73log_err("Opening cgroup.controllers: %s", path);74return 1;75}76len = read(fd, enable, sizeof(enable) - 1);77if (len < 0) {78close(fd);79log_err("Reading cgroup.controllers: %s", path);80return 1;81} else if (len == 0) { /* No controllers to enable */82close(fd);83return 0;84}85enable[len] = 0;86close(fd);87} else {88bpf_strlcpy(enable, controllers, sizeof(enable));89}9091snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);92cfd = open(path, O_RDWR);93if (cfd < 0) {94log_err("Opening cgroup.subtree_control: %s", path);95return 1;96}9798for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {99if (dprintf(cfd, "+%s\n", c) <= 0) {100log_err("Enabling controller %s: %s", c, path);101close(cfd);102return 1;103}104}105close(cfd);106return 0;107}108109/**110* enable_controllers() - Enable cgroup v2 controllers111* @relative_path: The cgroup path, relative to the workdir112* @controllers: List of controllers to enable in cgroup.controllers format113*114*115* Enable given cgroup v2 controllers, if @controllers is NULL, enable all116* available controllers.117*118* If successful, 0 is returned.119*/120int enable_controllers(const char *relative_path, const char *controllers)121{122char cgroup_path[PATH_MAX + 1];123124format_cgroup_path(cgroup_path, relative_path);125return __enable_controllers(cgroup_path, controllers);126}127128static int __write_cgroup_file(const char *cgroup_path, const char *file,129const char *buf)130{131char file_path[PATH_MAX + 1];132int fd;133134snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);135fd = open(file_path, O_RDWR);136if (fd < 0) {137log_err("Opening %s", file_path);138return 1;139}140141if (dprintf(fd, "%s", buf) <= 0) {142log_err("Writing to %s", 
file_path);143close(fd);144return 1;145}146close(fd);147return 0;148}149150/**151* write_cgroup_file() - Write to a cgroup file152* @relative_path: The cgroup path, relative to the workdir153* @file: The name of the file in cgroupfs to write to154* @buf: Buffer to write to the file155*156* Write to a file in the given cgroup's directory.157*158* If successful, 0 is returned.159*/160int write_cgroup_file(const char *relative_path, const char *file,161const char *buf)162{163char cgroup_path[PATH_MAX - 24];164165format_cgroup_path(cgroup_path, relative_path);166return __write_cgroup_file(cgroup_path, file, buf);167}168169/**170* write_cgroup_file_parent() - Write to a cgroup file in the parent process171* workdir172* @relative_path: The cgroup path, relative to the parent process workdir173* @file: The name of the file in cgroupfs to write to174* @buf: Buffer to write to the file175*176* Write to a file in the given cgroup's directory under the parent process177* workdir.178*179* If successful, 0 is returned.180*/181int write_cgroup_file_parent(const char *relative_path, const char *file,182const char *buf)183{184char cgroup_path[PATH_MAX - 24];185186format_parent_cgroup_path(cgroup_path, relative_path);187return __write_cgroup_file(cgroup_path, file, buf);188}189190/**191* setup_cgroup_environment() - Setup the cgroup environment192*193* After calling this function, cleanup_cgroup_environment should be called194* once testing is complete.195*196* This function will print an error to stderr and return 1 if it is unable197* to setup the cgroup environment. 
If setup is successful, 0 is returned.198*/199int setup_cgroup_environment(void)200{201char cgroup_workdir[PATH_MAX - 24];202203format_cgroup_path(cgroup_workdir, "");204205if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {206log_err("mkdir mount");207return 1;208}209210if (unshare(CLONE_NEWNS)) {211log_err("unshare");212return 1;213}214215if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {216log_err("mount fakeroot");217return 1;218}219220if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {221log_err("mount cgroup2");222return 1;223}224cgroup_workdir_mounted = true;225226/* Cleanup existing failed runs, now that the environment is setup */227__cleanup_cgroup_environment();228229if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {230log_err("mkdir cgroup work dir");231return 1;232}233234/* Enable all available controllers to increase test coverage */235if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||236__enable_controllers(cgroup_workdir, NULL))237return 1;238239return 0;240}241242static int nftwfunc(const char *filename, const struct stat *statptr,243int fileflags, struct FTW *pfwt)244{245if ((fileflags & FTW_D) && rmdir(filename))246log_err("Removing cgroup: %s", filename);247return 0;248}249250static int join_cgroup_from_top(const char *cgroup_path)251{252char cgroup_procs_path[PATH_MAX + 1];253pid_t pid = getpid();254int fd, rc = 0;255256snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),257"%s/cgroup.procs", cgroup_path);258259fd = open(cgroup_procs_path, O_WRONLY);260if (fd < 0) {261log_err("Opening Cgroup Procs: %s", cgroup_procs_path);262return 1;263}264265if (dprintf(fd, "%d\n", pid) < 0) {266log_err("Joining Cgroup");267rc = 1;268}269270close(fd);271return rc;272}273274/**275* join_cgroup() - Join a cgroup276* @relative_path: The cgroup path, relative to the workdir, to join277*278* This function expects a cgroup to already be created, relative to the cgroup279* work dir, and it joins it. 
For example, passing "/my-cgroup" as the path280* would actually put the calling process into the cgroup281* "/cgroup-test-work-dir/my-cgroup"282*283* On success, it returns 0, otherwise on failure it returns 1.284*/285int join_cgroup(const char *relative_path)286{287char cgroup_path[PATH_MAX + 1];288289format_cgroup_path(cgroup_path, relative_path);290return join_cgroup_from_top(cgroup_path);291}292293/**294* join_root_cgroup() - Join the root cgroup295*296* This function joins the root cgroup.297*298* On success, it returns 0, otherwise on failure it returns 1.299*/300int join_root_cgroup(void)301{302return join_cgroup_from_top(CGROUP_MOUNT_PATH);303}304305/**306* join_parent_cgroup() - Join a cgroup in the parent process workdir307* @relative_path: The cgroup path, relative to parent process workdir, to join308*309* See join_cgroup().310*311* On success, it returns 0, otherwise on failure it returns 1.312*/313int join_parent_cgroup(const char *relative_path)314{315char cgroup_path[PATH_MAX + 1];316317format_parent_cgroup_path(cgroup_path, relative_path);318return join_cgroup_from_top(cgroup_path);319}320321/**322* set_cgroup_xattr() - Set xattr on a cgroup dir323* @relative_path: The cgroup path, relative to the workdir, to set xattr324* @name: xattr name325* @value: xattr value326*327* This function set xattr on cgroup dir.328*329* On success, it returns 0, otherwise on failure it returns -1.330*/331int set_cgroup_xattr(const char *relative_path,332const char *name,333const char *value)334{335char cgroup_path[PATH_MAX + 1];336337format_cgroup_path(cgroup_path, relative_path);338return setxattr(cgroup_path, name, value, strlen(value) + 1, 0);339}340341/**342* __cleanup_cgroup_environment() - Delete temporary cgroups343*344* This is a helper for cleanup_cgroup_environment() that is responsible for345* deletion of all temporary cgroups that have been created during the test.346*/347static void __cleanup_cgroup_environment(void)348{349char cgroup_workdir[PATH_MAX + 
1];350351format_cgroup_path(cgroup_workdir, "");352join_cgroup_from_top(CGROUP_MOUNT_PATH);353nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);354}355356/**357* cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment358*359* This is an idempotent function to delete all temporary cgroups that360* have been created during the test and unmount the cgroup testing work361* directory.362*363* At call time, it moves the calling process to the root cgroup, and then364* runs the deletion process. It is idempotent, and should not fail, unless365* a process is lingering.366*367* On failure, it will print an error to stderr, and try to continue.368*/369void cleanup_cgroup_environment(void)370{371__cleanup_cgroup_environment();372if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))373log_err("umount cgroup2");374cgroup_workdir_mounted = false;375}376377/**378* get_root_cgroup() - Get the FD of the root cgroup379*380* On success, it returns the file descriptor. On failure, it returns -1.381* If there is a failure, it prints the error to stderr.382*/383int get_root_cgroup(void)384{385int fd;386387fd = open(CGROUP_MOUNT_PATH, O_RDONLY);388if (fd < 0) {389log_err("Opening root cgroup");390return -1;391}392return fd;393}394395/*396* remove_cgroup() - Remove a cgroup397* @relative_path: The cgroup path, relative to the workdir, to remove398*399* This function expects a cgroup to already be created, relative to the cgroup400* work dir. It also expects the cgroup doesn't have any children or live401* processes and it removes the cgroup.402*403* On failure, it will print an error to stderr.404*/405void remove_cgroup(const char *relative_path)406{407char cgroup_path[PATH_MAX + 1];408409format_cgroup_path(cgroup_path, relative_path);410if (rmdir(cgroup_path))411log_err("rmdiring cgroup %s .. 
%s", relative_path, cgroup_path);412}413414/*415* remove_cgroup_pid() - Remove a cgroup setup by process identified by PID416* @relative_path: The cgroup path, relative to the workdir, to remove417* @pid: PID to be used to find cgroup_path418*419* This function expects a cgroup to already be created, relative to the cgroup420* work dir. It also expects the cgroup doesn't have any children or live421* processes and it removes the cgroup.422*423* On failure, it will print an error to stderr.424*/425void remove_cgroup_pid(const char *relative_path, int pid)426{427char cgroup_path[PATH_MAX + 1];428429format_cgroup_path_pid(cgroup_path, relative_path, pid);430if (rmdir(cgroup_path))431log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);432}433434/**435* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD436* @relative_path: The cgroup path, relative to the workdir, to join437*438* This function creates a cgroup under the top level workdir and returns the439* file descriptor. It is idempotent.440*441* On success, it returns the file descriptor. On failure it returns -1.442* If there is a failure, it prints the error to stderr.443*/444int create_and_get_cgroup(const char *relative_path)445{446char cgroup_path[PATH_MAX + 1];447int fd;448449format_cgroup_path(cgroup_path, relative_path);450if (mkdir(cgroup_path, 0777) && errno != EEXIST) {451log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);452return -1;453}454455fd = open(cgroup_path, O_RDONLY);456if (fd < 0) {457log_err("Opening Cgroup");458return -1;459}460461return fd;462}463464/**465* get_cgroup_id_from_path - Get cgroup id for a particular cgroup path466* @cgroup_workdir: The absolute cgroup path467*468* On success, it returns the cgroup id. 
On failure it returns 0,469* which is an invalid cgroup id.470* If there is a failure, it prints the error to stderr.471*/472static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)473{474int dirfd, err, flags, mount_id, fhsize;475union {476unsigned long long cgid;477unsigned char raw_bytes[8];478} id;479struct file_handle *fhp, *fhp2;480unsigned long long ret = 0;481482dirfd = AT_FDCWD;483flags = 0;484fhsize = sizeof(*fhp);485fhp = calloc(1, fhsize);486if (!fhp) {487log_err("calloc");488return 0;489}490err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);491if (err >= 0 || fhp->handle_bytes != 8) {492log_err("name_to_handle_at");493goto free_mem;494}495496fhsize = sizeof(struct file_handle) + fhp->handle_bytes;497fhp2 = realloc(fhp, fhsize);498if (!fhp2) {499log_err("realloc");500goto free_mem;501}502err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);503fhp = fhp2;504if (err < 0) {505log_err("name_to_handle_at");506goto free_mem;507}508509memcpy(id.raw_bytes, fhp->f_handle, 8);510ret = id.cgid;511512free_mem:513free(fhp);514return ret;515}516517unsigned long long get_cgroup_id(const char *relative_path)518{519char cgroup_workdir[PATH_MAX + 1];520521format_cgroup_path(cgroup_workdir, relative_path);522return get_cgroup_id_from_path(cgroup_workdir);523}524525int cgroup_setup_and_join(const char *path) {526int cg_fd;527528if (setup_cgroup_environment()) {529fprintf(stderr, "Failed to setup cgroup environment\n");530return -EINVAL;531}532533cg_fd = create_and_get_cgroup(path);534if (cg_fd < 0) {535fprintf(stderr, "Failed to create test cgroup\n");536cleanup_cgroup_environment();537return cg_fd;538}539540if (join_cgroup(path)) {541fprintf(stderr, "Failed to join cgroup\n");542cleanup_cgroup_environment();543return -EINVAL;544}545return cg_fd;546}547548/**549* setup_classid_environment() - Setup the cgroupv1 net_cls environment550*551* This function should only be called in a custom mount namespace, e.g.552* 
created by running setup_cgroup_environment.553*554* After calling this function, cleanup_classid_environment should be called555* once testing is complete.556*557* This function will print an error to stderr and return 1 if it is unable558* to setup the cgroup environment. If setup is successful, 0 is returned.559*/560int setup_classid_environment(void)561{562char cgroup_workdir[PATH_MAX + 1];563564format_classid_path(cgroup_workdir);565566if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&567errno != EBUSY) {568log_err("mount cgroup base");569return 1;570}571572if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {573log_err("mkdir cgroup net_cls");574return 1;575}576577if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {578if (errno != EBUSY) {579log_err("mount cgroup net_cls");580return 1;581}582583if (rmdir(NETCLS_MOUNT_PATH)) {584log_err("rmdir cgroup net_cls");585return 1;586}587if (umount(CGROUP_MOUNT_DFLT)) {588log_err("umount cgroup base");589return 1;590}591}592593cleanup_classid_environment();594595if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {596log_err("mkdir cgroup work dir");597return 1;598}599600return 0;601}602603/**604* set_classid() - Set a cgroupv1 net_cls classid605*606* Writes the classid into the cgroup work dir's net_cls.classid607* file in order to later on trigger socket tagging.608*609* We leverage the current pid as the classid, ensuring unique identification.610*611* On success, it returns 0, otherwise on failure it returns 1. 
If there612* is a failure, it prints the error to stderr.613*/614int set_classid(void)615{616char cgroup_workdir[PATH_MAX - 42];617char cgroup_classid_path[PATH_MAX + 1];618int fd, rc = 0;619620format_classid_path(cgroup_workdir);621snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),622"%s/net_cls.classid", cgroup_workdir);623624fd = open(cgroup_classid_path, O_WRONLY);625if (fd < 0) {626log_err("Opening cgroup classid: %s", cgroup_classid_path);627return 1;628}629630if (dprintf(fd, "%u\n", getpid()) < 0) {631log_err("Setting cgroup classid");632rc = 1;633}634635close(fd);636return rc;637}638639/**640* join_classid() - Join a cgroupv1 net_cls classid641*642* This function expects the cgroup work dir to be already created, as we643* join it here. This causes the process sockets to be tagged with the given644* net_cls classid.645*646* On success, it returns 0, otherwise on failure it returns 1.647*/648int join_classid(void)649{650char cgroup_workdir[PATH_MAX + 1];651652format_classid_path(cgroup_workdir);653return join_cgroup_from_top(cgroup_workdir);654}655656/**657* cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment658*659* At call time, it moves the calling process to the root cgroup, and then660* runs the deletion process.661*662* On failure, it will print an error to stderr, and try to continue.663*/664void cleanup_classid_environment(void)665{666char cgroup_workdir[PATH_MAX + 1];667668format_classid_path(cgroup_workdir);669join_cgroup_from_top(NETCLS_MOUNT_PATH);670nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);671}672673/**674* get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup675*/676unsigned long long get_classid_cgroup_id(void)677{678char cgroup_workdir[PATH_MAX + 1];679680format_classid_path(cgroup_workdir);681return get_cgroup_id_from_path(cgroup_workdir);682}683684/**685* get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.686* @subsys_name: The 
cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be687* a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like688* "net_cls,net_prio".689*/690int get_cgroup1_hierarchy_id(const char *subsys_name)691{692char *c, *c2, *c3, *c4;693bool found = false;694char line[1024];695FILE *file;696int i, id;697698if (!subsys_name)699return -1;700701file = fopen("/proc/self/cgroup", "r");702if (!file) {703log_err("fopen /proc/self/cgroup");704return -1;705}706707while (fgets(line, 1024, file)) {708i = 0;709for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {710if (i == 0) {711id = strtol(c, NULL, 10);712} else if (i == 1) {713if (!strcmp(c, subsys_name)) {714found = true;715break;716}717718/* Multiple subsystems may share one single mount point */719for (c3 = strtok_r(c, ",", &c4); c3;720c3 = strtok_r(NULL, ",", &c4)) {721if (!strcmp(c, subsys_name)) {722found = true;723break;724}725}726}727i++;728}729if (found)730break;731}732fclose(file);733return found ? id : -1;734}735736/**737* open_classid() - Open a cgroupv1 net_cls classid738*739* This function expects the cgroup work dir to be already created, as we740* open it here.741*742* On success, it returns the file descriptor. On failure it returns -1.743*/744int open_classid(void)745{746char cgroup_workdir[PATH_MAX + 1];747748format_classid_path(cgroup_workdir);749return open(cgroup_workdir, O_RDONLY);750}751752753