Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/security/landlock/syscalls.c
29265 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Landlock - System call implementations and user space interfaces
4
*
5
* Copyright © 2016-2020 Mickaël Salaün <[email protected]>
6
* Copyright © 2018-2020 ANSSI
7
* Copyright © 2021-2025 Microsoft Corporation
8
*/
9
10
#include <asm/current.h>
11
#include <linux/anon_inodes.h>
12
#include <linux/bitops.h>
13
#include <linux/build_bug.h>
14
#include <linux/capability.h>
15
#include <linux/cleanup.h>
16
#include <linux/compiler_types.h>
17
#include <linux/dcache.h>
18
#include <linux/err.h>
19
#include <linux/errno.h>
20
#include <linux/fs.h>
21
#include <linux/limits.h>
22
#include <linux/mount.h>
23
#include <linux/path.h>
24
#include <linux/sched.h>
25
#include <linux/security.h>
26
#include <linux/stddef.h>
27
#include <linux/syscalls.h>
28
#include <linux/types.h>
29
#include <linux/uaccess.h>
30
#include <uapi/linux/landlock.h>
31
32
#include "cred.h"
33
#include "domain.h"
34
#include "fs.h"
35
#include "limits.h"
36
#include "net.h"
37
#include "ruleset.h"
38
#include "setup.h"
39
40
static bool is_initialized(void)
41
{
42
if (likely(landlock_initialized))
43
return true;
44
45
pr_warn_once(
46
"Disabled but requested by user space. "
47
"You should enable Landlock at boot time: "
48
"https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n");
49
return false;
50
}
51
52
/**
53
* copy_min_struct_from_user - Safe future-proof argument copying
54
*
55
* Extend copy_struct_from_user() to check for consistent user buffer.
56
*
57
* @dst: Kernel space pointer or NULL.
58
* @ksize: Actual size of the data pointed to by @dst.
59
* @ksize_min: Minimal required size to be copied.
60
* @src: User space pointer or NULL.
61
* @usize: (Alleged) size of the data pointed to by @src.
62
*/
63
static __always_inline int
64
copy_min_struct_from_user(void *const dst, const size_t ksize,
65
const size_t ksize_min, const void __user *const src,
66
const size_t usize)
67
{
68
/* Checks buffer inconsistencies. */
69
BUILD_BUG_ON(!dst);
70
if (!src)
71
return -EFAULT;
72
73
/* Checks size ranges. */
74
BUILD_BUG_ON(ksize <= 0);
75
BUILD_BUG_ON(ksize < ksize_min);
76
if (usize < ksize_min)
77
return -EINVAL;
78
if (usize > PAGE_SIZE)
79
return -E2BIG;
80
81
/* Copies user buffer and fills with zeros. */
82
return copy_struct_from_user(dst, ksize, src, usize);
83
}
84
85
/*
86
* This function only contains arithmetic operations with constants, leading to
87
* BUILD_BUG_ON(). The related code is evaluated and checked at build time,
88
* but it is then ignored thanks to compiler optimizations.
89
*/
90
static void build_check_abi(void)
91
{
92
struct landlock_ruleset_attr ruleset_attr;
93
struct landlock_path_beneath_attr path_beneath_attr;
94
struct landlock_net_port_attr net_port_attr;
95
size_t ruleset_size, path_beneath_size, net_port_size;
96
97
/*
98
* For each user space ABI structures, first checks that there is no
99
* hole in them, then checks that all architectures have the same
100
* struct size.
101
*/
102
ruleset_size = sizeof(ruleset_attr.handled_access_fs);
103
ruleset_size += sizeof(ruleset_attr.handled_access_net);
104
ruleset_size += sizeof(ruleset_attr.scoped);
105
BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size);
106
BUILD_BUG_ON(sizeof(ruleset_attr) != 24);
107
108
path_beneath_size = sizeof(path_beneath_attr.allowed_access);
109
path_beneath_size += sizeof(path_beneath_attr.parent_fd);
110
BUILD_BUG_ON(sizeof(path_beneath_attr) != path_beneath_size);
111
BUILD_BUG_ON(sizeof(path_beneath_attr) != 12);
112
113
net_port_size = sizeof(net_port_attr.allowed_access);
114
net_port_size += sizeof(net_port_attr.port);
115
BUILD_BUG_ON(sizeof(net_port_attr) != net_port_size);
116
BUILD_BUG_ON(sizeof(net_port_attr) != 16);
117
}
118
119
/* Ruleset handling */
120
121
static int fop_ruleset_release(struct inode *const inode,
122
struct file *const filp)
123
{
124
struct landlock_ruleset *ruleset = filp->private_data;
125
126
landlock_put_ruleset(ruleset);
127
return 0;
128
}
129
130
/*
 * Placeholder read handler: it only exists to enable FMODE_CAN_READ and
 * unconditionally fails with -EINVAL.
 */
static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
			      const size_t size, loff_t *const ppos)
{
	return -EINVAL;
}
136
137
static ssize_t fop_dummy_write(struct file *const filp,
138
const char __user *const buf, const size_t size,
139
loff_t *const ppos)
140
{
141
/* Dummy handler to enable FMODE_CAN_WRITE. */
142
return -EINVAL;
143
}
144
145
/*
146
* A ruleset file descriptor enables to build a ruleset by adding (i.e.
147
* writing) rule after rule, without relying on the task's context. This
148
* reentrant design is also used in a read way to enforce the ruleset on the
149
* current task.
150
*/
151
static const struct file_operations ruleset_fops = {
152
.release = fop_ruleset_release,
153
.read = fop_dummy_read,
154
.write = fop_dummy_write,
155
};
156
157
/*
158
* The Landlock ABI version should be incremented for each new Landlock-related
159
* user space visible change (e.g. Landlock syscalls). This version should
160
* only be incremented once per Linux release, and the date in
161
* Documentation/userspace-api/landlock.rst should be updated to reflect the
162
* UAPI change.
163
*/
164
/* Value returned by landlock_create_ruleset() for %LANDLOCK_CREATE_RULESET_VERSION. */
const int landlock_abi_version = 7;
165
166
/**
167
* sys_landlock_create_ruleset - Create a new ruleset
168
*
169
* @attr: Pointer to a &struct landlock_ruleset_attr identifying the scope of
170
* the new ruleset.
171
* @size: Size of the pointed &struct landlock_ruleset_attr (needed for
172
* backward and forward compatibility).
173
* @flags: Supported values:
174
*
175
* - %LANDLOCK_CREATE_RULESET_VERSION
176
* - %LANDLOCK_CREATE_RULESET_ERRATA
177
*
178
* This system call enables to create a new Landlock ruleset, and returns the
179
* related file descriptor on success.
180
*
181
* If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is
182
* set, then @attr must be NULL and @size must be 0.
183
*
184
* Possible returned errors are:
185
*
186
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
187
* - %EINVAL: unknown @flags, or unknown access, or unknown scope, or too small @size;
188
* - %E2BIG: @attr or @size inconsistencies;
189
* - %EFAULT: @attr or @size inconsistencies;
190
* - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs.
191
*
192
* .. kernel-doc:: include/uapi/linux/landlock.h
193
* :identifiers: landlock_create_ruleset_flags
194
*/
195
SYSCALL_DEFINE3(landlock_create_ruleset,
196
const struct landlock_ruleset_attr __user *const, attr,
197
const size_t, size, const __u32, flags)
198
{
199
struct landlock_ruleset_attr ruleset_attr;
200
struct landlock_ruleset *ruleset;
201
int err, ruleset_fd;
202
203
/* Build-time checks. */
204
build_check_abi();
205
206
if (!is_initialized())
207
return -EOPNOTSUPP;
208
209
if (flags) {
210
if (attr || size)
211
return -EINVAL;
212
213
if (flags == LANDLOCK_CREATE_RULESET_VERSION)
214
return landlock_abi_version;
215
216
if (flags == LANDLOCK_CREATE_RULESET_ERRATA)
217
return landlock_errata;
218
219
return -EINVAL;
220
}
221
222
/* Copies raw user space buffer. */
223
err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr),
224
offsetofend(typeof(ruleset_attr),
225
handled_access_fs),
226
attr, size);
227
if (err)
228
return err;
229
230
/* Checks content (and 32-bits cast). */
231
if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) !=
232
LANDLOCK_MASK_ACCESS_FS)
233
return -EINVAL;
234
235
/* Checks network content (and 32-bits cast). */
236
if ((ruleset_attr.handled_access_net | LANDLOCK_MASK_ACCESS_NET) !=
237
LANDLOCK_MASK_ACCESS_NET)
238
return -EINVAL;
239
240
/* Checks IPC scoping content (and 32-bits cast). */
241
if ((ruleset_attr.scoped | LANDLOCK_MASK_SCOPE) != LANDLOCK_MASK_SCOPE)
242
return -EINVAL;
243
244
/* Checks arguments and transforms to kernel struct. */
245
ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs,
246
ruleset_attr.handled_access_net,
247
ruleset_attr.scoped);
248
if (IS_ERR(ruleset))
249
return PTR_ERR(ruleset);
250
251
/* Creates anonymous FD referring to the ruleset. */
252
ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
253
ruleset, O_RDWR | O_CLOEXEC);
254
if (ruleset_fd < 0)
255
landlock_put_ruleset(ruleset);
256
return ruleset_fd;
257
}
258
259
/*
260
* Returns an owned ruleset from a FD. It is thus needed to call
261
* landlock_put_ruleset() on the return value.
262
*/
263
static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
264
const fmode_t mode)
265
{
266
CLASS(fd, ruleset_f)(fd);
267
struct landlock_ruleset *ruleset;
268
269
if (fd_empty(ruleset_f))
270
return ERR_PTR(-EBADF);
271
272
/* Checks FD type and access right. */
273
if (fd_file(ruleset_f)->f_op != &ruleset_fops)
274
return ERR_PTR(-EBADFD);
275
if (!(fd_file(ruleset_f)->f_mode & mode))
276
return ERR_PTR(-EPERM);
277
ruleset = fd_file(ruleset_f)->private_data;
278
if (WARN_ON_ONCE(ruleset->num_layers != 1))
279
return ERR_PTR(-EINVAL);
280
landlock_get_ruleset(ruleset);
281
return ruleset;
282
}
283
284
/* Path handling */
285
286
/*
287
* @path: Must call put_path(@path) after the call if it succeeded.
288
*/
289
static int get_path_from_fd(const s32 fd, struct path *const path)
290
{
291
CLASS(fd_raw, f)(fd);
292
293
BUILD_BUG_ON(!__same_type(
294
fd, ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
295
296
if (fd_empty(f))
297
return -EBADF;
298
/*
299
* Forbids ruleset FDs, internal filesystems (e.g. nsfs), including
300
* pseudo filesystems that will never be mountable (e.g. sockfs,
301
* pipefs).
302
*/
303
if ((fd_file(f)->f_op == &ruleset_fops) ||
304
(fd_file(f)->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
305
(fd_file(f)->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
306
IS_PRIVATE(d_backing_inode(fd_file(f)->f_path.dentry)))
307
return -EBADFD;
308
309
*path = fd_file(f)->f_path;
310
path_get(path);
311
return 0;
312
}
313
314
static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
315
const void __user *const rule_attr)
316
{
317
struct landlock_path_beneath_attr path_beneath_attr;
318
struct path path;
319
int res, err;
320
access_mask_t mask;
321
322
/* Copies raw user space buffer. */
323
res = copy_from_user(&path_beneath_attr, rule_attr,
324
sizeof(path_beneath_attr));
325
if (res)
326
return -EFAULT;
327
328
/*
329
* Informs about useless rule: empty allowed_access (i.e. deny rules)
330
* are ignored in path walks.
331
*/
332
if (!path_beneath_attr.allowed_access)
333
return -ENOMSG;
334
335
/* Checks that allowed_access matches the @ruleset constraints. */
336
mask = ruleset->access_masks[0].fs;
337
if ((path_beneath_attr.allowed_access | mask) != mask)
338
return -EINVAL;
339
340
/* Gets and checks the new rule. */
341
err = get_path_from_fd(path_beneath_attr.parent_fd, &path);
342
if (err)
343
return err;
344
345
/* Imports the new rule. */
346
err = landlock_append_fs_rule(ruleset, &path,
347
path_beneath_attr.allowed_access);
348
path_put(&path);
349
return err;
350
}
351
352
static int add_rule_net_port(struct landlock_ruleset *ruleset,
353
const void __user *const rule_attr)
354
{
355
struct landlock_net_port_attr net_port_attr;
356
int res;
357
access_mask_t mask;
358
359
/* Copies raw user space buffer. */
360
res = copy_from_user(&net_port_attr, rule_attr, sizeof(net_port_attr));
361
if (res)
362
return -EFAULT;
363
364
/*
365
* Informs about useless rule: empty allowed_access (i.e. deny rules)
366
* are ignored by network actions.
367
*/
368
if (!net_port_attr.allowed_access)
369
return -ENOMSG;
370
371
/* Checks that allowed_access matches the @ruleset constraints. */
372
mask = landlock_get_net_access_mask(ruleset, 0);
373
if ((net_port_attr.allowed_access | mask) != mask)
374
return -EINVAL;
375
376
/* Denies inserting a rule with port greater than 65535. */
377
if (net_port_attr.port > U16_MAX)
378
return -EINVAL;
379
380
/* Imports the new rule. */
381
return landlock_append_net_rule(ruleset, net_port_attr.port,
382
net_port_attr.allowed_access);
383
}
384
385
/**
386
* sys_landlock_add_rule - Add a new rule to a ruleset
387
*
388
* @ruleset_fd: File descriptor tied to the ruleset that should be extended
389
* with the new rule.
390
* @rule_type: Identify the structure type pointed to by @rule_attr:
391
* %LANDLOCK_RULE_PATH_BENEATH or %LANDLOCK_RULE_NET_PORT.
392
* @rule_attr: Pointer to a rule (matching the @rule_type).
393
* @flags: Must be 0.
394
*
395
* This system call enables to define a new rule and add it to an existing
396
* ruleset.
397
*
398
* Possible returned errors are:
399
*
400
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
401
* - %EAFNOSUPPORT: @rule_type is %LANDLOCK_RULE_NET_PORT but TCP/IP is not
402
* supported by the running kernel;
403
* - %EINVAL: @flags is not 0;
404
* - %EINVAL: The rule accesses are inconsistent (i.e.
405
* &landlock_path_beneath_attr.allowed_access or
406
* &landlock_net_port_attr.allowed_access is not a subset of the ruleset
407
* handled accesses)
408
* - %EINVAL: &landlock_net_port_attr.port is greater than 65535;
409
* - %ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access is
410
* 0);
411
* - %EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
412
* member of @rule_attr is not a file descriptor as expected;
413
* - %EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
414
* @rule_attr is not the expected file descriptor type;
415
* - %EPERM: @ruleset_fd has no write access to the underlying ruleset;
416
* - %EFAULT: @rule_attr was not a valid address.
417
*/
418
SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
419
const enum landlock_rule_type, rule_type,
420
const void __user *const, rule_attr, const __u32, flags)
421
{
422
struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;
423
424
if (!is_initialized())
425
return -EOPNOTSUPP;
426
427
/* No flag for now. */
428
if (flags)
429
return -EINVAL;
430
431
/* Gets and checks the ruleset. */
432
ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
433
if (IS_ERR(ruleset))
434
return PTR_ERR(ruleset);
435
436
switch (rule_type) {
437
case LANDLOCK_RULE_PATH_BENEATH:
438
return add_rule_path_beneath(ruleset, rule_attr);
439
case LANDLOCK_RULE_NET_PORT:
440
return add_rule_net_port(ruleset, rule_attr);
441
default:
442
return -EINVAL;
443
}
444
}
445
446
/* Enforcement */
447
448
/**
449
* sys_landlock_restrict_self - Enforce a ruleset on the calling thread
450
*
451
* @ruleset_fd: File descriptor tied to the ruleset to merge with the target.
452
* @flags: Supported values:
453
*
454
* - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF
455
* - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON
456
* - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
457
*
458
* This system call enables to enforce a Landlock ruleset on the current
459
* thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
460
* namespace or is running with no_new_privs. This avoids scenarios where
461
* unprivileged tasks can affect the behavior of privileged children.
462
*
463
* Possible returned errors are:
464
*
465
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
466
* - %EINVAL: @flags contains an unknown bit.
467
* - %EBADF: @ruleset_fd is not a file descriptor for the current thread;
468
* - %EBADFD: @ruleset_fd is not a ruleset file descriptor;
469
* - %EPERM: @ruleset_fd has no read access to the underlying ruleset, or the
470
* current thread is not running with no_new_privs, or it doesn't have
471
* %CAP_SYS_ADMIN in its namespace.
472
* - %E2BIG: The maximum number of stacked rulesets is reached for the current
473
* thread.
474
*
475
* .. kernel-doc:: include/uapi/linux/landlock.h
476
* :identifiers: landlock_restrict_self_flags
477
*/
478
SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
479
flags)
480
{
481
struct landlock_ruleset *new_dom,
482
*ruleset __free(landlock_put_ruleset) = NULL;
483
struct cred *new_cred;
484
struct landlock_cred_security *new_llcred;
485
bool __maybe_unused log_same_exec, log_new_exec, log_subdomains,
486
prev_log_subdomains;
487
488
if (!is_initialized())
489
return -EOPNOTSUPP;
490
491
/*
492
* Similar checks as for seccomp(2), except that an -EPERM may be
493
* returned.
494
*/
495
if (!task_no_new_privs(current) &&
496
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
497
return -EPERM;
498
499
if ((flags | LANDLOCK_MASK_RESTRICT_SELF) !=
500
LANDLOCK_MASK_RESTRICT_SELF)
501
return -EINVAL;
502
503
/* Translates "off" flag to boolean. */
504
log_same_exec = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF);
505
/* Translates "on" flag to boolean. */
506
log_new_exec = !!(flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON);
507
/* Translates "off" flag to boolean. */
508
log_subdomains = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
509
510
/*
511
* It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
512
* -1 as ruleset_fd, but no other flag must be set.
513
*/
514
if (!(ruleset_fd == -1 &&
515
flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
516
/* Gets and checks the ruleset. */
517
ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
518
if (IS_ERR(ruleset))
519
return PTR_ERR(ruleset);
520
}
521
522
/* Prepares new credentials. */
523
new_cred = prepare_creds();
524
if (!new_cred)
525
return -ENOMEM;
526
527
new_llcred = landlock_cred(new_cred);
528
529
#ifdef CONFIG_AUDIT
530
prev_log_subdomains = !new_llcred->log_subdomains_off;
531
new_llcred->log_subdomains_off = !prev_log_subdomains ||
532
!log_subdomains;
533
#endif /* CONFIG_AUDIT */
534
535
/*
536
* The only case when a ruleset may not be set is if
537
* LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
538
* We could optimize this case by not calling commit_creds() if this flag
539
* was already set, but it is not worth the complexity.
540
*/
541
if (!ruleset)
542
return commit_creds(new_cred);
543
544
/*
545
* There is no possible race condition while copying and manipulating
546
* the current credentials because they are dedicated per thread.
547
*/
548
new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset);
549
if (IS_ERR(new_dom)) {
550
abort_creds(new_cred);
551
return PTR_ERR(new_dom);
552
}
553
554
#ifdef CONFIG_AUDIT
555
new_dom->hierarchy->log_same_exec = log_same_exec;
556
new_dom->hierarchy->log_new_exec = log_new_exec;
557
if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
558
new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
559
#endif /* CONFIG_AUDIT */
560
561
/* Replaces the old (prepared) domain. */
562
landlock_put_ruleset(new_llcred->domain);
563
new_llcred->domain = new_dom;
564
565
#ifdef CONFIG_AUDIT
566
new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
567
#endif /* CONFIG_AUDIT */
568
569
return commit_creds(new_cred);
570
}
571
572