Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
lxc
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chen Yisong
lxc
Commits
858f6225
Unverified
Commit
858f6225
authored
Feb 16, 2021
by
Stéphane Graber
Committed by
GitHub
Feb 16, 2021
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3675 from brauner/2021-02-16/fixes
cgroups: second batch of cgroup fixes
parents
136b349c
060e54d6
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
607 additions
and
580 deletions
+607
-580
cgfsng.c
src/lxc/cgroups/cgfsng.c
+499
-530
cgroup.c
src/lxc/cgroups/cgroup.c
+41
-20
cgroup.h
src/lxc/cgroups/cgroup.h
+28
-4
cgroup_utils.c
src/lxc/cgroups/cgroup_utils.c
+0
-16
cgroup_utils.h
src/lxc/cgroups/cgroup_utils.h
+0
-2
file_utils.c
src/lxc/file_utils.c
+18
-8
log.h
src/lxc/log.h
+14
-0
string_utils.c
src/lxc/string_utils.c
+2
-0
string_utils.h
src/lxc/string_utils.h
+5
-0
No files found.
src/lxc/cgroups/cgfsng.c
View file @
858f6225
...
...
@@ -46,6 +46,7 @@
#include "memory_utils.h"
#include "mount_utils.h"
#include "storage/storage.h"
#include "string_utils.h"
#include "syscall_wrappers.h"
#include "utils.h"
...
...
@@ -312,234 +313,11 @@ static ssize_t get_max_cpus(char *cpulist)
return
cpus
;
}
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
#define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
static
bool
cg_legacy_filter_and_set_cpus
(
const
char
*
parent_cgroup
,
char
*
child_cgroup
,
bool
am_initialized
)
{
__do_free
char
*
cpulist
=
NULL
,
*
fpath
=
NULL
,
*
isolcpus
=
NULL
,
*
offlinecpus
=
NULL
,
*
posscpus
=
NULL
;
__do_free
uint32_t
*
isolmask
=
NULL
,
*
offlinemask
=
NULL
,
*
possmask
=
NULL
;
int
ret
;
ssize_t
i
;
ssize_t
maxisol
=
0
,
maxoffline
=
0
,
maxposs
=
0
;
bool
flipped_bit
=
false
;
fpath
=
must_make_path
(
parent_cgroup
,
"cpuset.cpus"
,
NULL
);
posscpus
=
read_file_at
(
-
EBADF
,
fpath
,
PROTECT_OPEN
,
0
);
if
(
!
posscpus
)
return
log_error_errno
(
false
,
errno
,
"Failed to read file
\"
%s
\"
"
,
fpath
);
/* Get maximum number of cpus found in possible cpuset. */
maxposs
=
get_max_cpus
(
posscpus
);
if
(
maxposs
<
0
||
maxposs
>=
INT_MAX
-
1
)
return
false
;
if
(
file_exists
(
__ISOL_CPUS
))
{
isolcpus
=
read_file_at
(
-
EBADF
,
__ISOL_CPUS
,
PROTECT_OPEN
,
0
);
if
(
!
isolcpus
)
return
log_error_errno
(
false
,
errno
,
"Failed to read file
\"
%s
\"
"
,
__ISOL_CPUS
);
if
(
isdigit
(
isolcpus
[
0
]))
{
/* Get maximum number of cpus found in isolated cpuset. */
maxisol
=
get_max_cpus
(
isolcpus
);
if
(
maxisol
<
0
||
maxisol
>=
INT_MAX
-
1
)
return
false
;
}
if
(
maxposs
<
maxisol
)
maxposs
=
maxisol
;
maxposs
++
;
}
else
{
TRACE
(
"The path
\"
"
__ISOL_CPUS
"
\"
to read isolated cpus from does not exist"
);
}
if
(
file_exists
(
__OFFLINE_CPUS
))
{
offlinecpus
=
read_file_at
(
-
EBADF
,
__OFFLINE_CPUS
,
PROTECT_OPEN
,
0
);
if
(
!
offlinecpus
)
return
log_error_errno
(
false
,
errno
,
"Failed to read file
\"
%s
\"
"
,
__OFFLINE_CPUS
);
if
(
isdigit
(
offlinecpus
[
0
]))
{
/* Get maximum number of cpus found in offline cpuset. */
maxoffline
=
get_max_cpus
(
offlinecpus
);
if
(
maxoffline
<
0
||
maxoffline
>=
INT_MAX
-
1
)
return
false
;
}
if
(
maxposs
<
maxoffline
)
maxposs
=
maxoffline
;
maxposs
++
;
}
else
{
TRACE
(
"The path
\"
"
__OFFLINE_CPUS
"
\"
to read offline cpus from does not exist"
);
}
if
((
maxisol
==
0
)
&&
(
maxoffline
==
0
))
{
cpulist
=
move_ptr
(
posscpus
);
goto
copy_parent
;
}
possmask
=
lxc_cpumask
(
posscpus
,
maxposs
);
if
(
!
possmask
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpumask for possible cpus"
);
if
(
maxisol
>
0
)
{
isolmask
=
lxc_cpumask
(
isolcpus
,
maxposs
);
if
(
!
isolmask
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpumask for isolated cpus"
);
}
if
(
maxoffline
>
0
)
{
offlinemask
=
lxc_cpumask
(
offlinecpus
,
maxposs
);
if
(
!
offlinemask
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpumask for offline cpus"
);
}
for
(
i
=
0
;
i
<=
maxposs
;
i
++
)
{
if
((
isolmask
&&
!
is_set
(
i
,
isolmask
))
||
(
offlinemask
&&
!
is_set
(
i
,
offlinemask
))
||
!
is_set
(
i
,
possmask
))
continue
;
flipped_bit
=
true
;
clear_bit
(
i
,
possmask
);
}
if
(
!
flipped_bit
)
{
cpulist
=
lxc_cpumask_to_cpulist
(
possmask
,
maxposs
);
TRACE
(
"No isolated or offline cpus present in cpuset"
);
}
else
{
cpulist
=
move_ptr
(
posscpus
);
TRACE
(
"Removed isolated or offline cpus from cpuset"
);
}
if
(
!
cpulist
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpu list"
);
copy_parent:
if
(
!
am_initialized
)
{
ret
=
lxc_write_openat
(
child_cgroup
,
"cpuset.cpus"
,
cpulist
,
strlen
(
cpulist
));
if
(
ret
<
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to write cpu list to
\"
%s/cpuset.cpus
\"
"
,
child_cgroup
);
TRACE
(
"Copied cpu settings of parent cgroup"
);
}
return
true
;
}
/* Copy contents of parent(@path)/@file to @path/@file */
static
bool
copy_parent_file
(
const
char
*
parent_cgroup
,
const
char
*
child_cgroup
,
const
char
*
file
)
{
__do_free
char
*
parent_file
=
NULL
,
*
value
=
NULL
;
int
len
=
0
;
int
ret
;
parent_file
=
must_make_path
(
parent_cgroup
,
file
,
NULL
);
len
=
lxc_read_from_file
(
parent_file
,
NULL
,
0
);
if
(
len
<=
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to determine buffer size"
);
value
=
must_realloc
(
NULL
,
len
+
1
);
value
[
len
]
=
'\0'
;
ret
=
lxc_read_from_file
(
parent_file
,
value
,
len
);
if
(
ret
!=
len
)
return
log_error_errno
(
false
,
errno
,
"Failed to read from parent file
\"
%s
\"
"
,
parent_file
);
ret
=
lxc_write_openat
(
child_cgroup
,
file
,
value
,
len
);
if
(
ret
<
0
&&
errno
!=
EACCES
)
return
log_error_errno
(
false
,
errno
,
"Failed to write
\"
%s
\"
to file
\"
%s/%s
\"
"
,
value
,
child_cgroup
,
file
);
return
true
;
}
/* Report whether @h's version field is CGROUP2_SUPER_MAGIC. */
static inline bool is_unified_hierarchy(const struct hierarchy *h)
{
	if (h->version != CGROUP2_SUPER_MAGIC)
		return false;

	return true;
}
/*
* Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set
* cgroup.clone_children so that children inherit settings. Since the
* h->base_path is populated by init or ourselves, we know it is already
* initialized.
*
* returns -1 on error, 0 when we didn't created a cgroup, 1 if we created a
* cgroup.
*/
static
int
cg_legacy_handle_cpuset_hierarchy
(
struct
hierarchy
*
h
,
const
char
*
cgroup_leaf
)
{
__do_free
char
*
parent_cgroup
=
NULL
,
*
child_cgroup
=
NULL
,
*
dup
=
NULL
;
__do_close
int
cgroup_fd
=
-
EBADF
;
int
fret
=
-
1
;
int
ret
;
char
v
;
char
*
leaf
,
*
slash
;
if
(
is_unified_hierarchy
(
h
))
return
0
;
if
(
!
string_in_list
(
h
->
controllers
,
"cpuset"
))
return
0
;
if
(
!
cgroup_leaf
)
return
ret_set_errno
(
-
1
,
EINVAL
);
dup
=
strdup
(
cgroup_leaf
);
if
(
!
dup
)
return
ret_set_errno
(
-
1
,
ENOMEM
);
parent_cgroup
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
NULL
);
leaf
=
dup
;
leaf
+=
strspn
(
leaf
,
"/"
);
slash
=
strchr
(
leaf
,
'/'
);
if
(
slash
)
*
slash
=
'\0'
;
child_cgroup
=
must_make_path
(
parent_cgroup
,
leaf
,
NULL
);
if
(
slash
)
*
slash
=
'/'
;
fret
=
1
;
ret
=
mkdir
(
child_cgroup
,
0755
);
if
(
ret
<
0
)
{
if
(
errno
!=
EEXIST
)
return
log_error_errno
(
-
1
,
errno
,
"Failed to create directory
\"
%s
\"
"
,
child_cgroup
);
fret
=
0
;
}
cgroup_fd
=
lxc_open_dirfd
(
child_cgroup
);
if
(
cgroup_fd
<
0
)
return
-
1
;
ret
=
lxc_readat
(
cgroup_fd
,
"cgroup.clone_children"
,
&
v
,
1
);
if
(
ret
<
0
)
return
log_error_errno
(
-
1
,
errno
,
"Failed to read file
\"
%s/cgroup.clone_children
\"
"
,
child_cgroup
);
/* Make sure any isolated cpus are removed from cpuset.cpus. */
if
(
!
cg_legacy_filter_and_set_cpus
(
parent_cgroup
,
child_cgroup
,
v
==
'1'
))
return
log_error_errno
(
-
1
,
errno
,
"Failed to remove isolated cpus"
);
/* Already set for us by someone else. */
if
(
v
==
'1'
)
TRACE
(
"
\"
cgroup.clone_children
\"
was already set to
\"
1
\"
"
);
/* copy parent's settings */
if
(
!
copy_parent_file
(
parent_cgroup
,
child_cgroup
,
"cpuset.mems"
))
return
log_error_errno
(
-
1
,
errno
,
"Failed to copy
\"
cpuset.mems
\"
settings"
);
/* Set clone_children so children inherit our settings */
ret
=
lxc_writeat
(
cgroup_fd
,
"cgroup.clone_children"
,
"1"
,
1
);
if
(
ret
<
0
)
return
log_error_errno
(
-
1
,
errno
,
"Failed to write 1 to
\"
%s/cgroup.clone_children
\"
"
,
child_cgroup
);
return
fret
;
}
/* Given two null-terminated lists of strings, return true if any string is in
* both.
*/
...
...
@@ -691,26 +469,101 @@ static char **cg_unified_get_controllers(int dfd, const char *file)
return
move_ptr
(
aret
);
}
static
struct
hierarchy
*
add_hierarchy
(
struct
hierarchy
***
h
,
char
**
clist
,
char
*
mountpoint
,
char
*
container_base_path
,
int
type
)
/*
 * Check @controllers against ops->cgroup_use.
 *
 * Returns true when ops->cgroup_use is unset, when @controllers is NULL or
 * empty, or when every entry of @controllers also appears in
 * ops->cgroup_use. Returns false as soon as one entry is missing from it.
 */
static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
					 char **controllers)
{
	char **wanted = ops->cgroup_use;

	if (!wanted || !controllers)
		return true;

	for (size_t i = 0; controllers[i]; i++) {
		size_t j = 0;

		/* Advance until a match or the end of the wanted list. */
		while (wanted[j] && !strequal(wanted[j], controllers[i]))
			j++;

		if (!wanted[j])
			return false;
	}

	return true;
}
static
int
add_hierarchy
(
struct
cgroup_ops
*
ops
,
char
**
clist
,
char
*
mountpoint
,
char
*
container_base_path
,
int
type
)
{
struct
hierarchy
*
new
;
__do_close
int
dfd_base
=
-
EBADF
,
dfd_mnt
=
-
EBADF
;
__do_free
struct
hierarchy
*
new
=
NULL
;
__do_free_string_list
char
**
controllers
=
clist
;
int
newentry
;
if
(
abspath
(
container_base_path
))
return
syserrno
(
-
errno
,
"Container base path must be relative to controller mount"
);
if
(
!
controllers
&&
type
!=
CGROUP2_SUPER_MAGIC
)
return
syserrno_set
(
-
EINVAL
,
"Empty controller list for non-unified cgroup hierarchy passed"
);
dfd_mnt
=
open_at
(
-
EBADF
,
mountpoint
,
PROTECT_OPATH_DIRECTORY
,
PROTECT_LOOKUP_ABSOLUTE_XDEV
,
0
);
if
(
dfd_mnt
<
0
)
return
syserrno
(
-
errno
,
"Failed to open %s"
,
mountpoint
);
if
(
is_empty_string
(
container_base_path
))
dfd_base
=
dfd_mnt
;
else
dfd_base
=
open_at
(
dfd_mnt
,
container_base_path
,
PROTECT_OPATH_DIRECTORY
,
PROTECT_LOOKUP_BENEATH_XDEV
,
0
);
if
(
dfd_base
<
0
)
return
syserrno
(
-
errno
,
"Failed to open %d(%s)"
,
dfd_base
,
container_base_path
);
if
(
!
controllers
)
{
/*
* We assume that the cgroup we're currently in has been delegated to
* us and we are free to further delegate all of the controllers listed
* in cgroup.controllers further down the hierarchy.
*/
controllers
=
cg_unified_get_controllers
(
dfd_base
,
"cgroup.controllers"
);
if
(
!
controllers
)
controllers
=
cg_unified_make_empty_controller
();
if
(
!
controllers
[
0
])
TRACE
(
"No controllers are enabled for delegation"
);
}
/* Exclude all controllers that cgroup use does not want. */
if
(
!
cgroup_use_wants_controllers
(
ops
,
controllers
))
return
log_trace
(
0
,
"Skipping cgroup hiearchy with non-requested controllers"
);
new
=
zalloc
(
sizeof
(
*
new
));
if
(
!
new
)
return
ret_set_errno
(
NULL
,
ENOMEM
);
new
->
controllers
=
clist
;
new
->
mountpoint
=
mountpoint
;
new
->
container_base_path
=
container_base_path
;
new
->
version
=
type
;
new
->
cgfd_con
=
-
EBADF
;
new
->
cgfd_limit
=
-
EBADF
;
new
->
cgfd_mon
=
-
EBADF
;
newentry
=
append_null_to_list
((
void
***
)
h
);
(
*
h
)[
newentry
]
=
new
;
return
new
;
return
ret_errno
(
ENOMEM
);
new
->
version
=
type
;
new
->
controllers
=
move_ptr
(
controllers
);
new
->
mountpoint
=
mountpoint
;
new
->
container_base_path
=
container_base_path
;
new
->
cgfd_con
=
-
EBADF
;
new
->
cgfd_limit
=
-
EBADF
;
new
->
cgfd_mon
=
-
EBADF
;
TRACE
(
"Adding cgroup hierarchy with mountpoint %s and base cgroup %s"
,
mountpoint
,
container_base_path
);
for
(
char
*
const
*
it
=
new
->
controllers
;
it
&&
*
it
;
it
++
)
TRACE
(
"The detected hierarchy contains the %s controller"
,
*
it
);
newentry
=
append_null_to_list
((
void
***
)
&
ops
->
hierarchies
);
new
->
dfd_mnt
=
move_fd
(
dfd_mnt
);
new
->
dfd_base
=
move_fd
(
dfd_base
);
if
(
type
==
CGROUP2_SUPER_MAGIC
)
ops
->
unified
=
new
;
(
ops
->
hierarchies
)[
newentry
]
=
move_ptr
(
new
);
return
0
;
}
/* Get a copy of the mountpoint from @line, which is a line from
...
...
@@ -788,38 +641,69 @@ static bool controller_in_clist(char *cgline, char *c)
return
false
;
}
/*
 * Strip trailing newline characters from @s in place and return @s.
 *
 * The string is never shortened below one character, so a string consisting
 * only of newlines is reduced to a single "\n".
 */
static inline char *trim(char *s)
{
	char *end = s + strlen(s);

	for (; end - s > 1 && end[-1] == '\n'; end--)
		end[-1] = '\0';

	return s;
}
/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
* @controller.
*/
static
char
*
cg_hybrid_get_current_cgroup
(
char
*
basecginfo
,
char
*
controller
,
int
type
)
static
char
*
cg_hybrid_get_current_cgroup
(
bool
relative
,
char
*
basecginfo
,
char
*
controller
,
int
type
)
{
char
*
p
=
basecginfo
;
char
*
base_cgrou
p
=
basecginfo
;
for
(;;)
{
bool
is_cgv2_base_cgroup
=
false
;
/* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
if
((
type
==
CGROUP2_SUPER_MAGIC
)
&&
(
*
p
==
'0'
))
if
((
type
==
CGROUP2_SUPER_MAGIC
)
&&
(
*
base_cgrou
p
==
'0'
))
is_cgv2_base_cgroup
=
true
;
p
=
strchr
(
p
,
':'
);
if
(
!
p
)
base_cgroup
=
strchr
(
base_cgrou
p
,
':'
);
if
(
!
base_cgrou
p
)
return
NULL
;
p
++
;
base_cgroup
++
;
if
(
is_cgv2_base_cgroup
||
(
controller
&&
controller_in_clist
(
base_cgroup
,
controller
)))
{
__do_free
char
*
copy
=
NULL
;
if
(
is_cgv2_base_cgroup
||
(
controller
&&
controller_in_clist
(
p
,
controller
)))
{
p
=
strchr
(
p
,
':'
);
if
(
!
p
)
base_cgroup
=
strchr
(
base_cgroup
,
':'
);
if
(
!
base_cgroup
)
return
NULL
;
p
++
;
return
copy_to_eol
(
p
);
base_cgroup
++
;
copy
=
copy_to_eol
(
base_cgroup
);
if
(
!
copy
)
return
NULL
;
trim
(
copy
);
if
(
!
relative
)
{
base_cgroup
=
prune_init_scope
(
copy
);
if
(
!
base_cgroup
)
return
NULL
;
}
else
{
base_cgroup
=
copy
;
}
if
(
abspath
(
base_cgroup
))
base_cgroup
=
deabs
(
base_cgroup
);
/* We're allowing base_cgroup to be "". */
return
strdup
(
base_cgroup
);
}
p
=
strchr
(
p
,
'\n'
);
if
(
!
p
)
base_cgroup
=
strchr
(
base_cgrou
p
,
'\n'
);
if
(
!
base_cgrou
p
)
return
NULL
;
p
++
;
base_cgrou
p
++
;
}
}
...
...
@@ -877,40 +761,6 @@ static int get_existing_subsystems(char ***klist, char ***nlist)
return
0
;
}
/*
 * Remove trailing '\n' characters from @s in place; returns @s.
 *
 * At least one character is always kept, so "\n" on its own is left as is.
 */
static char *trim(char *s)
{
	size_t remaining = strlen(s);

	while (remaining > 1) {
		if (s[remaining - 1] != '\n')
			break;

		remaining--;
		s[remaining] = '\0';
	}

	return s;
}
static
void
lxc_cgfsng_print_hierarchies
(
struct
cgroup_ops
*
ops
)
{
int
i
;
struct
hierarchy
**
it
;
if
(
!
ops
->
hierarchies
)
{
TRACE
(
" No hierarchies found"
);
return
;
}
TRACE
(
" Hierarchies:"
);
for
(
i
=
0
,
it
=
ops
->
hierarchies
;
it
&&
*
it
;
it
++
,
i
++
)
{
int
j
;
char
**
cit
;
TRACE
(
" %d: base_cgroup: %s"
,
i
,
(
*
it
)
->
container_base_path
?
(
*
it
)
->
container_base_path
:
"(null)"
);
TRACE
(
" mountpoint: %s"
,
(
*
it
)
->
mountpoint
?
(
*
it
)
->
mountpoint
:
"(null)"
);
TRACE
(
" controllers:"
);
for
(
j
=
0
,
cit
=
(
*
it
)
->
controllers
;
cit
&&
*
cit
;
cit
++
,
j
++
)
TRACE
(
" %d: %s"
,
j
,
*
cit
);
}
}
static
void
lxc_cgfsng_print_basecg_debuginfo
(
char
*
basecginfo
,
char
**
klist
,
char
**
nlist
)
{
...
...
@@ -1023,118 +873,223 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
}
else
{
ret
=
cgroup_tree_remove
(
ops
->
hierarchies
,
ops
->
container_cgroup
);
}
if
(
ret
<
0
)
SYSWARN
(
"Failed to destroy cgroups"
);
}
if
(
ret
<
0
)
SYSWARN
(
"Failed to destroy cgroups"
);
}
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
#define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
static
bool
cpuset1_cpus_initialize
(
int
dfd_parent
,
int
dfd_child
,
bool
am_initialized
)
{
__do_free
char
*
cpulist
=
NULL
,
*
fpath
=
NULL
,
*
isolcpus
=
NULL
,
*
offlinecpus
=
NULL
,
*
posscpus
=
NULL
;
__do_free
uint32_t
*
isolmask
=
NULL
,
*
offlinemask
=
NULL
,
*
possmask
=
NULL
;
int
ret
;
ssize_t
i
;
ssize_t
maxisol
=
0
,
maxoffline
=
0
,
maxposs
=
0
;
bool
flipped_bit
=
false
;
posscpus
=
read_file_at
(
dfd_parent
,
"cpuset.cpus"
,
PROTECT_OPEN
,
0
);
if
(
!
posscpus
)
return
log_error_errno
(
false
,
errno
,
"Failed to read file
\"
%s
\"
"
,
fpath
);
/* Get maximum number of cpus found in possible cpuset. */
maxposs
=
get_max_cpus
(
posscpus
);
if
(
maxposs
<
0
||
maxposs
>=
INT_MAX
-
1
)
return
false
;
if
(
file_exists
(
__ISOL_CPUS
))
{
isolcpus
=
read_file_at
(
-
EBADF
,
__ISOL_CPUS
,
PROTECT_OPEN
,
0
);
if
(
!
isolcpus
)
return
log_error_errno
(
false
,
errno
,
"Failed to read file
\"
%s
\"
"
,
__ISOL_CPUS
);
if
(
isdigit
(
isolcpus
[
0
]))
{
/* Get maximum number of cpus found in isolated cpuset. */
maxisol
=
get_max_cpus
(
isolcpus
);
if
(
maxisol
<
0
||
maxisol
>=
INT_MAX
-
1
)
return
false
;
}
if
(
maxposs
<
maxisol
)
maxposs
=
maxisol
;
maxposs
++
;
}
else
{
TRACE
(
"The path
\"
"
__ISOL_CPUS
"
\"
to read isolated cpus from does not exist"
);
}
if
(
file_exists
(
__OFFLINE_CPUS
))
{
offlinecpus
=
read_file_at
(
-
EBADF
,
__OFFLINE_CPUS
,
PROTECT_OPEN
,
0
);
if
(
!
offlinecpus
)
return
log_error_errno
(
false
,
errno
,
"Failed to read file
\"
%s
\"
"
,
__OFFLINE_CPUS
);
if
(
isdigit
(
offlinecpus
[
0
]))
{
/* Get maximum number of cpus found in offline cpuset. */
maxoffline
=
get_max_cpus
(
offlinecpus
);
if
(
maxoffline
<
0
||
maxoffline
>=
INT_MAX
-
1
)
return
false
;
}
if
(
maxposs
<
maxoffline
)
maxposs
=
maxoffline
;
maxposs
++
;
}
else
{
TRACE
(
"The path
\"
"
__OFFLINE_CPUS
"
\"
to read offline cpus from does not exist"
);
}
if
((
maxisol
==
0
)
&&
(
maxoffline
==
0
))
{
cpulist
=
move_ptr
(
posscpus
);
goto
copy_parent
;
}
__cgfsng_ops
static
void
cgfsng_monitor_destroy
(
struct
cgroup_ops
*
ops
,
struct
lxc_handler
*
handler
)
{
int
len
;
char
pidstr
[
INTTYPE_TO_STRLEN
(
pid_t
)];
const
struct
lxc_conf
*
conf
;
possmask
=
lxc_cpumask
(
posscpus
,
maxposs
);
if
(
!
possmask
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpumask for possible cpus"
);
if
(
!
ops
)
{
ERROR
(
"Called with uninitialized cgroup operations"
);
return
;
if
(
maxisol
>
0
)
{
isolmask
=
lxc_cpumask
(
isolcpus
,
maxposs
);
if
(
!
isolmask
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpumask for isolated cpus"
);
}
if
(
!
ops
->
hierarchies
)
return
;
if
(
maxoffline
>
0
)
{
offlinemask
=
lxc_cpumask
(
offlinecpus
,
maxposs
);
if
(
!
offlinemask
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpumask for offline cpus"
);
}
if
(
!
handler
)
{
ERROR
(
"Called with uninitialized handler"
);
return
;
for
(
i
=
0
;
i
<=
maxposs
;
i
++
)
{
if
((
isolmask
&&
!
is_set
(
i
,
isolmask
))
||
(
offlinemask
&&
!
is_set
(
i
,
offlinemask
))
||
!
is_set
(
i
,
possmask
))
continue
;
flipped_bit
=
true
;
clear_bit
(
i
,
possmask
);
}
if
(
!
handler
->
conf
)
{
ERROR
(
"Called with uninitialized conf"
);
return
;
if
(
!
flipped_bit
)
{
cpulist
=
lxc_cpumask_to_cpulist
(
possmask
,
maxposs
);
TRACE
(
"No isolated or offline cpus present in cpuset"
);
}
else
{
cpulist
=
move_ptr
(
posscpus
);
TRACE
(
"Removed isolated or offline cpus from cpuset"
);
}
conf
=
handler
->
conf
;
if
(
!
cpulist
)
return
log_error_errno
(
false
,
errno
,
"Failed to create cpu list"
);
len
=
strnprintf
(
pidstr
,
sizeof
(
pidstr
),
"%d"
,
handler
->
monitor_pid
);
if
(
len
<
0
)
return
;
copy_parent:
if
(
!
am_initialized
)
{
ret
=
lxc_writeat
(
dfd_child
,
"cpuset.cpus"
,
cpulist
,
strlen
(
cpulist
));
if
(
ret
<
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to write cpu list to
\"
%d/cpuset.cpus
\"
"
,
dfd_child
);
for
(
int
i
=
0
;
ops
->
hierarchies
[
i
];
i
++
)
{
__do_free
char
*
pivot_path
=
NULL
;
struct
hierarchy
*
h
=
ops
->
hierarchies
[
i
];
size_t
offset
;
int
ret
;
TRACE
(
"Copied cpu settings of parent cgroup"
);
}
if
(
!
h
->
monitor_full_path
)
continue
;
return
true
;
}
/* Monitor might have died before we entered the cgroup. */
if
(
handler
->
monitor_pid
<=
0
)
{
WARN
(
"No valid monitor process found while destroying cgroups"
)
;
goto
try_lxc_rm_rf
;
}
static
bool
cpuset1_initialize
(
int
dfd_base
,
int
dfd_next
)
{
char
mems
[
PATH_MAX
]
;
ssize_t
bytes
;
char
v
;
if
(
conf
->
cgroup_meta
.
monitor_pivot_dir
)
pivot_path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
conf
->
cgroup_meta
.
monitor_pivot_dir
,
CGROUP_PIVOT
,
NULL
);
else
if
(
conf
->
cgroup_meta
.
monitor_dir
)
pivot_path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
conf
->
cgroup_meta
.
monitor_dir
,
CGROUP_PIVOT
,
NULL
);
else
if
(
conf
->
cgroup_meta
.
dir
)
pivot_path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
conf
->
cgroup_meta
.
dir
,
CGROUP_PIVOT
,
NULL
);
else
pivot_path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
CGROUP_PIVOT
,
NULL
);
/*
* Determine whether the base cgroup has cpuset
* inheritance turned on.
*/
bytes
=
lxc_readat
(
dfd_base
,
"cgroup.clone_children"
,
&
v
,
1
);
if
(
bytes
<
0
)
return
syserrno
(
false
,
"Failed to read file %d(cgroup.clone_children)"
,
dfd_base
);
offset
=
strlen
(
h
->
mountpoint
)
+
strlen
(
h
->
container_base_path
);
/*
* Initialize cpuset.cpus and remove any isolated
* and offline cpus.
*/
if
(
!
cpuset1_cpus_initialize
(
dfd_base
,
dfd_next
,
v
==
'1'
))
return
syserrno
(
false
,
"Failed to initialize cpuset.cpus"
);
if
(
cg_legacy_handle_cpuset_hierarchy
(
h
,
pivot_path
+
offset
))
SYSWARN
(
"Failed to initialize cpuset %s/"
CGROUP_PIVOT
,
pivot_path
);
/* Read cpuset.mems from parent... */
bytes
=
lxc_readat
(
dfd_base
,
"cpuset.mems"
,
mems
,
sizeof
(
mems
));
if
(
bytes
<
0
)
return
syserrno
(
false
,
"Failed to read file %d(cpuset.mems)"
,
dfd_base
);
ret
=
mkdir_p
(
pivot_path
,
0755
);
if
(
ret
<
0
&&
errno
!=
EEXIST
)
{
ERROR
(
"Failed to create %s"
,
pivot_path
);
goto
try_lxc_rm_rf
;
}
/* ... and copy to first cgroup in the tree... */
bytes
=
lxc_writeat
(
dfd_next
,
"cpuset.mems"
,
mems
,
bytes
);
if
(
bytes
<
0
)
return
syserrno
(
false
,
"Failed to write %d(cpuset.mems)"
,
dfd_next
);
ret
=
lxc_write_openat
(
pivot_path
,
"cgroup.procs"
,
pidstr
,
len
);
if
(
ret
!=
0
)
{
SYSWARN
(
"Failed to move monitor %s to
\"
%s
\"
"
,
pidstr
,
pivot_path
);
continue
;
}
/* ... and finally turn on cpuset inheritance. */
bytes
=
lxc_writeat
(
dfd_next
,
"cgroup.clone_children"
,
"1"
,
1
);
if
(
bytes
<
0
)
return
syserrno
(
false
,
"Failed to write %d(cgroup.clone_children)"
,
dfd_next
);
try_lxc_rm_rf:
ret
=
lxc_rm_rf
(
h
->
monitor_full_path
);
if
(
ret
<
0
)
WARN
(
"Failed to destroy
\"
%s
\"
"
,
h
->
monitor_full_path
);
}
return
log_trace
(
true
,
"Initialized cpuset in the legacy hierarchy"
);
}
static
int
mkdir_eexist_on_last
(
const
char
*
dir
,
mode_t
mode
)
static
int
__cgroup_tree_create
(
int
dfd_base
,
const
char
*
path
,
mode_t
mode
,
bool
cpuset_v1
,
bool
eexist_ignore
)
{
const
char
*
tmp
=
dir
;
const
char
*
orig
=
dir
;
size_t
orig_len
;
__do_close
int
dfd_final
=
-
EBADF
;
int
dfd_cur
=
dfd_base
;
int
ret
=
0
;
size_t
len
;
char
*
cur
;
char
buf
[
PATH_MAX
];
orig_len
=
strlen
(
dir
);
do
{
__do_free
char
*
makeme
=
NULL
;
int
ret
;
size_t
cur_len
;
if
(
is_empty_string
(
path
))
return
ret_errno
(
-
EINVAL
);
len
=
strlcpy
(
buf
,
path
,
sizeof
(
buf
));
if
(
len
>=
sizeof
(
buf
))
return
-
E2BIG
;
lxc_iterate_parts
(
cur
,
buf
,
"/"
)
{
/*
* Even though we vetted the paths when we parsed the config
* we're paranoid here and check that the path is neither
* absolute nor walks upwards.
*/
if
(
abspath
(
buf
))
return
syserrno_set
(
-
EINVAL
,
"No absolute paths allowed"
);
dir
=
tmp
+
strspn
(
tmp
,
"/"
);
tmp
=
dir
+
strcspn
(
dir
,
"/
"
);
if
(
strnequal
(
buf
,
".."
,
STRLITERALLEN
(
".."
)))
return
syserrno_set
(
-
EINVAL
,
"No upward walking paths allowed
"
);
cur_len
=
dir
-
orig
;
makeme
=
strndup
(
orig
,
cur_len
);
if
(
!
makeme
)
return
ret_set_errno
(
-
1
,
ENOMEM
);
ret
=
mkdirat
(
dfd_cur
,
cur
,
mode
)
;
if
(
ret
<
0
)
{
if
(
errno
!=
EEXIST
)
return
syserrno
(
-
errno
,
"Failed to create %d(%s)"
,
dfd_cur
,
cur
);
ret
=
mkdir
(
makeme
,
mode
);
if
(
ret
<
0
&&
((
errno
!=
EEXIST
)
||
(
orig_len
==
cur_len
)))
return
log_warn_errno
(
-
1
,
errno
,
"Failed to create directory
\"
%s
\"
"
,
makeme
);
}
while
(
tmp
!=
dir
);
ret
=
-
EEXIST
;
}
TRACE
(
"%s %d(%s) cgroup"
,
!
ret
?
"Created"
:
"Reusing"
,
dfd_cur
,
cur
);
dfd_final
=
open_at
(
dfd_cur
,
cur
,
PROTECT_OPATH_DIRECTORY
,
PROTECT_LOOKUP_BENEATH
,
0
);
if
(
dfd_final
<
0
)
return
syserrno
(
-
errno
,
"Fail to open%s directory %d(%s)"
,
!
ret
?
" newly created"
:
""
,
dfd_base
,
cur
);
if
(
dfd_cur
!=
dfd_base
)
close
(
dfd_cur
);
else
if
(
cpuset_v1
&&
!
cpuset1_initialize
(
dfd_base
,
dfd_final
))
return
syserrno
(
-
EINVAL
,
"Failed to initialize cpuset controller in the legacy hierarchy"
);
/*
* Leave dfd_final pointing to the last fd we opened so
* it will be automatically zapped if we return early.
*/
dfd_cur
=
dfd_final
;
}
return
0
;
/* The final cgroup must be successfully created by us. */
if
(
ret
)
{
if
(
ret
!=
-
EEXIST
||
!
eexist_ignore
)
return
syserrno_set
(
ret
,
"Creating the final cgroup %d(%s) failed"
,
dfd_base
,
path
);
}
return
move_fd
(
dfd_final
);
}
static
bool
cgroup_tree_create
(
struct
cgroup_ops
*
ops
,
struct
lxc_conf
*
conf
,
...
...
@@ -1142,34 +1097,27 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
const
char
*
cgroup_leaf
,
bool
payload
,
const
char
*
cgroup_limit_dir
)
{
__do_close
int
fd_limit
=
-
EBADF
,
fd_final
=
-
EBADF
;
__do_free
char
*
path
=
NULL
,
*
limit_path
=
NULL
;
int
ret
,
ret_cpuset
;
bool
cpuset_v1
=
false
;
path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
cgroup_leaf
,
NULL
);
if
(
dir_exists
(
path
))
return
log_warn_errno
(
false
,
errno
,
"The %s cgroup already existed"
,
path
);
/* Don't bother with all the rest if the final cgroup already exists. */
if
(
exists_dir_at
(
h
->
dfd_base
,
cgroup_leaf
))
return
syswarn
(
false
,
"The %d(%s) cgroup already existed"
,
h
->
dfd_base
,
cgroup_leaf
);
ret_cpuset
=
cg_legacy_handle_cpuset_hierarchy
(
h
,
cgroup_leaf
);
if
(
ret_cpuset
<
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to handle legacy cpuset controller"
);
/*
* The legacy cpuset controller needs massaging in case inheriting
* settings from its immediate ancestor cgroup hasn't been turned on.
*/
cpuset_v1
=
!
is_unified_hierarchy
(
h
)
&&
string_in_list
(
h
->
controllers
,
"cpuset"
);
if
(
payload
&&
cgroup_limit_dir
)
{
/*
with isolation both parts need to not already exist
*/
limit_path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
cgroup_limit_dir
,
NULL
);
/*
With isolation both parts need to not already exist.
*/
fd_limit
=
__cgroup_tree_create
(
h
->
dfd_base
,
cgroup_limit_dir
,
0755
,
cpuset_v1
,
false
);
if
(
fd_limit
<
0
)
return
syserrno
(
false
,
"Failed to create limiting cgroup %d(%s)"
,
h
->
dfd_base
,
cgroup_limit_dir
);
ret
=
mkdir_eexist_on_last
(
limit_path
,
0755
);
if
(
ret
<
0
)
return
log_debug_errno
(
false
,
errno
,
"Failed to create %s limiting cgroup"
,
limit_path
);
h
->
cgfd_limit
=
lxc_open_dirfd
(
limit_path
);
if
(
h
->
cgfd_limit
<
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to open %s"
,
path
);
h
->
container_limit_path
=
move_ptr
(
limit_path
);
limit_path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
cgroup_limit_dir
,
NULL
);
/*
* With isolation the devices legacy cgroup needs to be
...
...
@@ -1182,30 +1130,26 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
return
log_error
(
false
,
"Failed to setup legacy device limits"
);
}
ret
=
mkdir_eexist_on_last
(
path
,
0755
);
if
(
ret
<
0
)
{
/*
* This is the cpuset controller and
* cg_legacy_handle_cpuset_hierarchy() has created our target
* directory for us to ensure correct initialization.
*/
if
(
ret_cpuset
!=
1
||
cgroup_tree
)
return
log_debug_errno
(
false
,
errno
,
"Failed to create %s cgroup"
,
path
);
}
fd_final
=
__cgroup_tree_create
(
h
->
dfd_base
,
cgroup_leaf
,
0755
,
cpuset_v1
,
false
);
if
(
fd_final
<
0
)
return
syserrno
(
false
,
"Failed to create %s cgroup %d(%s)"
,
payload
?
"payload"
:
"monitor"
,
h
->
dfd_base
,
cgroup_limit_dir
);
path
=
must_make_path
(
h
->
mountpoint
,
h
->
container_base_path
,
cgroup_leaf
,
NULL
);
if
(
payload
)
{
h
->
cgfd_con
=
lxc_open_dirfd
(
path
);
if
(
h
->
cgfd_con
<
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to open %s"
,
path
);
h
->
cgfd_con
=
move_fd
(
fd_final
);
h
->
container_full_path
=
move_ptr
(
path
);
if
(
h
->
cgfd_limit
<
0
)
if
(
fd_limit
<
0
)
h
->
cgfd_limit
=
h
->
cgfd_con
;
if
(
!
h
->
container_limit_path
)
else
h
->
cgfd_limit
=
move_fd
(
fd_limit
);
if
(
!
limit_path
)
h
->
container_limit_path
=
h
->
container_full_path
;
else
h
->
container_limit_path
=
move_ptr
(
limit_path
);
}
else
{
h
->
cgfd_mon
=
lxc_open_dirfd
(
path
);
if
(
h
->
cgfd_mon
<
0
)
return
log_error_errno
(
false
,
errno
,
"Failed to open %s"
,
path
);
h
->
cgfd_mon
=
move_fd
(
fd_final
);
h
->
monitor_full_path
=
move_ptr
(
path
);
}
...
...
@@ -1234,6 +1178,82 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
SYSWARN
(
"Failed to rmdir(
\"
%s
\"
) cgroup"
,
limit_path
);
}
__cgfsng_ops
static
void
cgfsng_monitor_destroy
(
struct
cgroup_ops
*
ops
,
struct
lxc_handler
*
handler
)
{
int
len
;
char
pidstr
[
INTTYPE_TO_STRLEN
(
pid_t
)];
const
struct
lxc_conf
*
conf
;
if
(
!
ops
)
{
ERROR
(
"Called with uninitialized cgroup operations"
);
return
;
}
if
(
!
ops
->
hierarchies
)
return
;
if
(
!
handler
)
{
ERROR
(
"Called with uninitialized handler"
);
return
;
}
if
(
!
handler
->
conf
)
{
ERROR
(
"Called with uninitialized conf"
);
return
;
}
conf
=
handler
->
conf
;
len
=
strnprintf
(
pidstr
,
sizeof
(
pidstr
),
"%d"
,
handler
->
monitor_pid
);
if
(
len
<
0
)
return
;
for
(
int
i
=
0
;
ops
->
hierarchies
[
i
];
i
++
)
{
__do_close
int
fd_pivot
=
-
EBADF
;
__do_free
char
*
pivot_path
=
NULL
;
struct
hierarchy
*
h
=
ops
->
hierarchies
[
i
];
bool
cpuset_v1
=
false
;
int
ret
;
if
(
!
h
->
monitor_full_path
)
continue
;
/* Monitor might have died before we entered the cgroup. */
if
(
handler
->
monitor_pid
<=
0
)
{
WARN
(
"No valid monitor process found while destroying cgroups"
);
goto
try_lxc_rm_rf
;
}
if
(
conf
->
cgroup_meta
.
monitor_pivot_dir
)
pivot_path
=
must_make_path
(
conf
->
cgroup_meta
.
monitor_pivot_dir
,
CGROUP_PIVOT
,
NULL
);
else
if
(
conf
->
cgroup_meta
.
monitor_dir
)
pivot_path
=
must_make_path
(
conf
->
cgroup_meta
.
monitor_dir
,
CGROUP_PIVOT
,
NULL
);
else
if
(
conf
->
cgroup_meta
.
dir
)
pivot_path
=
must_make_path
(
conf
->
cgroup_meta
.
dir
,
CGROUP_PIVOT
,
NULL
);
else
pivot_path
=
must_make_path
(
CGROUP_PIVOT
,
NULL
);
cpuset_v1
=
!
is_unified_hierarchy
(
h
)
&&
string_in_list
(
h
->
controllers
,
"cpuset"
);
fd_pivot
=
__cgroup_tree_create
(
h
->
dfd_base
,
pivot_path
,
0755
,
cpuset_v1
,
true
);
if
(
fd_pivot
<
0
)
{
SYSWARN
(
"Failed to create pivot cgroup %d(%s)"
,
h
->
dfd_base
,
pivot_path
);
continue
;
}
ret
=
lxc_writeat
(
fd_pivot
,
"cgroup.procs"
,
pidstr
,
len
);
if
(
ret
!=
0
)
{
SYSWARN
(
"Failed to move monitor %s to
\"
%s
\"
"
,
pidstr
,
pivot_path
);
continue
;
}
try_lxc_rm_rf:
ret
=
lxc_rm_rf
(
h
->
monitor_full_path
);
if
(
ret
<
0
)
WARN
(
"Failed to destroy
\"
%s
\"
"
,
h
->
monitor_full_path
);
}
}
/*
* Check we have no lxc.cgroup.dir, and that lxc.cgroup.dir.limit_prefix is a
* proper prefix directory of lxc.cgroup.dir.payload.
...
...
@@ -1332,7 +1352,7 @@ __cgfsng_ops static bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lx
monitor_cgroup
,
false
,
NULL
))
continue
;
DEBUG
(
"Failed to create cgroup
\"
%s
\"
"
,
ops
->
hierarchies
[
i
]
->
monitor_full_path
?:
"(null)"
);
DEBUG
(
"Failed to create cgroup
\"
%s
\"
"
,
maybe_empty
(
ops
->
hierarchies
[
i
]
->
monitor_full_path
)
);
for
(
int
j
=
0
;
j
<
i
;
j
++
)
cgroup_tree_leaf_remove
(
ops
->
hierarchies
[
j
],
false
);
...
...
@@ -3251,32 +3271,6 @@ __cgfsng_ops static bool cgfsng_payload_delegate_controllers(struct cgroup_ops *
return
__cgfsng_delegate_controllers
(
ops
,
ops
->
container_cgroup
);
}
/*
 * Check whether all controllers in @controllers are requested by the
 * lxc.cgroup.use list stored in @ops->cgroup_use.
 *
 * Returns true when no cgroup_use list has been set, when @controllers is
 * NULL or empty, or when every entry of @controllers is also found in
 * @ops->cgroup_use. Returns false as soon as one controller is missing from
 * the cgroup_use list.
 */
static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
					 char **controllers)
{
	char **wanted = ops->cgroup_use;

	/* Without a cgroup_use list every controller is acceptable. */
	if (!wanted)
		return true;

	/* Nothing to check. */
	if (!controllers)
		return true;

	for (char **ctrl = controllers; *ctrl; ctrl++) {
		char **match = wanted;

		/* Advance until we hit a matching entry or the terminator. */
		while (*match && !strequal(*match, *ctrl))
			match++;

		/* Reached the end of the list: this controller isn't wanted. */
		if (!*match)
			return false;
	}

	return true;
}
static
void
cg_unified_delegate
(
char
***
delegate
)
{
__do_free
char
*
buf
=
NULL
;
...
...
@@ -3343,7 +3337,6 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
__do_free_string_list
char
**
controller_list
=
NULL
;
int
type
;
bool
writeable
;
struct
hierarchy
*
new
;
type
=
get_cgroup_version
(
line
);
if
(
type
==
0
)
...
...
@@ -3382,16 +3375,14 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
}
if
(
type
==
CGROUP_SUPER_MAGIC
)
base_cgroup
=
cg_hybrid_get_current_cgroup
(
basecginfo
,
controller_list
[
0
],
CGROUP_SUPER_MAGIC
);
base_cgroup
=
cg_hybrid_get_current_cgroup
(
relative
,
basecginfo
,
controller_list
[
0
],
CGROUP_SUPER_MAGIC
);
else
base_cgroup
=
cg_hybrid_get_current_cgroup
(
basecginfo
,
NULL
,
CGROUP2_SUPER_MAGIC
);
base_cgroup
=
cg_hybrid_get_current_cgroup
(
relative
,
basecginfo
,
NULL
,
CGROUP2_SUPER_MAGIC
);
if
(
!
base_cgroup
)
{
WARN
(
"Failed to find current cgroup"
);
continue
;
}
trim
(
base_cgroup
);
prune_init_scope
(
base_cgroup
);
if
(
type
==
CGROUP2_SUPER_MAGIC
)
writeable
=
test_writeable_v2
(
mountpoint
,
base_cgroup
);
else
...
...
@@ -3401,41 +3392,16 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
continue
;
}
if
(
type
==
CGROUP2_SUPER_MAGIC
)
{
char
*
cgv2_ctrl_path
;
cgv2_ctrl_path
=
must_make_path
(
mountpoint
,
base_cgroup
,
"cgroup.controllers"
,
NULL
);
controller_list
=
cg_unified_get_controllers
(
-
EBADF
,
cgv2_ctrl_path
);
free
(
cgv2_ctrl_path
);
if
(
!
controller_list
)
{
controller_list
=
cg_unified_make_empty_controller
();
TRACE
(
"No controllers are enabled for "
"delegation in the unified hierarchy"
);
}
}
/* Exclude all controllers that cgroup use does not want. */
if
(
!
cgroup_use_wants_controllers
(
ops
,
controller_list
))
{
TRACE
(
"Skipping controller"
);
continue
;
}
new
=
add_hierarchy
(
&
ops
->
hierarchies
,
move_ptr
(
controller_list
),
move_ptr
(
mountpoint
),
move_ptr
(
base_cgroup
),
type
);
if
(
!
new
)
return
log_error_errno
(
-
1
,
errno
,
"Failed to add cgroup hierarchy"
);
if
(
type
==
CGROUP2_SUPER_MAGIC
&&
!
ops
->
unified
)
{
if
(
unprivileged
)
cg_unified_delegate
(
&
new
->
cgroup2_chown
);
ops
->
unified
=
new
;
}
if
(
type
==
CGROUP2_SUPER_MAGIC
)
ret
=
add_hierarchy
(
ops
,
NULL
,
move_ptr
(
mountpoint
),
move_ptr
(
base_cgroup
),
type
);
else
ret
=
add_hierarchy
(
ops
,
move_ptr
(
controller_list
),
move_ptr
(
mountpoint
),
move_ptr
(
base_cgroup
),
type
);
if
(
ret
)
return
syserrno
(
ret
,
"Failed to add cgroup hierarchy"
);
if
(
ops
->
unified
&&
unprivileged
)
cg_unified_delegate
(
&
(
ops
->
unified
)
->
cgroup2_chown
);
}
TRACE
(
"Writable cgroup hierarchies:"
);
lxc_cgfsng_print_hierarchies
(
ops
);
/* verify that all controllers in cgroup.use and all crucial
* controllers are accounted for
*/
...
...
@@ -3448,8 +3414,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
static
char
*
cg_unified_get_current_cgroup
(
bool
relative
)
{
__do_free
char
*
basecginfo
=
NULL
;
char
*
copy
;
__do_free
char
*
basecginfo
=
NULL
,
*
copy
=
NULL
;
char
*
base_cgroup
;
if
(
!
relative
&&
(
geteuid
()
==
0
))
...
...
@@ -3467,48 +3432,32 @@ static char *cg_unified_get_current_cgroup(bool relative)
copy
=
copy_to_eol
(
base_cgroup
);
if
(
!
copy
)
return
NULL
;
trim
(
copy
);
if
(
!
relative
)
{
base_cgroup
=
prune_init_scope
(
copy
);
if
(
!
base_cgroup
)
return
NULL
;
}
else
{
base_cgroup
=
copy
;
}
if
(
abspath
(
base_cgroup
))
base_cgroup
=
deabs
(
base_cgroup
);
return
trim
(
copy
);
/* We're allowing base_cgroup to be "". */
return
strdup
(
base_cgroup
);
}
static
int
cg_unified_init
(
struct
cgroup_ops
*
ops
,
bool
relative
,
bool
unprivileged
)
{
__do_close
int
cgroup_root_fd
=
-
EBADF
;
__do_free
char
*
base_cgroup
=
NULL
,
*
controllers_path
=
NULL
;
__do_free_string_list
char
**
delegatable
=
NULL
;
__do_free
struct
hierarchy
*
new
=
NULL
;
__do_free
char
*
base_cgroup
=
NULL
;
int
ret
;
ret
=
unified_cgroup_hierarchy
();
if
(
ret
==
-
ENOMEDIUM
)
return
ret_errno
(
ENOMEDIUM
);
if
(
ret
!=
CGROUP2_SUPER_MAGIC
)
return
0
;
base_cgroup
=
cg_unified_get_current_cgroup
(
relative
);
if
(
!
base_cgroup
)
return
ret_errno
(
EINVAL
);
if
(
!
relative
)
prune_init_scope
(
base_cgroup
);
cgroup_root_fd
=
openat
(
-
EBADF
,
DEFAULT_CGROUP_MOUNTPOINT
,
O_NOCTTY
|
O_CLOEXEC
|
O_NOFOLLOW
|
O_DIRECTORY
);
if
(
cgroup_root_fd
<
0
)
return
-
errno
;
/*
* We assume that the cgroup we're currently in has been delegated to
* us and we are free to further delegate all of the controllers listed
* in cgroup.controllers further down the hierarchy.
*/
controllers_path
=
must_make_path_relative
(
base_cgroup
,
"cgroup.controllers"
,
NULL
);
delegatable
=
cg_unified_get_controllers
(
cgroup_root_fd
,
controllers_path
);
if
(
!
delegatable
)
delegatable
=
cg_unified_make_empty_controller
();
if
(
!
delegatable
[
0
])
TRACE
(
"No controllers are enabled for delegation"
);
/* TODO: If the user requested specific controllers via lxc.cgroup.use
* we should verify here. The reason I'm not doing it right is that I'm
...
...
@@ -3517,31 +3466,41 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
* controllers per container.
*/
new
=
add_hierarchy
(
&
ops
->
hierarchies
,
move_ptr
(
delegatable
),
ret
=
add_hierarchy
(
ops
,
NULL
,
must_copy_string
(
DEFAULT_CGROUP_MOUNTPOINT
),
move_ptr
(
base_cgroup
),
CGROUP2_SUPER_MAGIC
);
if
(
!
new
)
return
log_error_errno
(
-
1
,
errno
,
"Failed to add unified cgroup hierarchy"
);
move_ptr
(
base_cgroup
),
CGROUP2_SUPER_MAGIC
);
if
(
ret
)
return
syserrno
(
ret
,
"Failed to add unified cgroup hierarchy"
);
if
(
unprivileged
)
cg_unified_delegate
(
&
new
->
cgroup2_chown
);
cg_unified_delegate
(
&
(
ops
->
unified
)
->
cgroup2_chown
);
if
(
bpf_devices_cgroup_supported
())
new
->
bpf_device_controller
=
1
;
ops
->
unified
->
bpf_device_controller
=
1
;
ops
->
cgroup_layout
=
CGROUP_LAYOUT_UNIFIED
;
ops
->
unified
=
move_ptr
(
new
);
return
CGROUP2_SUPER_MAGIC
;
}
static
int
cg
_init
(
struct
cgroup_ops
*
ops
,
struct
lxc_conf
*
conf
)
static
int
__cgroup
_init
(
struct
cgroup_ops
*
ops
,
struct
lxc_conf
*
conf
)
{
__do_close
int
dfd
=
-
EBADF
;
bool
relative
=
conf
->
cgroup_meta
.
relative
;
int
ret
;
const
char
*
tmp
;
bool
relative
=
conf
->
cgroup_meta
.
relative
;
if
(
ops
->
dfd_mnt_cgroupfs_host
>=
0
)
return
ret_errno
(
EINVAL
);
/*
* I don't see the need for allowing symlinks here. If users want to
* have their hierarchy available in different locations I strongly
* suggest bind-mounts.
*/
dfd
=
open_at
(
-
EBADF
,
DEFAULT_CGROUP_MOUNTPOINT
,
PROTECT_OPATH_DIRECTORY
,
PROTECT_LOOKUP_ABSOLUTE_XDEV
,
0
);
if
(
dfd
<
0
)
return
syserrno
(
-
errno
,
"Failed to open "
DEFAULT_CGROUP_MOUNTPOINT
);
tmp
=
lxc_global_config_value
(
"lxc.cgroup.use"
);
if
(
tmp
)
{
...
...
@@ -3555,14 +3514,23 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
must_append_string
(
&
ops
->
cgroup_use
,
cur
);
}
ret
=
cg_unified_init
(
ops
,
relative
,
!
lxc_list_empty
(
&
conf
->
id_map
));
if
(
ret
<
0
)
return
-
1
;
/*
* Keep dfd referenced by the cleanup function and actually move the fd
* once we know the initialization succeeded. So if we fail we clean up
* the dfd.
*/
ops
->
dfd_mnt_cgroupfs_host
=
dfd
;
if
(
ret
==
CGROUP2_SUPER_MAGIC
)
return
0
;
if
(
unified_cgroup_fd
(
dfd
))
ret
=
cg_unified_init
(
ops
,
relative
,
!
lxc_list_empty
(
&
conf
->
id_map
));
else
ret
=
cg_hybrid_init
(
ops
,
relative
,
!
lxc_list_empty
(
&
conf
->
id_map
));
if
(
ret
<
0
)
return
syserrno
(
ret
,
"Failed to initialize cgroups"
);
return
cg_hybrid_init
(
ops
,
relative
,
!
lxc_list_empty
(
&
conf
->
id_map
));
/* Transfer ownership to cgroup_ops. */
move_fd
(
dfd
);
return
0
;
}
__cgfsng_ops
static
int
cgfsng_data_init
(
struct
cgroup_ops
*
ops
)
...
...
@@ -3589,8 +3557,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
return
ret_set_errno
(
NULL
,
ENOMEM
);
cgfsng_ops
->
cgroup_layout
=
CGROUP_LAYOUT_UNKNOWN
;
cgfsng_ops
->
dfd_mnt_cgroupfs_host
=
-
EBADF
;
if
(
cg
_init
(
cgfsng_ops
,
conf
))
if
(
__cgroup
_init
(
cgfsng_ops
,
conf
))
return
NULL
;
cgfsng_ops
->
data_init
=
cgfsng_data_init
;
...
...
src/lxc/cgroups/cgroup.c
View file @
858f6225
...
...
@@ -33,10 +33,14 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
if
(
!
cgroup_ops
)
return
log_error_errno
(
NULL
,
errno
,
"Failed to initialize cgroup driver"
);
if
(
!
cgroup_ops
->
hierarchies
)
{
cgroup_exit
(
cgroup_ops
);
return
log_error_errno
(
NULL
,
ENOENT
,
"No cgroup hierarchies found"
);
}
if
(
cgroup_ops
->
data_init
(
cgroup_ops
))
{
cgroup_exit
(
cgroup_ops
);
return
log_error_errno
(
NULL
,
errno
,
"Failed to initialize cgroup data"
);
return
log_error_errno
(
NULL
,
errno
,
"Failed to initialize cgroup data"
);
}
TRACE
(
"Initialized cgroup driver %s"
,
cgroup_ops
->
driver
);
...
...
@@ -68,6 +72,9 @@ void cgroup_exit(struct cgroup_ops *ops)
if
(
ops
->
cgroup2_devices
)
bpf_program_free
(
ops
->
cgroup2_devices
);
if
(
ops
->
dfd_mnt_cgroupfs_host
>=
0
)
close
(
ops
->
dfd_mnt_cgroupfs_host
);
for
(
struct
hierarchy
**
it
=
ops
->
hierarchies
;
it
&&
*
it
;
it
++
)
{
for
(
char
**
p
=
(
*
it
)
->
controllers
;
p
&&
*
p
;
p
++
)
free
(
*
p
);
...
...
@@ -79,12 +86,34 @@ void cgroup_exit(struct cgroup_ops *ops)
free
((
*
it
)
->
mountpoint
);
free
((
*
it
)
->
container_base_path
);
free
((
*
it
)
->
container_full_path
);
free
((
*
it
)
->
monitor_full_path
);
if
((
*
it
)
->
cgfd_con
>=
0
)
close
((
*
it
)
->
cgfd_con
);
{
free
((
*
it
)
->
container_full_path
);
if
((
*
it
)
->
container_full_path
!=
(
*
it
)
->
container_limit_path
)
free
((
*
it
)
->
monitor_full_path
);
}
{
if
((
*
it
)
->
cgfd_limit
>=
0
&&
(
*
it
)
->
cgfd_con
!=
(
*
it
)
->
cgfd_limit
)
close
((
*
it
)
->
cgfd_limit
);
if
((
*
it
)
->
cgfd_con
>=
0
)
close
((
*
it
)
->
cgfd_con
);
}
if
((
*
it
)
->
cgfd_mon
>=
0
)
close
((
*
it
)
->
cgfd_mon
);
{
if
((
*
it
)
->
dfd_base
>=
0
&&
(
*
it
)
->
dfd_mnt
!=
(
*
it
)
->
dfd_base
)
close
((
*
it
)
->
dfd_base
);
if
((
*
it
)
->
dfd_mnt
>=
0
)
close
((
*
it
)
->
dfd_mnt
);
}
free
(
*
it
);
}
free
(
ops
->
hierarchies
);
...
...
@@ -95,21 +124,13 @@ void cgroup_exit(struct cgroup_ops *ops)
}
#define INIT_SCOPE "/init.scope"
void
prune_init_scope
(
char
*
cg
)
char
*
prune_init_scope
(
char
*
cg
)
{
char
*
point
;
if
(
is_empty_string
(
cg
))
return
NULL
;
if
(
!
cg
)
return
;
point
=
cg
+
strlen
(
cg
)
-
strlen
(
INIT_SCOPE
);
if
(
point
<
cg
)
return
;
if
(
strnequal
(
cg
,
INIT_SCOPE
,
STRLITERALLEN
(
INIT_SCOPE
)))
return
cg
+
STRLITERALLEN
(
INIT_SCOPE
);
if
(
strequal
(
point
,
INIT_SCOPE
))
{
if
(
point
==
cg
)
*
(
point
+
1
)
=
'\0'
;
else
*
point
=
'\0'
;
}
return
cg
;
}
src/lxc/cgroups/cgroup.h
View file @
858f6225
...
...
@@ -91,12 +91,24 @@ struct hierarchy {
unsigned
int
bpf_device_controller
:
1
;
unsigned
int
freezer_controller
:
1
;
/*
container cgroup fd
*/
/*
File descriptor for the container's cgroup @container_full_path.
*/
int
cgfd_con
;
/* limiting cgroup fd (may be equal to cgfd_con if not separated) */
/*
* File descriptor for the container's limiting cgroup
* @container_limit_path.
* Will be equal to @cgfd_con if no limiting cgroup has been requested.
*/
int
cgfd_limit
;
/* monitor cgroup fd */
/* File descriptor for the monitor's cgroup @monitor_full_path. */
int
cgfd_mon
;
/* File descriptor for the controller's mountpoint @mountpoint. */
int
dfd_mnt
;
/* File descriptor for the controller's base cgroup path @container_base_path. */
int
dfd_base
;
};
struct
cgroup_ops
{
...
...
@@ -106,6 +118,18 @@ struct cgroup_ops {
/* string constant */
const
char
*
version
;
/*
* File descriptor for the host's cgroupfs mount. On
* CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID systems
* @dfd_mnt_cgroupfs_host will be a tmpfs fd and the individual
* controllers will be cgroupfs fds. On CGROUP_LAYOUT_UNIFIED it will
* be a cgroupfs fd itself.
*
* So for CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID we allow
* mountpoint crossing iff we cross from a tmpfs into a cgroupfs mount.
* */
int
dfd_mnt_cgroupfs_host
;
/* What controllers is the container supposed to use. */
char
**
cgroup_use
;
char
*
cgroup_pattern
;
...
...
@@ -186,7 +210,7 @@ __hidden extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
__hidden
extern
void
cgroup_exit
(
struct
cgroup_ops
*
ops
);
define_cleanup_function
(
struct
cgroup_ops
*
,
cgroup_exit
);
__hidden
extern
void
prune_init_scope
(
char
*
cg
);
__hidden
extern
char
*
prune_init_scope
(
char
*
cg
);
__hidden
extern
int
cgroup_attach
(
const
struct
lxc_conf
*
conf
,
const
char
*
name
,
const
char
*
lxcpath
,
pid_t
pid
);
...
...
src/lxc/cgroups/cgroup_utils.c
View file @
858f6225
...
...
@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path)
return
(
access
(
cgroup_threads_file
,
W_OK
)
==
0
);
}
/*
 * Probe the filesystem mounted at DEFAULT_CGROUP_MOUNTPOINT.
 *
 * Returns -ENOMEDIUM if statfs() on the mountpoint fails,
 * CGROUP2_SUPER_MAGIC if the mountpoint is a cgroup2 filesystem, and 0 for
 * any other filesystem type.
 */
int unified_cgroup_hierarchy(void)
{
	struct statfs sfs;

	if (statfs(DEFAULT_CGROUP_MOUNTPOINT, &sfs) < 0)
		return -ENOMEDIUM;

	if (!is_fs_type(&sfs, CGROUP2_SUPER_MAGIC))
		return 0;

	return CGROUP2_SUPER_MAGIC;
}
int
unified_cgroup_fd
(
int
fd
)
{
...
...
src/lxc/cgroups/cgroup_utils.h
View file @
858f6225
...
...
@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path);
*/
__hidden
extern
bool
test_writeable_v2
(
char
*
mountpoint
,
char
*
path
);
__hidden
extern
int
unified_cgroup_hierarchy
(
void
);
__hidden
extern
int
unified_cgroup_fd
(
int
fd
);
static
inline
bool
cgns_supported
(
void
)
...
...
src/lxc/file_utils.c
View file @
858f6225
...
...
@@ -31,15 +31,15 @@ int lxc_readat(int dirfd, const char *filename, void *buf, size_t count)
__do_close
int
fd
=
-
EBADF
;
ssize_t
ret
;
fd
=
open
at
(
dirfd
,
filename
,
O_RDONLY
|
O_CLOEXEC
);
fd
=
open
_at
(
dirfd
,
filename
,
PROTECT_OPEN
,
PROTECT_LOOKUP_BENEATH
,
0
);
if
(
fd
<
0
)
return
-
1
;
return
-
errno
;
ret
=
lxc_read_nointr
(
fd
,
buf
,
count
);
if
(
ret
<
0
||
(
size_t
)
ret
!=
count
)
return
-
1
;
if
(
ret
<
0
)
return
-
errno
;
return
0
;
return
ret
;
}
int
lxc_writeat
(
int
dirfd
,
const
char
*
filename
,
const
void
*
buf
,
size_t
count
)
...
...
@@ -630,21 +630,31 @@ int timens_offset_write(clockid_t clk_id, int64_t s_offset, int64_t ns_offset)
bool
exists_dir_at
(
int
dir_fd
,
const
char
*
path
)
{
struct
stat
sb
;
int
ret
;
struct
stat
sb
;
ret
=
fstatat
(
dir_fd
,
path
,
&
sb
,
0
);
if
(
ret
<
0
)
return
false
;
return
S_ISDIR
(
sb
.
st_mode
);
ret
=
S_ISDIR
(
sb
.
st_mode
);
if
(
ret
)
errno
=
EEXIST
;
else
errno
=
ENOTDIR
;
return
ret
;
}
bool
exists_file_at
(
int
dir_fd
,
const
char
*
path
)
{
int
ret
;
struct
stat
sb
;
return
fstatat
(
dir_fd
,
path
,
&
sb
,
0
)
==
0
;
ret
=
fstatat
(
dir_fd
,
path
,
&
sb
,
0
);
if
(
ret
==
0
)
errno
=
EEXIST
;
return
ret
==
0
;
}
int
open_at
(
int
dfd
,
const
char
*
path
,
unsigned
int
o_flags
,
...
...
src/lxc/log.h
View file @
858f6225
...
...
@@ -501,6 +501,20 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
__internal_ret__; \
})
/*
 * syswarn() - log a warning and yield a return value in one expression.
 *
 * @__ret__ is evaluated exactly once and stored in a local before SYSWARN()
 * is invoked with @format and the variadic arguments, so the value handed
 * back is the one computed before any logging took place. The statement
 * expression evaluates to that stored value, which allows
 * "return syswarn(ret, "...");" style call sites.
 */
#define syswarn(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
SYSWARN(format, ##__VA_ARGS__); \
__internal_ret__; \
})
/*
 * sysdebug() - log a debug message and yield a return value in one expression.
 *
 * @__ret__ is evaluated exactly once and stored in a local before SYSDEBUG()
 * is invoked with @format and the variadic arguments, so the value handed
 * back is the one computed before any logging took place. The statement
 * expression evaluates to that stored value.
 */
#define sysdebug(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
SYSDEBUG(format, ##__VA_ARGS__); \
__internal_ret__; \
})
#define syserrno_set(__ret__, format, ...) \
({ \
typeof(__ret__) __internal_ret__ = (__ret__); \
...
...
src/lxc/string_utils.c
View file @
858f6225
...
...
@@ -813,6 +813,8 @@ char *must_make_path(const char *first, ...)
va_start
(
args
,
first
);
while
((
cur
=
va_arg
(
args
,
char
*
))
!=
NULL
)
{
buf_len
=
strlen
(
cur
);
if
(
buf_len
==
0
)
continue
;
full_len
+=
buf_len
;
if
(
cur
[
0
]
!=
'/'
)
...
...
src/lxc/string_utils.h
View file @
858f6225
...
...
@@ -150,6 +150,11 @@ static inline bool abspath(const char *str)
return
*
str
==
'/'
;
}
/*
 * Strip all leading '/' characters from @str, turning an absolute path into
 * a relative one.
 *
 * Returns a pointer into @str at the first character that is not '/'. This
 * is the terminating NUL byte if @str is empty or consists solely of
 * slashes. No memory is allocated and @str itself is not modified.
 */
static inline char *deabs(char *str)
{
	char *cursor = str;

	while (*cursor == '/')
		cursor++;

	return cursor;
}
#define strnprintf(buf, buf_size, ...) \
({ \
int __ret_strnprintf; \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment