Skip to content

Commit 248b1a6

Browse files
authored
Merge pull request containerd#12952 from chrishenzie/mount-option-removal
Preserve cgroup mount options for privileged containers
2 parents b7ddef2 + 0eef29a commit 248b1a6

7 files changed

Lines changed: 240 additions & 9 deletions

File tree

Vagrantfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ EOF
278278
'GOTESTSUM_JSONFILE': ENV['GOTESTSUM_JSONFILE'],
279279
'GITHUB_WORKSPACE': '',
280280
'CGROUP_DRIVER': ENV['CGROUP_DRIVER'],
281+
'RUNC_FLAVOR': ENV['RUNC_FLAVOR'] || "runc",
281282
}
282283
sh.inline = <<~SHELL
283284
#!/usr/bin/env bash
@@ -306,6 +307,7 @@ EOF
306307
'GOTEST': ENV['GOTEST'] || "go test",
307308
'REPORT_DIR': ENV['REPORT_DIR'],
308309
'CGROUP_DRIVER': ENV['CGROUP_DRIVER'],
310+
'RUNC_FLAVOR': ENV['RUNC_FLAVOR'] || "runc",
309311
}
310312
sh.inline = <<~SHELL
311313
#!/usr/bin/env bash
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package integration
18+
19+
import (
20+
"os"
21+
"strings"
22+
"testing"
23+
24+
"github.com/containerd/cgroups/v3"
25+
"github.com/containerd/containerd/v2/core/mount"
26+
"github.com/containerd/containerd/v2/integration/images"
27+
"github.com/stretchr/testify/assert"
28+
"github.com/stretchr/testify/require"
29+
)
30+
31+
func TestPrivilegedContainerCgroupMountOptions(t *testing.T) {
32+
if f := os.Getenv("RUNC_FLAVOR"); f == "crun" {
33+
t.Skip("Skipping until crun supports cgroup v2 mount options (https://github.com/containers/crun/pull/2040)")
34+
}
35+
if cgroups.Mode() != cgroups.Unified {
36+
t.Skip("Requires cgroup v2")
37+
}
38+
39+
hostMountBefore, err := mount.Lookup("/sys/fs/cgroup")
40+
require.NoError(t, err)
41+
42+
if !strings.Contains(hostMountBefore.VFSOptions, "nsdelegate") && !strings.Contains(hostMountBefore.VFSOptions, "memory_recursiveprot") {
43+
t.Skip("requires host cgroup mount to have nsdelegate or memory_recursiveprot")
44+
}
45+
46+
testImage := images.Get(images.BusyBox)
47+
EnsureImageExists(t, testImage)
48+
49+
t.Log("Create a sandbox with privileged=true")
50+
sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox", "privileged-cgroup-mount-test", WithPodSecurityContext(true))
51+
52+
t.Log("Create a container with privileged=true")
53+
cnConfig := ContainerConfig("container", testImage, WithCommand("sh", "-c", "sleep 1d"), WithSecurityContext(true))
54+
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
55+
require.NoError(t, err)
56+
t.Cleanup(func() {
57+
if err := runtimeService.RemoveContainer(cn); err != nil {
58+
t.Logf("failed to remove container %s: %v", cn, err)
59+
}
60+
})
61+
62+
t.Log("Start the container")
63+
require.NoError(t, runtimeService.StartContainer(cn))
64+
t.Cleanup(func() {
65+
if err := runtimeService.StopContainer(cn, 10); err != nil {
66+
t.Logf("failed to stop container %s: %v", cn, err)
67+
}
68+
})
69+
70+
hostMountAfter, err := mount.Lookup("/sys/fs/cgroup")
71+
require.NoError(t, err)
72+
73+
if strings.Contains(hostMountBefore.VFSOptions, "nsdelegate") {
74+
assert.Contains(t, hostMountAfter.VFSOptions, "nsdelegate", "nsdelegate should be preserved on the host cgroup mount")
75+
}
76+
if strings.Contains(hostMountBefore.VFSOptions, "memory_recursiveprot") {
77+
assert.Contains(t, hostMountAfter.VFSOptions, "memory_recursiveprot", "memory_recursiveprot should be preserved on the host cgroup mount")
78+
}
79+
}

internal/cri/opts/spec_linux_opts.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"maps"
2424
"os"
2525
"path/filepath"
26+
"slices"
2627
"sort"
2728
"strconv"
2829
"strings"
@@ -71,11 +72,35 @@ func withMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*ru
7172
if cgroupWritable {
7273
mode = "rw"
7374
}
75+
76+
cgroupOptions := []string{"nosuid", "noexec", "nodev", "relatime", mode}
77+
78+
hasCgroupNS := false
79+
if s.Linux != nil {
80+
hasCgroupNS = slices.ContainsFunc(s.Linux.Namespaces, func(ns runtimespec.LinuxNamespace) bool {
81+
return ns.Type == runtimespec.CgroupNamespace
82+
})
83+
}
84+
85+
// If a container shares the host's cgroup namespace, mounting cgroup2
86+
// inside the container applies the new mount options to the single shared
87+
// cgroup2 VFS superblock. Therefore, explicitly copy these options from
88+
// the host's /sys/fs/cgroup to avoid being stripped.
89+
if !hasCgroupNS {
90+
if mountInfo, err := osi.LookupMount("/sys/fs/cgroup"); err == nil {
91+
for opt := range strings.SplitSeq(mountInfo.VFSOptions, ",") {
92+
if opt == "nsdelegate" || opt == "memory_recursiveprot" {
93+
cgroupOptions = append(cgroupOptions, opt)
94+
}
95+
}
96+
}
97+
}
98+
7499
s.Mounts = append(s.Mounts, runtimespec.Mount{
75100
Source: "cgroup",
76101
Destination: "/sys/fs/cgroup",
77102
Type: "cgroup",
78-
Options: []string{"nosuid", "noexec", "nodev", "relatime", mode},
103+
Options: cgroupOptions,
79104
})
80105

81106
// Copy all mounts from default mounts, except for

internal/cri/opts/spec_linux_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,15 @@
1717
package opts
1818

1919
import (
20+
"context"
2021
"testing"
2122

23+
"github.com/containerd/containerd/v2/core/mount"
24+
ostesting "github.com/containerd/containerd/v2/pkg/os/testing"
25+
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
2226
"github.com/stretchr/testify/assert"
2327
"github.com/stretchr/testify/require"
28+
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2429
)
2530

2631
func TestMergeGids(t *testing.T) {
@@ -45,3 +50,73 @@ func TestRestrictOOMScoreAdj(t *testing.T) {
4550
require.NoError(t, err)
4651
assert.Equal(t, got, current+1)
4752
}
53+
54+
func TestWithMountsCgroupNamespaceOptions(t *testing.T) {
55+
tests := []struct {
56+
name string
57+
hasCgroupNS bool
58+
hostMountOpts string
59+
expectedOpts []string
60+
}{
61+
{
62+
name: "has cgroupns, should use default options",
63+
hasCgroupNS: true,
64+
hostMountOpts: "rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot",
65+
expectedOpts: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
66+
},
67+
{
68+
name: "no cgroupns, with host options present",
69+
hasCgroupNS: false,
70+
hostMountOpts: "rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot",
71+
expectedOpts: []string{"nosuid", "noexec", "nodev", "relatime", "ro", "nsdelegate", "memory_recursiveprot"},
72+
},
73+
{
74+
name: "no cgroupns, with host missing nsdelegate",
75+
hasCgroupNS: false,
76+
hostMountOpts: "rw,nosuid,nodev,noexec,relatime,memory_recursiveprot",
77+
expectedOpts: []string{"nosuid", "noexec", "nodev", "relatime", "ro", "memory_recursiveprot"},
78+
},
79+
{
80+
name: "no cgroupns, with host missing all extra options",
81+
hasCgroupNS: false,
82+
hostMountOpts: "rw,nosuid,nodev,noexec,relatime",
83+
expectedOpts: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
84+
},
85+
}
86+
87+
for _, tt := range tests {
88+
t.Run(tt.name, func(t *testing.T) {
89+
fakeOS := ostesting.NewFakeOS()
90+
fakeOS.LookupMountFn = func(path string) (mount.Info, error) {
91+
if path == "/sys/fs/cgroup" {
92+
return mount.Info{VFSOptions: tt.hostMountOpts}, nil
93+
}
94+
return mount.Info{}, nil
95+
}
96+
97+
config := &runtime.ContainerConfig{
98+
Linux: &runtime.LinuxContainerConfig{},
99+
}
100+
101+
spec := &runtimespec.Spec{}
102+
if tt.hasCgroupNS {
103+
spec.Linux = &runtimespec.Linux{Namespaces: []runtimespec.LinuxNamespace{{Type: runtimespec.CgroupNamespace}}}
104+
}
105+
106+
opt := withMounts(fakeOS, config, nil, "", nil, false)
107+
err := opt(context.Background(), nil, nil, spec)
108+
require.NoError(t, err)
109+
110+
var cgroupMount *runtimespec.Mount
111+
for _, m := range spec.Mounts {
112+
if m.Destination == "/sys/fs/cgroup" {
113+
cgroupMount = &m
114+
break
115+
}
116+
}
117+
118+
require.NotNil(t, cgroupMount)
119+
assert.ElementsMatch(t, tt.expectedOpts, cgroupMount.Options)
120+
})
121+
}
122+
}

internal/cri/server/container_create.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,14 @@ func (c *criService) buildLinuxSpec(
792792
}
793793
}()
794794

795+
// cgroupns is used for hiding /sys/fs/cgroup from containers.
796+
// For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged.
797+
// https://github.com/containers/libpod/issues/4363
798+
// https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace
799+
if isUnifiedCgroupsMode() && !securityContext.GetPrivileged() {
800+
specOpts = append(specOpts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.CgroupNamespace}))
801+
}
802+
795803
var ociSpecOpts oci.SpecOpts
796804
if ociRuntime.CgroupWritable {
797805
ociSpecOpts = customopts.WithMountsCgroupWritable(c.os, config, extraMounts, mountLabel, runtimeHandler)
@@ -930,14 +938,6 @@ func (c *criService) buildLinuxSpec(
930938
annotations.DefaultCRIAnnotations(sandboxID, containerName, imageName, sandboxConfig, false)...,
931939
)
932940

933-
// cgroupns is used for hiding /sys/fs/cgroup from containers.
934-
// For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged.
935-
// https://github.com/containers/libpod/issues/4363
936-
// https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace
937-
if isUnifiedCgroupsMode() && !securityContext.GetPrivileged() {
938-
specOpts = append(specOpts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.CgroupNamespace}))
939-
}
940-
941941
return specOpts, nil
942942
}
943943

internal/cri/server/container_create_linux_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,52 @@ func TestPrivilegedBindMount(t *testing.T) {
487487
}
488488
}
489489

490+
func TestCgroupNamespace(t *testing.T) {
491+
testPid := uint32(1234)
492+
c := newTestCRIService()
493+
testSandboxID := "sandbox-id"
494+
testContainerName := "container-name"
495+
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
496+
ociRuntime := config.Runtime{}
497+
498+
tests := []struct {
499+
desc string
500+
privileged bool
501+
expectCgroupNamespace bool
502+
}{
503+
{
504+
desc: "non-privileged container should get cgroup namespace",
505+
privileged: false,
506+
expectCgroupNamespace: true,
507+
},
508+
{
509+
desc: "privileged container should not get cgroup namespace",
510+
privileged: true,
511+
expectCgroupNamespace: false,
512+
},
513+
}
514+
515+
for _, tt := range tests {
516+
t.Run(tt.desc, func(t *testing.T) {
517+
containerConfig.Linux.SecurityContext.Privileged = tt.privileged
518+
sandboxConfig.Linux.SecurityContext.Privileged = tt.privileged
519+
520+
spec, err := c.buildContainerSpec(currentPlatform, t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime, nil)
521+
assert.NoError(t, err)
522+
523+
hasCgroupNS := false
524+
for _, ns := range spec.Linux.Namespaces {
525+
if ns.Type == runtimespec.CgroupNamespace {
526+
hasCgroupNS = true
527+
break
528+
}
529+
}
530+
531+
assert.Equal(t, tt.expectCgroupNamespace, hasCgroupNS)
532+
})
533+
}
534+
}
535+
490536
func TestMountPropagation(t *testing.T) {
491537

492538
sharedLookupMountFn := func(string) (mount.Info, error) {

script/test/cri-integration.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ CMD=""
4646
if [ -n "${sudo}" ]; then
4747
CMD+="${sudo} "
4848
fi
49+
CMD+="env "
50+
if [ -n "${RUNC_FLAVOR:-}" ]; then
51+
CMD+="RUNC_FLAVOR=${RUNC_FLAVOR} "
52+
fi
4953
CMD+="${PWD}/bin/cri-integration.test"
5054

5155
${CMD} --test.run="${FOCUS}" --test.v \

0 commit comments

Comments
 (0)