Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 152 additions & 2 deletions internal/guest/runtime/hcsv2/uvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"os/exec"
"path"
"path/filepath"
"regexp"
"strings"
"sync"
"syscall"
Expand All @@ -40,6 +41,7 @@ import (
"github.com/Microsoft/hcsshim/internal/guest/storage/pmem"
"github.com/Microsoft/hcsshim/internal/guest/storage/scsi"
"github.com/Microsoft/hcsshim/internal/guest/transport"
"github.com/Microsoft/hcsshim/internal/guestpath"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/oci"
Expand All @@ -54,6 +56,27 @@ import (
// for V2 where the specific message is targeted at the UVM itself.
const UVMContainerID = "00000000-0000-0000-0000-000000000000"

// validContainerIDRegexRaw is the unanchored pattern for a 64-character hex
// string. It is left unanchored so that it can be embedded in larger patterns
// (for example, when matching a full scratch directory path).
//
// It is used to prevent path traversal via malformed container / sandbox IDs.
// Container IDs can be either UVMContainerID or a 64-character hex string.
// Sandbox IDs, which are also used in paths, have the same format and are
// validated with the same pattern.
const validContainerIDRegexRaw = `[0-9a-fA-F]{64}`

// validContainerIDRegex is validContainerIDRegexRaw anchored at both ends, so
// the entire ID must consist of exactly 64 hex characters.
var validContainerIDRegex = regexp.MustCompile("^" + validContainerIDRegexRaw + "$")

// checkValidContainerID returns nil when id is either UVMContainerID or a
// string accepted by validContainerIDRegex, and an error otherwise.
//
// idType does not affect validation; it only names the kind of ID (for
// example "container" or "sandbox") in the returned error message.
func checkValidContainerID(id string, idType string) error {
	if id == UVMContainerID || validContainerIDRegex.MatchString(id) {
		return nil
	}

	return errors.Errorf("invalid %s id: %s (must match %s)", idType, id, validContainerIDRegex.String())
}

// VirtualPod represents a virtual pod that shares a UVM/Sandbox with other pods
type VirtualPod struct {
VirtualSandboxID string
Expand Down Expand Up @@ -245,12 +268,68 @@ func setupSandboxLogDir(sandboxID, virtualSandboxID string) error {
// TODO: unify workload and standalone logic for non-sandbox features (e.g., block devices, huge pages, uVM mounts)
// TODO(go1.24): use [os.Root] instead of `!strings.HasPrefix(<path>, <root>)`

// HasSecurityPolicy reports whether this host has a security policy set, i.e.
// whether it is running confidential containers. A policy counts as set when
// the policy enforcer returns a non-empty encoded security policy.
func (h *Host) HasSecurityPolicy() bool {
	encodedPolicy := h.securityOptions.PolicyEnforcer.EncodedSecurityPolicy()
	return len(encodedPolicy) != 0
}

// checkContainerSettings validates the container creation settings for
// confidential containers, so that the host cannot supply an unexpected bundle
// path, rootfs path or scratch directory, and cannot supply OCI hooks.
//
// The checks run in a fixed order and the first failure is returned:
//  1. the OCI specification and its Root section must be present;
//  2. OCIBundlePath must be <LCOWRootPrefixInUVM>/<containerID>;
//  3. the OCI root path must be <bundle>/<RootfsPath>;
//  4. ScratchDirPath must be the per-container scratch directory or the
//     scratch directory shared under sandboxID;
//  5. OCISpecification.Hooks must be nil.
func checkContainerSettings(sandboxID, containerID string, settings *prot.VMHostedContainerSettingsV2) error {
	// Expected bundle and rootfs locations; matches with CreateContainer /
	// createLinuxContainerDocument in internal/hcsoci.
	wantBundle := path.Join(guestpath.LCOWRootPrefixInUVM, containerID)
	wantRootfs := path.Join(wantBundle, guestpath.RootfsPath)

	// Expected scratch locations; matches with MountLCOWLayers. The scratch
	// directory lives either under the container's own root or under the
	// sandbox's root when the scratch is shared.
	wantScratchOwn := path.Join(wantBundle, guestpath.ScratchDir, containerID)
	wantScratchShared := path.Join(guestpath.LCOWRootPrefixInUVM, sandboxID, guestpath.ScratchDir, containerID)

	spec := settings.OCISpecification
	gotScratch := settings.ScratchDirPath

	// Cases are evaluated top to bottom, so the nil checks guard the
	// dereferences in the cases that follow them.
	switch {
	case spec == nil:
		return errors.Errorf("OCISpecification is nil")
	case spec.Root == nil:
		return errors.Errorf("OCISpecification.Root is nil")
	case settings.OCIBundlePath != wantBundle:
		return errors.Errorf("OCIBundlePath %q must equal expected %q",
			settings.OCIBundlePath, wantBundle)
	case spec.Root.Path != wantRootfs:
		return errors.Errorf("OCISpecification.Root.Path %q must equal expected %q",
			spec.Root.Path, wantRootfs)
	case gotScratch != wantScratchOwn && gotScratch != wantScratchShared:
		return errors.Errorf("ScratchDirPath %q must be either %q or %q",
			gotScratch, wantScratchOwn, wantScratchShared)
	case spec.Hooks != nil:
		return errors.Errorf("OCISpecification.Hooks must be nil.")
	}

	return nil
}

func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VMHostedContainerSettingsV2) (_ *Container, err error) {
criType, isCRI := settings.OCISpecification.Annotations[annotations.KubernetesContainerType]

// Check for virtual pod annotation
virtualPodID, isVirtualPod := settings.OCISpecification.Annotations[annotations.VirtualPodID]

if h.HasSecurityPolicy() {
if err = checkValidContainerID(id, "container"); err != nil {
return nil, err
}
if virtualPodID != "" {
if err = checkValidContainerID(virtualPodID, "virtual pod"); err != nil {
return nil, err
}
}
}

// Special handling for virtual pod sandbox containers:
// The first container in a virtual pod (containerID == virtualPodID) should be treated as a sandbox
// even if the CRI annotation might indicate otherwise due to host-side UVM setup differences
Expand Down Expand Up @@ -393,6 +472,11 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
case "container":
sid, ok := settings.OCISpecification.Annotations[annotations.KubernetesSandboxID]
sandboxID = sid
if h.HasSecurityPolicy() {
if err = checkValidContainerID(sid, "sandbox"); err != nil {
return nil, err
}
}
if !ok || sid == "" {
return nil, errors.Errorf("unsupported 'io.kubernetes.cri.sandbox-id': '%s'", sid)
}
Expand All @@ -402,7 +486,7 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM

// Add SEV device when security policy is not empty, except when privileged annotation is
// set to "true", in which case all UVMs devices are added.
if len(h.securityOptions.PolicyEnforcer.EncodedSecurityPolicy()) > 0 && !oci.ParseAnnotationsBool(ctx,
if h.HasSecurityPolicy() && !oci.ParseAnnotationsBool(ctx,
settings.OCISpecification.Annotations, annotations.LCOWPrivileged, false) {
if err := specGuest.AddDevSev(ctx, settings.OCISpecification); err != nil {
log.G(ctx).WithError(err).Debug("failed to add SEV device")
Expand Down Expand Up @@ -448,6 +532,12 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
})
}

if h.HasSecurityPolicy() {
if err = checkContainerSettings(sandboxID, id, settings); err != nil {
return nil, err
}
}

user, groups, umask, err := h.securityOptions.PolicyEnforcer.GetUserInfo(settings.OCISpecification.Process, settings.OCISpecification.Root.Path)
if err != nil {
return nil, err
Expand Down Expand Up @@ -605,6 +695,12 @@ func writeSpecToFile(ctx context.Context, configFile string, spec *specs.Spec) e
}

func (h *Host) modifyHostSettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) (retErr error) {
if h.HasSecurityPolicy() {
if err := checkValidContainerID(containerID, "container"); err != nil {
return err
}
}

switch req.ResourceType {
case guestresource.ResourceTypeSCSIDevice:
return modifySCSIDevice(ctx, req.RequestType, req.Settings.(*guestresource.SCSIDevice))
Expand Down Expand Up @@ -689,6 +785,12 @@ func (h *Host) modifyHostSettings(ctx context.Context, containerID string, req *
}

func (h *Host) modifyContainerSettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) error {
if h.HasSecurityPolicy() {
if err := checkValidContainerID(containerID, "container"); err != nil {
return err
}
}

c, err := h.GetCreatedContainer(containerID)
if err != nil {
return err
Expand Down Expand Up @@ -1060,6 +1162,9 @@ func modifyMappedVirtualDisk(
if err != nil {
return err
}
if mvd.Filesystem != "" && mvd.Filesystem != "ext4" {
return errors.Errorf("filesystem must be ext4 for read-only scsi mounts")
}
}
}
switch rt {
Expand All @@ -1076,6 +1181,11 @@ func modifyMappedVirtualDisk(
if err != nil {
return errors.Wrapf(err, "mounting scsi device controller %d lun %d onto %s denied by policy", mvd.Controller, mvd.Lun, mvd.MountPath)
}
} else {
err = securityPolicy.EnforceRWDeviceMountPolicy(ctx, mvd.MountPath, mvd.Encrypted, mvd.EnsureFilesystem, mvd.Filesystem)
if err != nil {
return errors.Wrapf(err, "mounting scsi device controller %d lun %d onto %s denied by policy", mvd.Controller, mvd.Lun, mvd.MountPath)
}
}
config := &scsi.Config{
Encrypted: mvd.Encrypted,
Expand All @@ -1094,6 +1204,10 @@ func modifyMappedVirtualDisk(
if err := securityPolicy.EnforceDeviceUnmountPolicy(ctx, mvd.MountPath); err != nil {
return fmt.Errorf("unmounting scsi device at %s denied by policy: %w", mvd.MountPath, err)
}
} else {
if err := securityPolicy.EnforceRWDeviceUnmountPolicy(ctx, mvd.MountPath); err != nil {
return fmt.Errorf("unmounting scsi device at %s denied by policy: %w", mvd.MountPath, err)
}
}
config := &scsi.Config{
Encrypted: mvd.Encrypted,
Expand Down Expand Up @@ -1192,8 +1306,42 @@ func modifyCombinedLayers(
scratchEncrypted bool,
securityPolicy securitypolicy.SecurityPolicyEnforcer,
) (err error) {
isConfidential := len(securityPolicy.EncodedSecurityPolicy()) > 0
containerID := cl.ContainerID

switch rt {
case guestrequest.RequestTypeAdd:
if isConfidential {
if err := checkValidContainerID(containerID, "container"); err != nil {
return err
}

// We check this regardless of what the policy says, as long as we're in
// confidential mode. This matches with checkContainerSettings called for
// container creation request.
expectedContainerRootfs := path.Join(guestpath.LCOWRootPrefixInUVM, containerID, guestpath.RootfsPath)
if cl.ContainerRootPath != expectedContainerRootfs {
return fmt.Errorf("combined layers target %q does not match expected path %q",
cl.ContainerRootPath, expectedContainerRootfs)
}

if cl.ScratchPath != "" {
// At this point, we do not know what the sandbox ID would be yet, so we
// have to allow anything reasonable.
scratchDirRegexStr := fmt.Sprintf(
"^%s/%s/%s/%s$",
guestpath.LCOWRootPrefixInUVM,
validContainerIDRegexRaw,
guestpath.ScratchDir,
containerID,
)
scratchDirRegex := regexp.MustCompile(scratchDirRegexStr)
if !scratchDirRegex.MatchString(cl.ScratchPath) {
return fmt.Errorf("scratch path %q must match regex %q",
cl.ScratchPath, scratchDirRegexStr)
}
}
}
layerPaths := make([]string, len(cl.Layers))
for i, layer := range cl.Layers {
layerPaths[i] = layer.Path
Expand All @@ -1214,12 +1362,14 @@ func modifyCombinedLayers(
}
}

if err := securityPolicy.EnforceOverlayMountPolicy(ctx, cl.ContainerID, layerPaths, cl.ContainerRootPath); err != nil {
if err := securityPolicy.EnforceOverlayMountPolicy(ctx, containerID, layerPaths, cl.ContainerRootPath); err != nil {
return fmt.Errorf("overlay creation denied by policy: %w", err)
}

return overlay.MountLayer(ctx, layerPaths, upperdirPath, workdirPath, cl.ContainerRootPath, readonly)
case guestrequest.RequestTypeRemove:
// cl.ContainerID is not set on remove requests, but rego checks that we can
// only umount previously mounted targets anyway
if err := securityPolicy.EnforceOverlayUnmountPolicy(ctx, cl.ContainerRootPath); err != nil {
return errors.Wrap(err, "overlay removal denied by policy")
}
Expand Down
14 changes: 8 additions & 6 deletions internal/guestpath/paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,17 @@ const (
// LCOWMountPathPrefixFmt is the path format in the LCOW UVM where
// non-global mounts, such as Plan9 mounts are added
LCOWMountPathPrefixFmt = "/mounts/m%d"
// LCOWGlobalMountPrefixFmt is the path format in the LCOW UVM where global
// mounts are added
LCOWGlobalMountPrefixFmt = "/run/mounts/m%d"
// LCOWGlobalScsiMountPrefixFmt is the path format in the LCOW UVM where
// global disk mounts are added
LCOWGlobalScsiMountPrefixFmt = "/run/mounts/scsi/m%d"
// LCOWGlobalDriverPrefixFmt is the path format in the LCOW UVM where drivers
// are mounted as read/write
LCOWGlobalDriverPrefixFmt = "/run/drivers/%s"
// WCOWGlobalMountPrefixFmt is the path prefix format in the WCOW UVM where
// mounts are added
WCOWGlobalMountPrefixFmt = "C:\\mounts\\m%d"
// WCOWGlobalScsiMountPrefixFmt is the path prefix format in the WCOW UVM
// where global disk mounts are added
WCOWGlobalScsiMountPrefixFmt = `c:\mounts\scsi\m%d`
// RootfsPath is part of the container's rootfs path
RootfsPath = "rootfs"
// ScratchDir is the name of the directory used for overlay upper and work
ScratchDir = "scratch"
)
4 changes: 3 additions & 1 deletion internal/layers/lcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,9 @@ func MountLCOWLayers(
// handles the case where we want to share a scratch disk for multiple containers instead
// of mounting a new one. Pass a unique value for `ScratchPath` to avoid container upper and
// work directories colliding in the UVM.
containerScratchPathInUVM := ospath.Join("linux", scsiMount.GuestPath(), "scratch", containerID)
// Note that in the shared scratch case, AddVirtualDisk above is a no-op and
// will return the existing mount.
containerScratchPathInUVM := ospath.Join("linux", scsiMount.GuestPath(), guestpath.ScratchDir, containerID)

defer func() {
if err != nil {
Expand Down
5 changes: 3 additions & 2 deletions internal/uvm/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/Microsoft/hcsshim/internal/gcs"
"github.com/Microsoft/hcsshim/internal/gcs/prot"
"github.com/Microsoft/hcsshim/internal/guestpath"
"github.com/Microsoft/hcsshim/internal/hcs"
"github.com/Microsoft/hcsshim/internal/hcs/schema1"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
Expand Down Expand Up @@ -357,9 +358,9 @@ func (uvm *UtilityVM) Start(ctx context.Context) (err error) {
} else {
gb = scsi.NewHCSGuestBackend(uvm.hcsSystem, uvm.OS())
}
guestMountFmt := `c:\mounts\scsi\m%d`
guestMountFmt := guestpath.WCOWGlobalScsiMountPrefixFmt
if uvm.OS() == "linux" {
guestMountFmt = "/run/mounts/scsi/m%d"
guestMountFmt = guestpath.LCOWGlobalScsiMountPrefixFmt
}
mgr, err := scsi.NewManager(
scsi.NewHCSHostBackend(uvm.hcsSystem),
Expand Down
38 changes: 20 additions & 18 deletions pkg/securitypolicy/api.rego
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,24 @@ package api
version := "@@API_VERSION@@"

enforcement_points := {
"mount_device": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}},
"mount_overlay": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}},
"mount_cims": {"introducedVersion": "0.11.0", "default_results": {"allowed": false}},
"create_container": {"introducedVersion": "0.1.0", "default_results": {"allowed": false, "env_list": null, "allow_stdio_access": false}},
"unmount_device": {"introducedVersion": "0.2.0", "default_results": {"allowed": true}},
"unmount_overlay": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}},
"exec_in_container": {"introducedVersion": "0.2.0", "default_results": {"allowed": true, "env_list": null}},
"exec_external": {"introducedVersion": "0.3.0", "default_results": {"allowed": true, "env_list": null, "allow_stdio_access": false}},
"shutdown_container": {"introducedVersion": "0.4.0", "default_results": {"allowed": true}},
"signal_container_process": {"introducedVersion": "0.5.0", "default_results": {"allowed": true}},
"plan9_mount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}},
"plan9_unmount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}},
"get_properties": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}},
"dump_stacks": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}},
"runtime_logging": {"introducedVersion": "0.8.0", "default_results": {"allowed": true}},
"load_fragment": {"introducedVersion": "0.9.0", "default_results": {"allowed": false, "add_module": false}},
"scratch_mount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}},
"scratch_unmount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}},
"mount_device": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}, "use_framework": false},
"rw_mount_device": {"introducedVersion": "0.11.0", "default_results": {}, "use_framework": true},
"mount_overlay": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}, "use_framework": false},
"mount_cims": {"introducedVersion": "0.11.0", "default_results": {"allowed": false}, "use_framework": false},
"create_container": {"introducedVersion": "0.1.0", "default_results": {"allowed": false, "env_list": null, "allow_stdio_access": false}, "use_framework": false},
"unmount_device": {"introducedVersion": "0.2.0", "default_results": {"allowed": true}, "use_framework": false},
"rw_unmount_device": {"introducedVersion": "0.11.0", "default_results": {}, "use_framework": true},
"unmount_overlay": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}, "use_framework": false},
"exec_in_container": {"introducedVersion": "0.2.0", "default_results": {"allowed": true, "env_list": null}, "use_framework": false},
"exec_external": {"introducedVersion": "0.3.0", "default_results": {"allowed": true, "env_list": null, "allow_stdio_access": false}, "use_framework": false},
"shutdown_container": {"introducedVersion": "0.4.0", "default_results": {"allowed": true}, "use_framework": false},
"signal_container_process": {"introducedVersion": "0.5.0", "default_results": {"allowed": true}, "use_framework": false},
"plan9_mount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}, "use_framework": false},
"plan9_unmount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}, "use_framework": false},
"get_properties": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}, "use_framework": false},
"dump_stacks": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}, "use_framework": false},
"runtime_logging": {"introducedVersion": "0.8.0", "default_results": {"allowed": true}, "use_framework": false},
"load_fragment": {"introducedVersion": "0.9.0", "default_results": {"allowed": false, "add_module": false}, "use_framework": false},
"scratch_mount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}, "use_framework": false},
"scratch_unmount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}, "use_framework": false},
}
8 changes: 4 additions & 4 deletions pkg/securitypolicy/api_test.rego
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package api
version := "0.0.2"

enforcement_points := {
"__fixture_for_future_test__": {"introducedVersion": "100.0.0", "default_results": {"allowed": true}},
"__fixture_for_allowed_test_true__": {"introducedVersion": "0.0.2", "default_results": {"allowed": true}},
"__fixture_for_allowed_test_false__": {"introducedVersion": "0.0.2", "default_results": {"allowed": false}},
"__fixture_for_allowed_extra__": {"introducedVersion": "0.0.1", "default_results": {"allowed": false, "__test__": "test"}}
"__fixture_for_future_test__": {"introducedVersion": "100.0.0", "default_results": {"allowed": true}, "use_framework": false},
"__fixture_for_allowed_test_true__": {"introducedVersion": "0.0.2", "default_results": {"allowed": true}, "use_framework": false},
"__fixture_for_allowed_test_false__": {"introducedVersion": "0.0.2", "default_results": {"allowed": false}, "use_framework": false},
"__fixture_for_allowed_extra__": {"introducedVersion": "0.0.1", "default_results": {"allowed": false, "__test__": "test"}, "use_framework": false}
}
Loading