From 9635791cfefb15ffa1f753679529e744153db39f Mon Sep 17 00:00:00 2001 From: Danny Canter Date: Fri, 9 Jan 2026 10:04:33 -0800 Subject: [PATCH] Allow filtering container statistics It's possible a user doesn't want the full stats list, and only wants cpu/mem etc. This plumbs through the ability to filter to only what is requested. While we're here, this also adds memory.events output to the stats list. For that specifically, I think eventually we may want a streaming variant of this so you can be alerted to changes in the file immediately instead of polling/one-off reads, but this is useful for now. --- .../ContainerStatistics.swift | 70 +++- Sources/Containerization/LinuxContainer.swift | 4 +- Sources/Containerization/LinuxPod.swift | 4 +- .../SandboxContext/SandboxContext.grpc.swift | 100 ++++++ .../SandboxContext/SandboxContext.pb.swift | 318 ++++++++++++++++++ .../SandboxContext/SandboxContext.proto | 51 +++ .../VirtualMachineAgent.swift | 4 +- Sources/Containerization/Vminitd.swift | 134 +++++--- Sources/Integration/ContainerTests.swift | 76 ++++- Sources/Integration/PodTests.swift | 69 +++- Sources/Integration/Suite.swift | 2 + .../Sources/vminitd/ManagedContainer.swift | 4 + vminitd/Sources/vminitd/Server+GRPC.swift | 194 ++++++++--- 13 files changed, 903 insertions(+), 127 deletions(-) diff --git a/Sources/Containerization/ContainerStatistics.swift b/Sources/Containerization/ContainerStatistics.swift index 9ae5ef36..ada91e9e 100644 --- a/Sources/Containerization/ContainerStatistics.swift +++ b/Sources/Containerization/ContainerStatistics.swift @@ -17,19 +17,21 @@ /// Statistics for a container. public struct ContainerStatistics: Sendable { public var id: String - public var process: ProcessStatistics - public var memory: MemoryStatistics - public var cpu: CPUStatistics - public var blockIO: BlockIOStatistics - public var networks: [NetworkStatistics] + public var process: ProcessStatistics? + public var memory: MemoryStatistics? 
+ public var cpu: CPUStatistics? + public var blockIO: BlockIOStatistics? + public var networks: [NetworkStatistics]? + public var memoryEvents: MemoryEventStatistics? public init( id: String, - process: ProcessStatistics, - memory: MemoryStatistics, - cpu: CPUStatistics, - blockIO: BlockIOStatistics, - networks: [NetworkStatistics] + process: ProcessStatistics? = nil, + memory: MemoryStatistics? = nil, + cpu: CPUStatistics? = nil, + blockIO: BlockIOStatistics? = nil, + networks: [NetworkStatistics]? = nil, + memoryEvents: MemoryEventStatistics? = nil ) { self.id = id self.process = process @@ -37,6 +39,7 @@ public struct ContainerStatistics: Sendable { self.cpu = cpu self.blockIO = blockIO self.networks = networks + self.memoryEvents = memoryEvents } /// Process statistics for a container. @@ -174,4 +177,51 @@ public struct ContainerStatistics: Sendable { self.transmittedErrors = transmittedErrors } } + + /// Memory event counters from cgroup2's memory.events file. + public struct MemoryEventStatistics: Sendable { + /// Number of times the cgroup was reclaimed due to low memory. + public var low: UInt64 + /// Number of times the cgroup exceeded its high memory limit. + public var high: UInt64 + /// Number of times the cgroup hit its max memory limit. + public var max: UInt64 + /// Number of times the cgroup triggered OOM. + public var oom: UInt64 + /// Number of processes killed by OOM killer. + public var oomKill: UInt64 + + public init(low: UInt64, high: UInt64, max: UInt64, oom: UInt64, oomKill: UInt64) { + self.low = low + self.high = high + self.max = max + self.oom = oom + self.oomKill = oomKill + } + } +} + +/// Categories of statistics that can be requested. +public struct StatCategory: OptionSet, Sendable { + public let rawValue: Int + + public init(rawValue: Int) { + self.rawValue = rawValue + } + + /// Process statistics (pids.current, pids.max). + public static let process = StatCategory(rawValue: 1 << 0) + /// Memory usage statistics. 
+ public static let memory = StatCategory(rawValue: 1 << 1) + /// CPU usage statistics. + public static let cpu = StatCategory(rawValue: 1 << 2) + /// Block I/O statistics. + public static let blockIO = StatCategory(rawValue: 1 << 3) + /// Network interface statistics. + public static let network = StatCategory(rawValue: 1 << 4) + /// Memory event counters (OOM kills, pressure events, etc.). + public static let memoryEvents = StatCategory(rawValue: 1 << 5) + + /// All available statistics categories. + public static let all: StatCategory = [.process, .memory, .cpu, .blockIO, .network, .memoryEvents] } diff --git a/Sources/Containerization/LinuxContainer.swift b/Sources/Containerization/LinuxContainer.swift index 206d4236..1ca79f53 100644 --- a/Sources/Containerization/LinuxContainer.swift +++ b/Sources/Containerization/LinuxContainer.swift @@ -778,12 +778,12 @@ extension LinuxContainer { } /// Get statistics for the container. - public func statistics() async throws -> ContainerStatistics { + public func statistics(categories: StatCategory = .all) async throws -> ContainerStatistics { try await self.state.withLock { let state = try $0.startedState("statistics") let stats = try await state.vm.withAgent { agent in - let allStats = try await agent.containerStatistics(containerIDs: [self.id]) + let allStats = try await agent.containerStatistics(containerIDs: [self.id], categories: categories) guard let containerStats = allStats.first else { throw ContainerizationError( .notFound, diff --git a/Sources/Containerization/LinuxPod.swift b/Sources/Containerization/LinuxPod.swift index 1459245c..d607cb39 100644 --- a/Sources/Containerization/LinuxPod.swift +++ b/Sources/Containerization/LinuxPod.swift @@ -727,7 +727,7 @@ extension LinuxPod { } /// Get statistics for containers in the pod. - public func statistics(containerIDs: [String]? = nil) async throws -> [ContainerStatistics] { + public func statistics(containerIDs: [String]? 
= nil, categories: StatCategory = .all) async throws -> [ContainerStatistics] { let (createdState, ids) = try await self.state.withLock { state in let createdState = try state.phase.createdState("statistics") let ids = containerIDs ?? Array(state.containers.keys) @@ -735,7 +735,7 @@ extension LinuxPod { } let stats = try await createdState.vm.withAgent { agent in - try await agent.containerStatistics(containerIDs: ids) + try await agent.containerStatistics(containerIDs: ids, categories: categories) } return stats diff --git a/Sources/Containerization/SandboxContext/SandboxContext.grpc.swift b/Sources/Containerization/SandboxContext/SandboxContext.grpc.swift index 39b2bd69..579254f0 100644 --- a/Sources/Containerization/SandboxContext/SandboxContext.grpc.swift +++ b/Sources/Containerization/SandboxContext/SandboxContext.grpc.swift @@ -129,6 +129,11 @@ public protocol Com_Apple_Containerization_Sandbox_V3_SandboxContextClientProtoc callOptions: CallOptions? ) -> UnaryCall + func getMemoryEvents( + _ request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + callOptions: CallOptions? + ) -> UnaryCall + func proxyVsock( _ request: Com_Apple_Containerization_Sandbox_V3_ProxyVsockRequest, callOptions: CallOptions? @@ -531,6 +536,24 @@ extension Com_Apple_Containerization_Sandbox_V3_SandboxContextClientProtocol { ) } + /// Get memory events for a container (OOM kills, memory pressure, etc.). + /// + /// - Parameters: + /// - request: Request to send to GetMemoryEvents. + /// - callOptions: Call options. + /// - Returns: A `UnaryCall` with futures for the metadata, status and response. + public func getMemoryEvents( + _ request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + callOptions: CallOptions? = nil + ) -> UnaryCall { + return self.makeUnaryCall( + path: Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.getMemoryEvents.path, + request: request, + callOptions: callOptions ?? 
self.defaultCallOptions, + interceptors: self.interceptors?.makeGetMemoryEventsInterceptors() ?? [] + ) + } + /// Proxy a vsock port to a unix domain socket in the guest, or vice versa. /// /// - Parameters: @@ -869,6 +892,11 @@ public protocol Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncClientP callOptions: CallOptions? ) -> GRPCAsyncUnaryCall + func makeGetMemoryEventsCall( + _ request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + callOptions: CallOptions? + ) -> GRPCAsyncUnaryCall + func makeProxyVsockCall( _ request: Com_Apple_Containerization_Sandbox_V3_ProxyVsockRequest, callOptions: CallOptions? @@ -1156,6 +1184,18 @@ extension Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncClientProtoco ) } + public func makeGetMemoryEventsCall( + _ request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + callOptions: CallOptions? = nil + ) -> GRPCAsyncUnaryCall { + return self.makeAsyncUnaryCall( + path: Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.getMemoryEvents.path, + request: request, + callOptions: callOptions ?? self.defaultCallOptions, + interceptors: self.interceptors?.makeGetMemoryEventsInterceptors() ?? [] + ) + } + public func makeProxyVsockCall( _ request: Com_Apple_Containerization_Sandbox_V3_ProxyVsockRequest, callOptions: CallOptions? = nil @@ -1519,6 +1559,18 @@ extension Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncClientProtoco ) } + public func getMemoryEvents( + _ request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + callOptions: CallOptions? = nil + ) async throws -> Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse { + return try await self.performAsyncUnaryCall( + path: Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.getMemoryEvents.path, + request: request, + callOptions: callOptions ?? self.defaultCallOptions, + interceptors: self.interceptors?.makeGetMemoryEventsInterceptors() ?? 
[] + ) + } + public func proxyVsock( _ request: Com_Apple_Containerization_Sandbox_V3_ProxyVsockRequest, callOptions: CallOptions? = nil @@ -1716,6 +1768,9 @@ public protocol Com_Apple_Containerization_Sandbox_V3_SandboxContextClientInterc /// - Returns: Interceptors to use when invoking 'containerStatistics'. func makeContainerStatisticsInterceptors() -> [ClientInterceptor] + /// - Returns: Interceptors to use when invoking 'getMemoryEvents'. + func makeGetMemoryEventsInterceptors() -> [ClientInterceptor] + /// - Returns: Interceptors to use when invoking 'proxyVsock'. func makeProxyVsockInterceptors() -> [ClientInterceptor] @@ -1771,6 +1826,7 @@ public enum Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata { Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.resizeProcess, Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.closeProcessStdin, Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.containerStatistics, + Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.getMemoryEvents, Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.proxyVsock, Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.stopVsockProxy, Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata.Methods.ipLinkSet, @@ -1899,6 +1955,12 @@ public enum Com_Apple_Containerization_Sandbox_V3_SandboxContextClientMetadata { type: GRPCCallType.unary ) + public static let getMemoryEvents = GRPCMethodDescriptor( + name: "GetMemoryEvents", + path: "/com.apple.containerization.sandbox.v3.SandboxContext/GetMemoryEvents", + type: GRPCCallType.unary + ) + public static let proxyVsock = GRPCMethodDescriptor( name: "ProxyVsock", path: "/com.apple.containerization.sandbox.v3.SandboxContext/ProxyVsock", @@ -2025,6 +2087,9 @@ public protocol Com_Apple_Containerization_Sandbox_V3_SandboxContextProvider: Ca /// Get statistics for containers. 
func containerStatistics(request: Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest, context: StatusOnlyCallContext) -> EventLoopFuture + /// Get memory events for a container (OOM kills, memory pressure, etc.). + func getMemoryEvents(request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, context: StatusOnlyCallContext) -> EventLoopFuture + /// Proxy a vsock port to a unix domain socket in the guest, or vice versa. func proxyVsock(request: Com_Apple_Containerization_Sandbox_V3_ProxyVsockRequest, context: StatusOnlyCallContext) -> EventLoopFuture @@ -2239,6 +2304,15 @@ extension Com_Apple_Containerization_Sandbox_V3_SandboxContextProvider { userFunction: self.containerStatistics(request:context:) ) + case "GetMemoryEvents": + return UnaryServerHandler( + context: context, + requestDeserializer: ProtobufDeserializer(), + responseSerializer: ProtobufSerializer(), + interceptors: self.interceptors?.makeGetMemoryEventsInterceptors() ?? [], + userFunction: self.getMemoryEvents(request:context:) + ) + case "ProxyVsock": return UnaryServerHandler( context: context, @@ -2459,6 +2533,12 @@ public protocol Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvide context: GRPCAsyncServerCallContext ) async throws -> Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsResponse + /// Get memory events for a container (OOM kills, memory pressure, etc.). + func getMemoryEvents( + request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + context: GRPCAsyncServerCallContext + ) async throws -> Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse + /// Proxy a vsock port to a unix domain socket in the guest, or vice versa. 
func proxyVsock( request: Com_Apple_Containerization_Sandbox_V3_ProxyVsockRequest, @@ -2710,6 +2790,15 @@ extension Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvider { wrapping: { try await self.containerStatistics(request: $0, context: $1) } ) + case "GetMemoryEvents": + return GRPCAsyncServerHandler( + context: context, + requestDeserializer: ProtobufDeserializer(), + responseSerializer: ProtobufSerializer(), + interceptors: self.interceptors?.makeGetMemoryEventsInterceptors() ?? [], + wrapping: { try await self.getMemoryEvents(request: $0, context: $1) } + ) + case "ProxyVsock": return GRPCAsyncServerHandler( context: context, @@ -2884,6 +2973,10 @@ public protocol Com_Apple_Containerization_Sandbox_V3_SandboxContextServerInterc /// Defaults to calling `self.makeInterceptors()`. func makeContainerStatisticsInterceptors() -> [ServerInterceptor] + /// - Returns: Interceptors to use when handling 'getMemoryEvents'. + /// Defaults to calling `self.makeInterceptors()`. + func makeGetMemoryEventsInterceptors() -> [ServerInterceptor] + /// - Returns: Interceptors to use when handling 'proxyVsock'. /// Defaults to calling `self.makeInterceptors()`. 
func makeProxyVsockInterceptors() -> [ServerInterceptor] @@ -2949,6 +3042,7 @@ public enum Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata { Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.resizeProcess, Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.closeProcessStdin, Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.containerStatistics, + Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.getMemoryEvents, Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.proxyVsock, Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.stopVsockProxy, Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata.Methods.ipLinkSet, @@ -3077,6 +3171,12 @@ public enum Com_Apple_Containerization_Sandbox_V3_SandboxContextServerMetadata { type: GRPCCallType.unary ) + public static let getMemoryEvents = GRPCMethodDescriptor( + name: "GetMemoryEvents", + path: "/com.apple.containerization.sandbox.v3.SandboxContext/GetMemoryEvents", + type: GRPCCallType.unary + ) + public static let proxyVsock = GRPCMethodDescriptor( name: "ProxyVsock", path: "/com.apple.containerization.sandbox.v3.SandboxContext/ProxyVsock", diff --git a/Sources/Containerization/SandboxContext/SandboxContext.pb.swift b/Sources/Containerization/SandboxContext/SandboxContext.pb.swift index 28b8eef4..30fedfe6 100644 --- a/Sources/Containerization/SandboxContext/SandboxContext.pb.swift +++ b/Sources/Containerization/SandboxContext/SandboxContext.pb.swift @@ -37,6 +37,61 @@ fileprivate struct _GeneratedWithProtocGenSwiftVersion: SwiftProtobuf.ProtobufAP typealias Version = _2 } +/// Categories of statistics that can be requested. 
+public enum Com_Apple_Containerization_Sandbox_V3_StatCategory: SwiftProtobuf.Enum, Swift.CaseIterable { + public typealias RawValue = Int + case unspecified // = 0 + case process // = 1 + case memory // = 2 + case cpu // = 3 + case blockIo // = 4 + case network // = 5 + case memoryEvents // = 6 + case UNRECOGNIZED(Int) + + public init() { + self = .unspecified + } + + public init?(rawValue: Int) { + switch rawValue { + case 0: self = .unspecified + case 1: self = .process + case 2: self = .memory + case 3: self = .cpu + case 4: self = .blockIo + case 5: self = .network + case 6: self = .memoryEvents + default: self = .UNRECOGNIZED(rawValue) + } + } + + public var rawValue: Int { + switch self { + case .unspecified: return 0 + case .process: return 1 + case .memory: return 2 + case .cpu: return 3 + case .blockIo: return 4 + case .network: return 5 + case .memoryEvents: return 6 + case .UNRECOGNIZED(let i): return i + } + } + + // The compiler won't synthesize support with the UNRECOGNIZED case. + public static let allCases: [Com_Apple_Containerization_Sandbox_V3_StatCategory] = [ + .unspecified, + .process, + .memory, + .cpu, + .blockIo, + .network, + .memoryEvents, + ] + +} + public struct Com_Apple_Containerization_Sandbox_V3_Stdio: Sendable { // SwiftProtobuf.Message conformance is added in an extension below. 
See the // `Message` and `Message+*Additions` files in the SwiftProtobuf library for @@ -1149,6 +1204,9 @@ public struct Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest: /// Empty = all containers public var containerIds: [String] = [] + /// Empty = all categories + public var categories: [Com_Apple_Containerization_Sandbox_V3_StatCategory] = [] + public var unknownFields = SwiftProtobuf.UnknownStorage() public init() {} @@ -1217,6 +1275,15 @@ public struct Com_Apple_Containerization_Sandbox_V3_ContainerStats: @unchecked S set {_uniqueStorage()._networks = newValue} } + public var memoryEvents: Com_Apple_Containerization_Sandbox_V3_MemoryEventStats { + get {return _storage._memoryEvents ?? Com_Apple_Containerization_Sandbox_V3_MemoryEventStats()} + set {_uniqueStorage()._memoryEvents = newValue} + } + /// Returns true if `memoryEvents` has been explicitly set. + public var hasMemoryEvents: Bool {return _storage._memoryEvents != nil} + /// Clears the value of `memoryEvents`. Subsequent reads from it will return its default value. + public mutating func clearMemoryEvents() {_uniqueStorage()._memoryEvents = nil} + public var unknownFields = SwiftProtobuf.UnknownStorage() public init() {} @@ -1347,10 +1414,91 @@ public struct Com_Apple_Containerization_Sandbox_V3_NetworkStats: Sendable { public init() {} } +/// Memory event counters from cgroup2's memory.events file. +public struct Com_Apple_Containerization_Sandbox_V3_MemoryEventStats: Sendable { + // SwiftProtobuf.Message conformance is added in an extension below. See the + // `Message` and `Message+*Additions` files in the SwiftProtobuf library for + // methods supported on all messages. + + /// Number of times the cgroup was reclaimed due to low memory. + public var low: UInt64 = 0 + + /// Number of times the cgroup exceeded its high memory limit. + public var high: UInt64 = 0 + + /// Number of times the cgroup hit its max memory limit. 
+ public var max: UInt64 = 0 + + /// Number of times the cgroup triggered OOM. + public var oom: UInt64 = 0 + + /// Number of processes killed by OOM killer. + public var oomKill: UInt64 = 0 + + /// Number of times a group OOM has occurred. + public var oomGroupKill: UInt64 = 0 + + public var unknownFields = SwiftProtobuf.UnknownStorage() + + public init() {} +} + +public struct Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest: Sendable { + // SwiftProtobuf.Message conformance is added in an extension below. See the + // `Message` and `Message+*Additions` files in the SwiftProtobuf library for + // methods supported on all messages. + + public var containerID: String = String() + + public var unknownFields = SwiftProtobuf.UnknownStorage() + + public init() {} +} + +public struct Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse: Sendable { + // SwiftProtobuf.Message conformance is added in an extension below. See the + // `Message` and `Message+*Additions` files in the SwiftProtobuf library for + // methods supported on all messages. + + /// Number of times the cgroup was reclaimed due to low memory. + public var low: UInt64 = 0 + + /// Number of times the cgroup exceeded its high memory limit. + public var high: UInt64 = 0 + + /// Number of times the cgroup hit its max memory limit. + public var max: UInt64 = 0 + + /// Number of times the cgroup triggered OOM. + public var oom: UInt64 = 0 + + /// Number of processes killed by OOM killer. + public var oomKill: UInt64 = 0 + + /// Number of times a group OOM has occurred. + public var oomGroupKill: UInt64 = 0 + + public var unknownFields = SwiftProtobuf.UnknownStorage() + + public init() {} +} + // MARK: - Code below here is support for the SwiftProtobuf runtime. 
fileprivate let _protobuf_package = "com.apple.containerization.sandbox.v3" +extension Com_Apple_Containerization_Sandbox_V3_StatCategory: SwiftProtobuf._ProtoNameProviding { + public static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ + 0: .same(proto: "STAT_CATEGORY_UNSPECIFIED"), + 1: .same(proto: "STAT_CATEGORY_PROCESS"), + 2: .same(proto: "STAT_CATEGORY_MEMORY"), + 3: .same(proto: "STAT_CATEGORY_CPU"), + 4: .same(proto: "STAT_CATEGORY_BLOCK_IO"), + 5: .same(proto: "STAT_CATEGORY_NETWORK"), + 6: .same(proto: "STAT_CATEGORY_MEMORY_EVENTS"), + ] +} + extension Com_Apple_Containerization_Sandbox_V3_Stdio: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding { public static let protoMessageName: String = _protobuf_package + ".Stdio" public static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ @@ -3503,6 +3651,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest: Swif public static let protoMessageName: String = _protobuf_package + ".ContainerStatisticsRequest" public static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ 1: .standard(proto: "container_ids"), + 2: .same(proto: "categories"), ] public mutating func decodeMessage(decoder: inout D) throws { @@ -3512,6 +3661,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest: Swif // enabled. 
https://github.com/apple/swift-protobuf/issues/1034 switch fieldNumber { case 1: try { try decoder.decodeRepeatedStringField(value: &self.containerIds) }() + case 2: try { try decoder.decodeRepeatedEnumField(value: &self.categories) }() default: break } } @@ -3521,11 +3671,15 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest: Swif if !self.containerIds.isEmpty { try visitor.visitRepeatedStringField(value: self.containerIds, fieldNumber: 1) } + if !self.categories.isEmpty { + try visitor.visitPackedEnumField(value: self.categories, fieldNumber: 2) + } try unknownFields.traverse(visitor: &visitor) } public static func ==(lhs: Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest, rhs: Com_Apple_Containerization_Sandbox_V3_ContainerStatisticsRequest) -> Bool { if lhs.containerIds != rhs.containerIds {return false} + if lhs.categories != rhs.categories {return false} if lhs.unknownFields != rhs.unknownFields {return false} return true } @@ -3572,6 +3726,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStats: SwiftProtobuf.Me 4: .same(proto: "cpu"), 5: .standard(proto: "block_io"), 6: .same(proto: "networks"), + 7: .standard(proto: "memory_events"), ] fileprivate class _StorageClass { @@ -3581,6 +3736,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStats: SwiftProtobuf.Me var _cpu: Com_Apple_Containerization_Sandbox_V3_CPUStats? = nil var _blockIo: Com_Apple_Containerization_Sandbox_V3_BlockIOStats? = nil var _networks: [Com_Apple_Containerization_Sandbox_V3_NetworkStats] = [] + var _memoryEvents: Com_Apple_Containerization_Sandbox_V3_MemoryEventStats? = nil // This property is used as the initial default value for new instances of the type. // The type itself is protecting the reference to its storage via CoW semantics. 
@@ -3597,6 +3753,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStats: SwiftProtobuf.Me _cpu = source._cpu _blockIo = source._blockIo _networks = source._networks + _memoryEvents = source._memoryEvents } } @@ -3621,6 +3778,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStats: SwiftProtobuf.Me case 4: try { try decoder.decodeSingularMessageField(value: &_storage._cpu) }() case 5: try { try decoder.decodeSingularMessageField(value: &_storage._blockIo) }() case 6: try { try decoder.decodeRepeatedMessageField(value: &_storage._networks) }() + case 7: try { try decoder.decodeSingularMessageField(value: &_storage._memoryEvents) }() default: break } } @@ -3651,6 +3809,9 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStats: SwiftProtobuf.Me if !_storage._networks.isEmpty { try visitor.visitRepeatedMessageField(value: _storage._networks, fieldNumber: 6) } + try { if let v = _storage._memoryEvents { + try visitor.visitSingularMessageField(value: v, fieldNumber: 7) + } }() } try unknownFields.traverse(visitor: &visitor) } @@ -3666,6 +3827,7 @@ extension Com_Apple_Containerization_Sandbox_V3_ContainerStats: SwiftProtobuf.Me if _storage._cpu != rhs_storage._cpu {return false} if _storage._blockIo != rhs_storage._blockIo {return false} if _storage._networks != rhs_storage._networks {return false} + if _storage._memoryEvents != rhs_storage._memoryEvents {return false} return true } if !storagesAreEqual {return false} @@ -4016,3 +4178,159 @@ extension Com_Apple_Containerization_Sandbox_V3_NetworkStats: SwiftProtobuf.Mess return true } } + +extension Com_Apple_Containerization_Sandbox_V3_MemoryEventStats: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding { + public static let protoMessageName: String = _protobuf_package + ".MemoryEventStats" + public static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ + 1: .same(proto: "low"), + 2: .same(proto: "high"), + 3: .same(proto: "max"), + 4: 
.same(proto: "oom"), + 5: .standard(proto: "oom_kill"), + 6: .standard(proto: "oom_group_kill"), + ] + + public mutating func decodeMessage(decoder: inout D) throws { + while let fieldNumber = try decoder.nextFieldNumber() { + // The use of inline closures is to circumvent an issue where the compiler + // allocates stack space for every case branch when no optimizations are + // enabled. https://github.com/apple/swift-protobuf/issues/1034 + switch fieldNumber { + case 1: try { try decoder.decodeSingularUInt64Field(value: &self.low) }() + case 2: try { try decoder.decodeSingularUInt64Field(value: &self.high) }() + case 3: try { try decoder.decodeSingularUInt64Field(value: &self.max) }() + case 4: try { try decoder.decodeSingularUInt64Field(value: &self.oom) }() + case 5: try { try decoder.decodeSingularUInt64Field(value: &self.oomKill) }() + case 6: try { try decoder.decodeSingularUInt64Field(value: &self.oomGroupKill) }() + default: break + } + } + } + + public func traverse(visitor: inout V) throws { + if self.low != 0 { + try visitor.visitSingularUInt64Field(value: self.low, fieldNumber: 1) + } + if self.high != 0 { + try visitor.visitSingularUInt64Field(value: self.high, fieldNumber: 2) + } + if self.max != 0 { + try visitor.visitSingularUInt64Field(value: self.max, fieldNumber: 3) + } + if self.oom != 0 { + try visitor.visitSingularUInt64Field(value: self.oom, fieldNumber: 4) + } + if self.oomKill != 0 { + try visitor.visitSingularUInt64Field(value: self.oomKill, fieldNumber: 5) + } + if self.oomGroupKill != 0 { + try visitor.visitSingularUInt64Field(value: self.oomGroupKill, fieldNumber: 6) + } + try unknownFields.traverse(visitor: &visitor) + } + + public static func ==(lhs: Com_Apple_Containerization_Sandbox_V3_MemoryEventStats, rhs: Com_Apple_Containerization_Sandbox_V3_MemoryEventStats) -> Bool { + if lhs.low != rhs.low {return false} + if lhs.high != rhs.high {return false} + if lhs.max != rhs.max {return false} + if lhs.oom != rhs.oom {return false} + if 
lhs.oomKill != rhs.oomKill {return false} + if lhs.oomGroupKill != rhs.oomGroupKill {return false} + if lhs.unknownFields != rhs.unknownFields {return false} + return true + } +} + +extension Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding { + public static let protoMessageName: String = _protobuf_package + ".GetMemoryEventsRequest" + public static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ + 1: .standard(proto: "container_id"), + ] + + public mutating func decodeMessage(decoder: inout D) throws { + while let fieldNumber = try decoder.nextFieldNumber() { + // The use of inline closures is to circumvent an issue where the compiler + // allocates stack space for every case branch when no optimizations are + // enabled. https://github.com/apple/swift-protobuf/issues/1034 + switch fieldNumber { + case 1: try { try decoder.decodeSingularStringField(value: &self.containerID) }() + default: break + } + } + } + + public func traverse(visitor: inout V) throws { + if !self.containerID.isEmpty { + try visitor.visitSingularStringField(value: self.containerID, fieldNumber: 1) + } + try unknownFields.traverse(visitor: &visitor) + } + + public static func ==(lhs: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, rhs: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest) -> Bool { + if lhs.containerID != rhs.containerID {return false} + if lhs.unknownFields != rhs.unknownFields {return false} + return true + } +} + +extension Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding { + public static let protoMessageName: String = _protobuf_package + ".GetMemoryEventsResponse" + public static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ + 1: .same(proto: "low"), + 2: .same(proto: "high"), + 3: .same(proto: "max"), + 4: .same(proto: 
"oom"), + 5: .standard(proto: "oom_kill"), + 6: .standard(proto: "oom_group_kill"), + ] + + public mutating func decodeMessage(decoder: inout D) throws { + while let fieldNumber = try decoder.nextFieldNumber() { + // The use of inline closures is to circumvent an issue where the compiler + // allocates stack space for every case branch when no optimizations are + // enabled. https://github.com/apple/swift-protobuf/issues/1034 + switch fieldNumber { + case 1: try { try decoder.decodeSingularUInt64Field(value: &self.low) }() + case 2: try { try decoder.decodeSingularUInt64Field(value: &self.high) }() + case 3: try { try decoder.decodeSingularUInt64Field(value: &self.max) }() + case 4: try { try decoder.decodeSingularUInt64Field(value: &self.oom) }() + case 5: try { try decoder.decodeSingularUInt64Field(value: &self.oomKill) }() + case 6: try { try decoder.decodeSingularUInt64Field(value: &self.oomGroupKill) }() + default: break + } + } + } + + public func traverse(visitor: inout V) throws { + if self.low != 0 { + try visitor.visitSingularUInt64Field(value: self.low, fieldNumber: 1) + } + if self.high != 0 { + try visitor.visitSingularUInt64Field(value: self.high, fieldNumber: 2) + } + if self.max != 0 { + try visitor.visitSingularUInt64Field(value: self.max, fieldNumber: 3) + } + if self.oom != 0 { + try visitor.visitSingularUInt64Field(value: self.oom, fieldNumber: 4) + } + if self.oomKill != 0 { + try visitor.visitSingularUInt64Field(value: self.oomKill, fieldNumber: 5) + } + if self.oomGroupKill != 0 { + try visitor.visitSingularUInt64Field(value: self.oomGroupKill, fieldNumber: 6) + } + try unknownFields.traverse(visitor: &visitor) + } + + public static func ==(lhs: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse, rhs: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse) -> Bool { + if lhs.low != rhs.low {return false} + if lhs.high != rhs.high {return false} + if lhs.max != rhs.max {return false} + if lhs.oom != rhs.oom {return false} + 
if lhs.oomKill != rhs.oomKill {return false} + if lhs.oomGroupKill != rhs.oomGroupKill {return false} + if lhs.unknownFields != rhs.unknownFields {return false} + return true + } +} diff --git a/Sources/Containerization/SandboxContext/SandboxContext.proto b/Sources/Containerization/SandboxContext/SandboxContext.proto index a36f7a06..ea6d61cf 100644 --- a/Sources/Containerization/SandboxContext/SandboxContext.proto +++ b/Sources/Containerization/SandboxContext/SandboxContext.proto @@ -48,6 +48,9 @@ service SandboxContext { // Get statistics for containers. rpc ContainerStatistics(ContainerStatisticsRequest) returns (ContainerStatisticsResponse); + // Get memory events for a container (OOM kills, memory pressure, etc.). + rpc GetMemoryEvents(GetMemoryEventsRequest) returns (GetMemoryEventsResponse); + // Proxy a vsock port to a unix domain socket in the guest, or vice versa. rpc ProxyVsock(ProxyVsockRequest) returns (ProxyVsockResponse); // Stop a vsock proxy to a unix domain socket. @@ -331,8 +334,20 @@ message KillRequest { message KillResponse { int32 result = 1; } +// Categories of statistics that can be requested. +enum StatCategory { + STAT_CATEGORY_UNSPECIFIED = 0; + STAT_CATEGORY_PROCESS = 1; + STAT_CATEGORY_MEMORY = 2; + STAT_CATEGORY_CPU = 3; + STAT_CATEGORY_BLOCK_IO = 4; + STAT_CATEGORY_NETWORK = 5; + STAT_CATEGORY_MEMORY_EVENTS = 6; +} + message ContainerStatisticsRequest { repeated string container_ids = 1; // Empty = all containers + repeated StatCategory categories = 2; // Empty = all categories } message ContainerStatisticsResponse { @@ -346,6 +361,7 @@ message ContainerStats { CPUStats cpu = 4; BlockIOStats block_io = 5; repeated NetworkStats networks = 6; + MemoryEventStats memory_events = 7; } message ProcessStats { @@ -396,3 +412,38 @@ message NetworkStats { uint64 receivedErrors = 6; uint64 transmittedErrors = 7; } + +// Memory event counters from cgroup2's memory.events file. 
+message MemoryEventStats { + // Number of times the cgroup was reclaimed due to low memory. + uint64 low = 1; + // Number of times the cgroup exceeded its high memory limit. + uint64 high = 2; + // Number of times the cgroup hit its max memory limit. + uint64 max = 3; + // Number of times the cgroup triggered OOM. + uint64 oom = 4; + // Number of processes killed by OOM killer. + uint64 oom_kill = 5; + // Number of times a group OOM kill has occurred. + uint64 oom_group_kill = 6; +} + +message GetMemoryEventsRequest { + string container_id = 1; +} + +message GetMemoryEventsResponse { + // Number of times the cgroup was reclaimed due to low memory. + uint64 low = 1; + // Number of times the cgroup exceeded its high memory limit. + uint64 high = 2; + // Number of times the cgroup hit its max memory limit. + uint64 max = 3; + // Number of times the cgroup triggered OOM. + uint64 oom = 4; + // Number of processes killed by OOM killer. + uint64 oom_kill = 5; + // Number of times a group OOM kill has occurred. 
+ uint64 oom_group_kill = 6; +} diff --git a/Sources/Containerization/VirtualMachineAgent.swift b/Sources/Containerization/VirtualMachineAgent.swift index 32871679..0d1ca0ea 100644 --- a/Sources/Containerization/VirtualMachineAgent.swift +++ b/Sources/Containerization/VirtualMachineAgent.swift @@ -94,7 +94,7 @@ public protocol VirtualMachineAgent: Sendable { func configureHosts(config: Hosts, location: String) async throws // Container statistics - func containerStatistics(containerIDs: [String]) async throws -> [ContainerStatistics] + func containerStatistics(containerIDs: [String], categories: StatCategory) async throws -> [ContainerStatistics] } extension VirtualMachineAgent { @@ -110,7 +110,7 @@ extension VirtualMachineAgent { throw ContainerizationError(.unsupported, message: "writeFile") } - public func containerStatistics(containerIDs: [String]) async throws -> [ContainerStatistics] { + public func containerStatistics(containerIDs: [String], categories: StatCategory) async throws -> [ContainerStatistics] { throw ContainerizationError(.unsupported, message: "containerStatistics") } diff --git a/Sources/Containerization/Vminitd.swift b/Sources/Containerization/Vminitd.swift index ce55896c..090317ac 100644 --- a/Sources/Containerization/Vminitd.swift +++ b/Sources/Containerization/Vminitd.swift @@ -80,62 +80,76 @@ extension Vminitd: VirtualMachineAgent { } /// Get statistics for containers. If `containerIDs` is empty returns stats for all containers - /// in the guest. - public func containerStatistics(containerIDs: [String]) async throws -> [ContainerStatistics] { + /// in the guest. Only the categories contained in `categories` are populated in the result; all other fields are `nil`. 
+ public func containerStatistics(containerIDs: [String], categories: StatCategory) async throws -> [ContainerStatistics] { let response = try await client.containerStatistics( .with { $0.containerIds = containerIDs + $0.categories = categories.toProtoCategories() }) return response.containers.map { protoStats in ContainerStatistics( id: protoStats.containerID, - process: .init( - current: protoStats.process.current, - limit: protoStats.process.limit - ), - memory: .init( - usageBytes: protoStats.memory.usageBytes, - limitBytes: protoStats.memory.limitBytes, - swapUsageBytes: protoStats.memory.swapUsageBytes, - swapLimitBytes: protoStats.memory.swapLimitBytes, - cacheBytes: protoStats.memory.cacheBytes, - kernelStackBytes: protoStats.memory.kernelStackBytes, - slabBytes: protoStats.memory.slabBytes, - pageFaults: protoStats.memory.pageFaults, - majorPageFaults: protoStats.memory.majorPageFaults - ), - cpu: .init( - usageUsec: protoStats.cpu.usageUsec, - userUsec: protoStats.cpu.userUsec, - systemUsec: protoStats.cpu.systemUsec, - throttlingPeriods: protoStats.cpu.throttlingPeriods, - throttledPeriods: protoStats.cpu.throttledPeriods, - throttledTimeUsec: protoStats.cpu.throttledTimeUsec - ), - blockIO: .init( - devices: protoStats.blockIo.devices.map { device in - .init( - major: device.major, - minor: device.minor, - readBytes: device.readBytes, - writeBytes: device.writeBytes, - readOperations: device.readOperations, - writeOperations: device.writeOperations + process: categories.contains(.process) && protoStats.hasProcess + ? .init( + current: protoStats.process.current, + limit: protoStats.process.limit + ) : nil, + memory: categories.contains(.memory) && protoStats.hasMemory + ? 
.init( + usageBytes: protoStats.memory.usageBytes, + limitBytes: protoStats.memory.limitBytes, + swapUsageBytes: protoStats.memory.swapUsageBytes, + swapLimitBytes: protoStats.memory.swapLimitBytes, + cacheBytes: protoStats.memory.cacheBytes, + kernelStackBytes: protoStats.memory.kernelStackBytes, + slabBytes: protoStats.memory.slabBytes, + pageFaults: protoStats.memory.pageFaults, + majorPageFaults: protoStats.memory.majorPageFaults + ) : nil, + cpu: categories.contains(.cpu) && protoStats.hasCpu + ? .init( + usageUsec: protoStats.cpu.usageUsec, + userUsec: protoStats.cpu.userUsec, + systemUsec: protoStats.cpu.systemUsec, + throttlingPeriods: protoStats.cpu.throttlingPeriods, + throttledPeriods: protoStats.cpu.throttledPeriods, + throttledTimeUsec: protoStats.cpu.throttledTimeUsec + ) : nil, + blockIO: categories.contains(.blockIO) && protoStats.hasBlockIo + ? .init( + devices: protoStats.blockIo.devices.map { device in + .init( + major: device.major, + minor: device.minor, + readBytes: device.readBytes, + writeBytes: device.writeBytes, + readOperations: device.readOperations, + writeOperations: device.writeOperations + ) + } + ) : nil, + networks: categories.contains(.network) + ? 
protoStats.networks.map { network in + ContainerStatistics.NetworkStatistics( + interface: network.interface, + receivedPackets: network.receivedPackets, + transmittedPackets: network.transmittedPackets, + receivedBytes: network.receivedBytes, + transmittedBytes: network.transmittedBytes, + receivedErrors: network.receivedErrors, + transmittedErrors: network.transmittedErrors ) - } - ), - networks: protoStats.networks.map { network in - ContainerStatistics.NetworkStatistics( - interface: network.interface, - receivedPackets: network.receivedPackets, - transmittedPackets: network.transmittedPackets, - receivedBytes: network.receivedBytes, - transmittedBytes: network.transmittedBytes, - receivedErrors: network.receivedErrors, - transmittedErrors: network.transmittedErrors - ) - } + } : nil, + memoryEvents: categories.contains(.memoryEvents) && protoStats.hasMemoryEvents + ? .init( + low: protoStats.memoryEvents.low, + high: protoStats.memoryEvents.high, + max: protoStats.memoryEvents.max, + oom: protoStats.memoryEvents.oom, + oomKill: protoStats.memoryEvents.oomKill + ) : nil ) } } @@ -537,3 +551,29 @@ extension Vminitd.Client { try await self.channel.close().get() } } + +extension StatCategory { + /// Convert StatCategory to proto enum values. 
+ func toProtoCategories() -> [Com_Apple_Containerization_Sandbox_V3_StatCategory] { + var categories: [Com_Apple_Containerization_Sandbox_V3_StatCategory] = [] + if contains(.process) { + categories.append(.process) + } + if contains(.memory) { + categories.append(.memory) + } + if contains(.cpu) { + categories.append(.cpu) + } + if contains(.blockIO) { + categories.append(.blockIo) + } + if contains(.network) { + categories.append(.network) + } + if contains(.memoryEvents) { + categories.append(.memoryEvents) + } + return categories + } +} diff --git a/Sources/Integration/ContainerTests.swift b/Sources/Integration/ContainerTests.swift index fc923a64..5fcf1568 100644 --- a/Sources/Integration/ContainerTests.swift +++ b/Sources/Integration/ContainerTests.swift @@ -783,23 +783,23 @@ extension IntegrationSuite { throw IntegrationError.assert(msg: "stats container ID '\(stats.id)' != '\(id)'") } - guard stats.process.current > 0 else { - throw IntegrationError.assert(msg: "process count should be > 0, got \(stats.process.current)") + guard let process = stats.process, process.current > 0 else { + throw IntegrationError.assert(msg: "process count should be > 0, got \(stats.process?.current ?? 0)") } - guard stats.memory.usageBytes > 0 else { - throw IntegrationError.assert(msg: "memory usage should be > 0, got \(stats.memory.usageBytes)") + guard let memory = stats.memory, memory.usageBytes > 0 else { + throw IntegrationError.assert(msg: "memory usage should be > 0, got \(stats.memory?.usageBytes ?? 0)") } - guard stats.cpu.usageUsec > 0 else { - throw IntegrationError.assert(msg: "CPU usage should be > 0, got \(stats.cpu.usageUsec)") + guard let cpu = stats.cpu, cpu.usageUsec > 0 else { + throw IntegrationError.assert(msg: "CPU usage should be > 0, got \(stats.cpu?.usageUsec ?? 
0)") } print("Container statistics:") - print(" Processes: \(stats.process.current)") - print(" Memory: \(stats.memory.usageBytes) bytes") - print(" CPU: \(stats.cpu.usageUsec) usec") - print(" Networks: \(stats.networks.count) interfaces") + print(" Processes: \(process.current)") + print(" Memory: \(memory.usageBytes) bytes") + print(" CPU: \(cpu.usageUsec) usec") + print(" Networks: \(stats.networks?.count ?? 0) interfaces") try await container.stop() } catch { @@ -903,6 +903,62 @@ extension IntegrationSuite { } } + func testMemoryEventsOOMKill() async throws { + let id = "test-memory-events-oom-kill" + + let bs = try await bootstrap(id) + let container = try LinuxContainer(id, rootfs: bs.rootfs, vmm: bs.vmm) { config in + config.process.arguments = ["sleep", "infinity"] + config.bootLog = bs.bootLog + } + + do { + try await container.create() + try await container.start() + + // Run a process that will exceed the memory limit and get OOM-killed + let exec = try await container.exec("oom-trigger") { config in + // First set a 2MB memory limit on the container's cgroup, then allocate more + config.arguments = [ + "sh", + "-c", + "echo 2097152 > /sys/fs/cgroup/memory.max && dd if=/dev/zero of=/dev/null bs=100M", + ] + } + + try await exec.start() + let status = try await exec.wait() + if status.exitCode == 0 { + throw IntegrationError.assert(msg: "expected exit code > 0") + } + try await exec.delete() + + let stats = try await container.statistics(categories: .memoryEvents) + + guard let events = stats.memoryEvents else { + throw IntegrationError.assert(msg: "expected memoryEvents to be present") + } + + print("Memory events for container \(id):") + print(" low: \(events.low)") + print(" high: \(events.high)") + print(" max: \(events.max)") + print(" oom: \(events.oom)") + print(" oomKill: \(events.oomKill)") + + guard events.oomKill > 0 else { + throw IntegrationError.assert(msg: "expected oomKill > 0, got \(events.oomKill)") + } + + try await 
container.kill(SIGKILL) + try await container.wait() + try await container.stop() + } catch { + try? await container.stop() + throw error + } + } + func testNoSerialConsole() async throws { let id = "test-no-serial-console" diff --git a/Sources/Integration/PodTests.swift b/Sources/Integration/PodTests.swift index ed417495..17e977f0 100644 --- a/Sources/Integration/PodTests.swift +++ b/Sources/Integration/PodTests.swift @@ -335,18 +335,18 @@ extension IntegrationSuite { } for stat in stats { - guard stat.process.current > 0 else { + guard let process = stat.process, process.current > 0 else { throw IntegrationError.assert(msg: "container \(stat.id) process count should be > 0") } - guard stat.memory.usageBytes > 0 else { + guard let memory = stat.memory, memory.usageBytes > 0 else { throw IntegrationError.assert(msg: "container \(stat.id) memory usage should be > 0") } print("Container \(stat.id) statistics:") - print(" Processes: \(stat.process.current)") - print(" Memory: \(stat.memory.usageBytes) bytes") - print(" CPU: \(stat.cpu.usageUsec) usec") + print(" Processes: \(process.current)") + print(" Memory: \(memory.usageBytes) bytes") + print(" CPU: \(stat.cpu?.usageUsec ?? 
0) usec") } try await pod.stop() @@ -356,6 +356,65 @@ extension IntegrationSuite { } } + func testPodMemoryEventsOOMKill() async throws { + let id = "test-pod-memory-events-oom-kill" + + let bs = try await bootstrap(id) + let pod = try LinuxPod(id, vmm: bs.vmm) { config in + config.cpus = 4 + config.memoryInBytes = 1024.mib() + config.bootLog = bs.bootLog + } + + try await pod.addContainer("container1", rootfs: bs.rootfs) { config in + config.process.arguments = ["/bin/sleep", "infinity"] + } + + do { + try await pod.create() + try await pod.startContainer("container1") + + let exec = try await pod.execInContainer("container1", processID: "oom-trigger") { config in + config.arguments = [ + "sh", + "-c", + "echo 2097152 > /sys/fs/cgroup/memory.max && dd if=/dev/zero of=/dev/null bs=100M", + ] + } + + try await exec.start() + let status = try await exec.wait() + if status.exitCode == 0 { + throw IntegrationError.assert(msg: "expected exit code > 0") + } + try await exec.delete() + + let stats = try await pod.statistics(containerIDs: ["container1"], categories: .memoryEvents) + + guard let containerStats = stats.first, let events = containerStats.memoryEvents else { + throw IntegrationError.assert(msg: "expected memoryEvents to be present") + } + + print("Memory events for pod container container1:") + print(" low: \(events.low)") + print(" high: \(events.high)") + print(" max: \(events.max)") + print(" oom: \(events.oom)") + print(" oomKill: \(events.oomKill)") + + guard events.oomKill > 0 else { + throw IntegrationError.assert(msg: "expected oomKill > 0, got \(events.oomKill)") + } + + try await pod.killContainer("container1", signal: SIGKILL) + try await pod.waitContainer("container1") + try await pod.stop() + } catch { + try? 
await pod.stop() + throw error + } + } + func testPodContainerResourceLimits() async throws { let id = "test-pod-container-resource-limits" diff --git a/Sources/Integration/Suite.swift b/Sources/Integration/Suite.swift index f8477997..543c309e 100644 --- a/Sources/Integration/Suite.swift +++ b/Sources/Integration/Suite.swift @@ -293,6 +293,7 @@ struct IntegrationSuite: AsyncParsableCommand { Test("container /dev/console", testContainerDevConsole), Test("container statistics", testContainerStatistics), Test("container cgroup limits", testCgroupLimits), + Test("container memory events OOM kill", testMemoryEventsOOMKill), Test("container no serial console", testNoSerialConsole), Test("unix socket into guest", testUnixSocketIntoGuest), Test("container non-closure constructor", testNonClosureConstructor), @@ -322,6 +323,7 @@ struct IntegrationSuite: AsyncParsableCommand { Test("pod stop container idempotency", testPodStopContainerIdempotency), Test("pod list containers", testPodListContainers), Test("pod container statistics", testPodContainerStatistics), + Test("pod memory events OOM kill", testPodMemoryEventsOOMKill), Test("pod container resource limits", testPodContainerResourceLimits), Test("pod container filesystem isolation", testPodContainerFilesystemIsolation), Test("pod container PID namespace isolation", testPodContainerPIDNamespaceIsolation), diff --git a/vminitd/Sources/vminitd/ManagedContainer.swift b/vminitd/Sources/vminitd/ManagedContainer.swift index 2a726b86..e5aeb20f 100644 --- a/vminitd/Sources/vminitd/ManagedContainer.swift +++ b/vminitd/Sources/vminitd/ManagedContainer.swift @@ -231,6 +231,10 @@ extension ManagedContainer { try self.cgroupManager.stats() } + func getMemoryEvents() throws -> MemoryEvents { + try self.cgroupManager.getMemoryEvents() + } + func getExecOrInit(execID: String) throws -> any ContainerProcess { if execID == self.id { return self.initProcess diff --git a/vminitd/Sources/vminitd/Server+GRPC.swift 
b/vminitd/Sources/vminitd/Server+GRPC.swift index bba5288b..95214b7f 100644 --- a/vminitd/Sources/vminitd/Server+GRPC.swift +++ b/vminitd/Sources/vminitd/Server+GRPC.swift @@ -1084,12 +1084,23 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid log.debug( "containerStatistics", metadata: [ - "container_ids": "\(request.containerIds)" + "container_ids": "\(request.containerIds)", + "categories": "\(request.categories)", ]) do { - // Get all network interfaces (skip loopback) - let interfaces = try getNetworkInterfaces() + // Parse requested categories (empty = all) + let categories = Set(request.categories) + let wantAll = categories.isEmpty + let wantProcess = wantAll || categories.contains(.process) + let wantMemory = wantAll || categories.contains(.memory) + let wantCPU = wantAll || categories.contains(.cpu) + let wantBlockIO = wantAll || categories.contains(.blockIo) + let wantNetwork = wantAll || categories.contains(.network) + let wantMemoryEvents = wantAll || categories.contains(.memoryEvents) + + // Get all network interfaces (skip loopback) only if needed + let interfaces = wantNetwork ? try getNetworkInterfaces() : [] // Get containers to query let containerIDs: [String] @@ -1103,30 +1114,57 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid for containerID in containerIDs { let container = try await state.get(container: containerID) - let cgStats = try await container.stats() - // Get network stats for all interfaces - let socket = try DefaultNetlinkSocket() - let session = NetlinkSession(socket: socket, log: log) - var networkStats: [Com_Apple_Containerization_Sandbox_V3_NetworkStats] = [] + // Only fetch cgroup stats if needed + let cgStats: Cgroup2Stats? 
+ if wantProcess || wantMemory || wantCPU || wantBlockIO { + cgStats = try await container.stats() + } else { + cgStats = nil + } - for interface in interfaces { - let responses = try session.linkGet(interface: interface, includeStats: true) - if responses.count == 1, let stats = try responses[0].getStatistics() { - networkStats.append( - .with { - $0.interface = interface - $0.receivedPackets = stats.rxPackets - $0.transmittedPackets = stats.txPackets - $0.receivedBytes = stats.rxBytes - $0.transmittedBytes = stats.txBytes - $0.receivedErrors = stats.rxErrors - $0.transmittedErrors = stats.txErrors - }) + // Get network stats only if requested + var networkStats: [Com_Apple_Containerization_Sandbox_V3_NetworkStats] = [] + if wantNetwork { + let socket = try DefaultNetlinkSocket() + let session = NetlinkSession(socket: socket, log: log) + for interface in interfaces { + let responses = try session.linkGet(interface: interface, includeStats: true) + if responses.count == 1, let stats = try responses[0].getStatistics() { + networkStats.append( + .with { + $0.interface = interface + $0.receivedPackets = stats.rxPackets + $0.transmittedPackets = stats.txPackets + $0.receivedBytes = stats.rxBytes + $0.transmittedBytes = stats.txBytes + $0.receivedErrors = stats.rxErrors + $0.transmittedErrors = stats.txErrors + }) + } } } - containerStats.append(mapStatsToProto(containerID: containerID, cgStats: cgStats, networkStats: networkStats)) + // Get memory events only if requested + var memoryEvents: MemoryEvents? 
+ if wantMemoryEvents { + memoryEvents = try await container.getMemoryEvents() + } + + containerStats.append( + mapStatsToProto( + containerID: containerID, + cgStats: cgStats, + networkStats: networkStats, + memoryEvents: memoryEvents, + wantProcess: wantProcess, + wantMemory: wantMemory, + wantCPU: wantCPU, + wantBlockIO: wantBlockIO, + wantNetwork: wantNetwork, + wantMemoryEvents: wantMemoryEvents + ) + ) } return .with { @@ -1142,6 +1180,38 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid } } + func getMemoryEvents( + request: Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsRequest, + context: GRPC.GRPCAsyncServerCallContext + ) async throws -> Com_Apple_Containerization_Sandbox_V3_GetMemoryEventsResponse { + log.debug( + "getMemoryEvents", + metadata: [ + "containerID": "\(request.containerID)" + ]) + + do { + let container = try await state.get(container: request.containerID) + let events = try await container.getMemoryEvents() + + return .with { + $0.low = events.low + $0.high = events.high + $0.max = events.max + $0.oom = events.oom + $0.oomKill = events.oomKill + } + } catch { + log.error( + "getMemoryEvents", + metadata: [ + "containerID": "\(request.containerID)", + "error": "\(error)", + ]) + throw GRPCStatus(code: .internalError, message: "getMemoryEvents: \(error)") + } + } + private func swiftErrno(_ msg: Logger.Message) -> POSIXError { let error = POSIXError(.init(rawValue: errno)!) 
log.error( @@ -1171,41 +1241,54 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid private func mapStatsToProto( containerID: String, - cgStats: Cgroup2Stats, - networkStats: [Com_Apple_Containerization_Sandbox_V3_NetworkStats] + cgStats: Cgroup2Stats?, + networkStats: [Com_Apple_Containerization_Sandbox_V3_NetworkStats], + memoryEvents: MemoryEvents?, + wantProcess: Bool, + wantMemory: Bool, + wantCPU: Bool, + wantBlockIO: Bool, + wantNetwork: Bool, + wantMemoryEvents: Bool ) -> Com_Apple_Containerization_Sandbox_V3_ContainerStats { .with { $0.containerID = containerID - $0.process = .with { - $0.current = cgStats.pids?.current ?? 0 - $0.limit = cgStats.pids?.max ?? 0 + if wantProcess, let pids = cgStats?.pids { + $0.process = .with { + $0.current = pids.current + $0.limit = pids.max ?? 0 + } } - $0.memory = .with { - $0.usageBytes = cgStats.memory?.usage ?? 0 - $0.limitBytes = cgStats.memory?.usageLimit ?? 0 - $0.swapUsageBytes = cgStats.memory?.swapUsage ?? 0 - $0.swapLimitBytes = cgStats.memory?.swapLimit ?? 0 - $0.cacheBytes = cgStats.memory?.file ?? 0 - $0.kernelStackBytes = cgStats.memory?.kernelStack ?? 0 - $0.slabBytes = cgStats.memory?.slab ?? 0 - $0.pageFaults = cgStats.memory?.pgfault ?? 0 - $0.majorPageFaults = cgStats.memory?.pgmajfault ?? 0 + if wantMemory, let memory = cgStats?.memory { + $0.memory = .with { + $0.usageBytes = memory.usage + $0.limitBytes = memory.usageLimit ?? 0 + $0.swapUsageBytes = memory.swapUsage ?? 0 + $0.swapLimitBytes = memory.swapLimit ?? 0 + $0.cacheBytes = memory.file + $0.kernelStackBytes = memory.kernelStack + $0.slabBytes = memory.slab + $0.pageFaults = memory.pgfault + $0.majorPageFaults = memory.pgmajfault + } } - $0.cpu = .with { - $0.usageUsec = cgStats.cpu?.usageUsec ?? 0 - $0.userUsec = cgStats.cpu?.userUsec ?? 0 - $0.systemUsec = cgStats.cpu?.systemUsec ?? 0 - $0.throttlingPeriods = cgStats.cpu?.nrPeriods ?? 0 - $0.throttledPeriods = cgStats.cpu?.nrThrottled ?? 
0 - $0.throttledTimeUsec = cgStats.cpu?.throttledUsec ?? 0 + if wantCPU, let cpu = cgStats?.cpu { + $0.cpu = .with { + $0.usageUsec = cpu.usageUsec + $0.userUsec = cpu.userUsec + $0.systemUsec = cpu.systemUsec + $0.throttlingPeriods = cpu.nrPeriods + $0.throttledPeriods = cpu.nrThrottled + $0.throttledTimeUsec = cpu.throttledUsec + } } - $0.blockIo = .with { - $0.devices = - cgStats.io?.entries.map { entry in + if wantBlockIO, let io = cgStats?.io { + $0.blockIo = .with { + $0.devices = io.entries.map { entry in .with { $0.major = entry.major $0.minor = entry.minor @@ -1214,10 +1297,23 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid $0.readOperations = entry.rios $0.writeOperations = entry.wios } - } ?? [] + } + } } - $0.networks = networkStats + if wantNetwork { + $0.networks = networkStats + } + + if wantMemoryEvents, let events = memoryEvents { + $0.memoryEvents = .with { + $0.low = events.low + $0.high = events.high + $0.max = events.max + $0.oom = events.oom + $0.oomKill = events.oomKill + } + } } }