@@ -32,7 +32,7 @@ metadata:
3232 capabilities : Seamless Upgrades
3333 categories : AI/Machine Learning,Monitoring
3434 containerImage : docker.io/rocm/gpu-operator:v1.2.0
35- createdAt : " 2025-08-06T05:54:02Z "
35+ createdAt : " 2025-08-12T02:38:34Z "
3636 description : |-
3737 Operator responsible for deploying AMD GPU kernel drivers, device plugin, device test runner and device metrics exporter
3838 For more information, visit [documentation](https://instinct.docs.amd.com/projects/gpu-operator/en/latest/)
@@ -603,6 +603,28 @@ spec:
603603 path : metricsExporter.upgradePolicy.upgradeStrategy
604604 x-descriptors :
605605 - urn:alm:descriptor:com.amd.deviceconfigs:upgradeStrategy
606+ - description : Remediation workflow configuration
607+ displayName : RemediationWorkflow
608+ path : remediationWorkflow
609+ x-descriptors :
610+ - urn:alm:descriptor:com.amd.deviceconfigs:remediationWorkflow
611+ - description : Name of the ConfigMap that holds condition-to-workflow mappings.
612+ displayName : ConditionalWorkflows
613+ path : remediationWorkflow.conditionalWorkflows
614+ x-descriptors :
615+ - urn:alm:descriptor:com.amd.deviceconfigs:conditionalWorkflows
616+ - description : Enable remediation workflows. Disabled by default; enable if the operator
617+ should automatically handle remediation of a node in case of GPU issues.
618+ displayName : Enable
619+ path : remediationWorkflow.enable
620+ x-descriptors :
621+ - urn:alm:descriptor:com.amd.deviceconfigs:enable
622+ - description : Time to live, in hours, for the Argo workflow object and its pods of a
623+ failed workflow. Defaults to 24 hours.
624+ displayName : TtlForFailedWorkflows
625+ path : remediationWorkflow.ttlForFailedWorkflows
626+ x-descriptors :
627+ - urn:alm:descriptor:com.amd.deviceconfigs:ttlForFailedWorkflows
606628 - description : Selector describes on which nodes the GPU Operator should enable
607629 the GPU device.
608630 displayName : Selector
0 commit comments