From fa8d51b5fffe37d1dc665dd467ab977fd4c102c2 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:22:21 -0400 Subject: [PATCH 01/53] added pipelineId and piplineRunnable keys/values to all pipeline profiles --- assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json | 2 ++ assets/analysis-pipelines/bactopia/bactopia-base-dev.json | 1 + assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json | 2 ++ assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json | 2 ++ assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json | 2 ++ assets/analysis-pipelines/bactopia/ont-bactopia-dev.json | 2 ++ 6 files changed, 11 insertions(+) diff --git a/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json b/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json index fee36cc..688fa5c 100644 --- a/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json +++ b/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json @@ -1,6 +1,8 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia", + "pipelineId": "bactopia-base-v3.2.0", + "pipelineRunnable": false, "pipelineDescription": "Execute Bactopia v3.2.0", "project": "bactopia/bactopia", "version": "v3.2.0", diff --git a/assets/analysis-pipelines/bactopia/bactopia-base-dev.json b/assets/analysis-pipelines/bactopia/bactopia-base-dev.json index db4ad42..3777c80 100644 --- a/assets/analysis-pipelines/bactopia/bactopia-base-dev.json +++ b/assets/analysis-pipelines/bactopia/bactopia-base-dev.json @@ -1,6 +1,7 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia", + "pipelineId": "bactopia-bactopia-base-dev", "pipelineDescription": "Execute Bactopia development release", "project": "bactopia/bactopia", "version": "dev", diff --git a/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json b/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json index 416d88f..dc6e7ec 100644 --- a/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json +++ 
b/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json @@ -1,6 +1,8 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia Kraken2", + "pipelineId": "bactopia-kraken2-v3.2.0", + "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's Kraken2 workflow with the development release", "project": "bactopia/bactopia", "version": "v3.2.0", diff --git a/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json b/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json index 5deba2c..4991b26 100644 --- a/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json +++ b/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json @@ -1,6 +1,8 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia Kraken2", + "pipelineId": "bactopia-kraken2-dev", + "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's Kraken2 workflow with the development release", "project": "bactopia/bactopia", "version": "dev", diff --git a/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json b/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json index 90b8355..a094d90 100644 --- a/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json +++ b/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json @@ -1,6 +1,8 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia ONT Sample", + "pipelineId": "bactopia-ont-v3.2.0", + "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's ONT sample sequencing workflow with v3.2.0", "project": "bactopia/bactopia", "version": "v3.2.0", diff --git a/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json b/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json index 9c4d90b..456db56 100644 --- a/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json +++ b/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json @@ -1,6 +1,8 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia ONT Sample", + "pipelineId": "bactopia-ont-dev", + "pipelineRunnable": true, "pipelineDescription": 
"Execute Bactopia's ONT sample sequencing workflow with the development release", "project": "bactopia/bactopia", "version": "dev", From 4eadae002aa43a5e79faa16885190f0baee42892 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:23:01 -0400 Subject: [PATCH 02/53] first couple (untested) configs for workflow api endpoints --- Pulumi.cape-cod-dev.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Pulumi.cape-cod-dev.yaml b/Pulumi.cape-cod-dev.yaml index d805c34..962a2c6 100644 --- a/Pulumi.cape-cod-dev.yaml +++ b/Pulumi.cape-cod-dev.yaml @@ -759,6 +759,7 @@ config: logging_enabled: True env_vars: - "DAP_REG_DDB_TABLE" + - "WORKFLOW_REG_DDB_TABLE" - "DDB_REGION" - "USER_ATTRS_DDB_TABLE" - "ETL_ATTRS_DDB_TABLE" @@ -768,6 +769,33 @@ config: # TODO: memory and timeouts for these functions need # some love handlers: + - id: "get_airflow_dags_handler" + name: "getdags" + code: "assets/api/capi/handlers/get_airflow_dags.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: "getdags Lambda Funnction" + memory_size: 128 + timeout: 3 + - id: "get_workflow_pipeline_profiles_handler" + name: "getdagprofiles" + code: "assets/api/capi/handlers/get_workflow_pipeline_profiles.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: "getdagprofiles Lambda Funnction" + memory_size: 128 + timeout: 3 + # TODO: remaining airflow handlers - id: "get_daps_handler" name: "getdaps" code: "assets/api/capi/handlers/get_daps.py" @@ -820,6 +848,8 @@ config: description: "getdaplogs Lambda Funnction" memory_size: 128 timeout: 3 + # TODO: if not used as part of workflows (probably + # won't be) this should be removed - id: "submit_dap_run_handler" name: "submitdaprun" code: "assets/api/capi/handlers/submit_dap_run.py" From 6bfc89a84ce7b3cac598e4712c9f0bb85a1d1bd0 Mon Sep 17 
00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:23:37 -0400 Subject: [PATCH 03/53] first couple (untested) workflow api endpoints --- assets/api/capi/capi-openapi-301.yaml.j2 | 166 +++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/assets/api/capi/capi-openapi-301.yaml.j2 b/assets/api/capi/capi-openapi-301.yaml.j2 index 14d702b..b8525c7 100644 --- a/assets/api/capi/capi-openapi-301.yaml.j2 +++ b/assets/api/capi/capi-openapi-301.yaml.j2 @@ -1277,9 +1277,175 @@ paths: passthroughBehavior: "when_no_match" timeoutInMillis: 29000 type: "mock" + /workflows: + get: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id for a workflow to limit the return to. If + not provided all workflows will be returned. + required: false + - in: query + name: includeDisabled + schema: + type: boolean + description: + Boolean value specifying if disabled workflows should + be included in results. Defaults to false if not + provided. + required: false + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: + An object describing the workflows. Format + described in the Airflow REST v2 API docs + for GET /api/v2/dags. + items: + type: object + properties: + dags: + type: array + items: + type: object + additionalProperties: true + total_entries: + type: integer + "401": + description: Airflow Unauthorized + content: + application/json: + schema: + $ref: "#/components/responses/AirflowAuthnAuthzError" + "403": + description: Airflow Forbidden + content: + application/json: + schema: + $ref: "#/components/responses/AirflowAuthnAuthzError" + + "422": + description: Unprocessable Content + content: + application/json: + schema: + $ref: "#/components/responses/AirflowUnprocessableContentError" + "500": + description: + Server error while fetching workflow details. 
+ x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_airflow_dags_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" + /workflows/pipelineprofiles: + get: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id for a workflow fetch pipeline profiles for. + required: true + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: + An object containing information describing + input required for each pipeline profile + used in the workflow. + properties: + type: object + properties: + dagId: + description: + The DAG ID for the workflow asked for + type: string + pipelineProfiles: + type: array + items: + type: object + additionalProperties: true + "500": + description: + Server error while fetching workflow details. 
+ x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_pipeline_profiles_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" # reusable components that follow OpenApi 3.0.1 spec components: responses: + AirflowAuthnAuthzError: + description: The 401/403 response format from the Airflow REST API + type: object + properties: + detail: + type: string + AirflowUnprocessableContentError: + description: The 422 response format from the Airflow REST API + type: object + properties: + detail: + type: array + items: + type: object + additionalProperties: true 200OptionsCors: description: "200 response" headers: From e1b368168ce22293abbb7ff99b62e78986d6c58a Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:25:21 -0400 Subject: [PATCH 04/53] added pipelineId and pipelineRunnable keys/values to dap registry items. 
made pipeline_id part of index for lookups --- capeinfra/pipeline/dapregistry.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/capeinfra/pipeline/dapregistry.py b/capeinfra/pipeline/dapregistry.py index 1aef71b..d5eebb6 100644 --- a/capeinfra/pipeline/dapregistry.py +++ b/capeinfra/pipeline/dapregistry.py @@ -112,6 +112,10 @@ def create_dap_registry_table(self): attributes=[ # NOTE: we do not need to define any part of the "schema" here # that isn't needed in an index. + { + "name": "pipeline_id", + "type": "S", + }, { "name": "pipeline_name", "type": "S", @@ -193,6 +197,10 @@ def load_pipeline_assets(self): item=Output.json_dumps( { "pipeline_name": {"S": profile["pipelineName"]}, + "pipeline_id": {"S": profile["pipelineId"]}, + "pipeline_runnable": { + "S": profile["pipelineRunnable"] + }, "version": {"S": profile["version"]}, "project": {"S": profile["project"]}, "pipeline_type": {"S": profile["pipelineType"]}, From 2d2ad853135923425f2b7710e84382d1d1cf4382 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:26:49 -0400 Subject: [PATCH 05/53] wired up workflow meta registry to the environment variables sent off to lambda --- capeinfra/swimlanes/private.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/capeinfra/swimlanes/private.py b/capeinfra/swimlanes/private.py index 00fb03d..3e90708 100644 --- a/capeinfra/swimlanes/private.py +++ b/capeinfra/swimlanes/private.py @@ -357,6 +357,17 @@ def create_workflow_meta_registry(self): ), ) + # read access to this this resource can be configured via the deployment + # config (for api lambdas), so add it to the bookkeeping structure for + # that + self._exposed_env_vars.setdefault( + "WORKFLOW_REG_DDB_TABLE", + { + "resource_name": self.workflow_meta_registry.workflow_meta_ddb_table.name, + "type": "table", + }, + ) + # TODO: ISSUE #126 # TODO: refactor out elsewhere def _deploy_static_app(self, sa_cfg: CapeConfig): From 6d5effa92bb0a22952f2e977278a0bab986c1c36 Mon Sep 17 
00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:27:27 -0400 Subject: [PATCH 06/53] first (untested) list workflow api endpoint --- assets/api/capi/handlers/get_airflow_dags.py | 90 ++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 assets/api/capi/handlers/get_airflow_dags.py diff --git a/assets/api/capi/handlers/get_airflow_dags.py b/assets/api/capi/handlers/get_airflow_dags.py new file mode 100644 index 0000000..f41ec2c --- /dev/null +++ b/assets/api/capi/handlers/get_airflow_dags.py @@ -0,0 +1,90 @@ +"""Lambda function for handling a get of airflow workflow details.""" + +import json + +import boto3 +from botocore.exceptions import ClientError +from capepy.aws.utils import decode_error + + +def index_handler(event, context): + """Handler for the GET of one or all airflow workflows. + + This endpoint is a proxy to the airflow /api/v2/dags endpoint. Done as a + lambda instead of direct integration so we can massage data as required. + + This endpoint does not return any CAPE specific data such as the pipeline + profiles of the pipelines in the workflows. That is a separate API call. + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. + """ + + # TODO: this will be used in a number of endpoints. we can pass it in or add + # it to capepy or something else. 
+ env_name = "ccd-pvsl-airflow-env-mwaa-env" + # TODO: add this to capepy + mwaa_client = boto3.client("mwaa") + + try: + qsp = event.get("queryStringParameters") + + api_path = "/dags" + include_disabled = False + + dag_id = None + if qsp is not None: + dag_id = qsp.get("dag_id") + + if dag_id is not None: + api_path = f"{api_path}/{dag_id}" + + # TODO: ensure this comes back as python boolean + include_all = qsp.get("includeDisabled", False) + + request_params = { + "Name": env_name, + "Path": api_path, + "Method": "GET", + "QueryParameters": {"paused": include_disabled}, + } + + response = mwaa_client.invoke_rest_api(**request_params) + + # no matter the status code of the response we can return the same + # thing. the difference in 200 vs non-200 is that the json will contain + # an error string under the key "detail" instead of workflow data in + # the non-200 case + return { + "statusCode": response["RestApiStatusCode"], + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. + "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps(response["RestApiResponse"]), + } + except ClientError as err: + code, message = decode_error(err) + + msg = ( + f"Error during fetch of workflow data from airflow queuing. 
" + f"{code} {message}" + ) + + return { + "statusCode": 500, + "body": msg, + } From 389e0c4dd7d6a98aa68cdbc2693c4cbd281c4be1 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:27:43 -0400 Subject: [PATCH 07/53] first (untested) get profiles for workflow api endpoint --- .../get_workflow_pipeline_profiles.py | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 assets/api/capi/handlers/get_workflow_pipeline_profiles.py diff --git a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py new file mode 100644 index 0000000..db5ec9c --- /dev/null +++ b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py @@ -0,0 +1,114 @@ +"""Lambda function for handling a get of pipeline profiles used in a workflow.""" + +import json +from decimal import Decimal + +from botocore.exceptions import ClientError +from capepy.aws.dynamodb import PipelineTable +from capepy.aws.utils import decode_error + + +# TODO: need to add some abstraction of this to capepy. it's repeated here and +# in get_object_etls at least +def bad_param_response(): + """Gets a response data object and status code when bad params are given. + + :return: A tuple containins a response data object and an HTTP 400 status + code. + """ + return ( + {"message": ("Missing required query string parameters: dagId")}, + 400, + ) + + +# TODO: this should probably go elsewhere. issue is you can't json serialize +# Decimal values, and some of the values coming back from dynamo in the +# pipeline profile spec are Decimal. So this shims them to floats. +def json_serialize_the_unserializable(val): + """Serialize a value (e.g. Decimal) that is otherwise not json serializable. + + Right now this just handles Decimal, but can be updated as needed. + + :param val: The value to serialize. + :return: the serialized value. + :raises: TypeError if even this function cannot serialize. 
+ """ + if isinstance(val, Decimal): + # this results in a reduction of precision which can cause issues. In + # our case (for now at least) it's ok, but we may want to consider other + # mechanisms like string conversions or forcing some rounding. + return float(val) + raise TypeError(f"Value {val} of type {type(val)} is not json serializable") + + +def index_handler(event, context): + """Handler for the GET of the profiles used in a workflow (ariflow dag). + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. + """ + + try: + headers = event.get("headers", {}) + + qsp = event.get("queryStringParameters") + + if qsp is None: + resp_data, resp_status = bad_param_response() + else: + dag_id = qsp.get("dagId") + + if not dag_id: + resp_data, resp_status = bad_param_response() + else: + + # TODO: below here is what we do for one profile. need to mod + # for a list of profiles after getting the dap ids from + # the workflow meta table + + # get a reference to the registry table + ddb_table = PipelineTable() + + dap = ddb_table.get_pipeline(pipeline_name, version) + resp_data = [] + resp_status = 200 + if dap: + resp_data = dap["profile"] + print(f"resp_data: {resp_data}") + # And return our response as a 200 + return { + "statusCode": resp_status, + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. 
+ "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps( + resp_data, default=json_serialize_the_unserializable + ), + } + except ClientError as err: + code, message = decode_error(err) + + msg = ( + f"Error during processing of submitted data analysis pipeline for " + f"queuing. {code} {message}" + ) + + return { + "statusCode": 500, + "body": msg, + } From 13509f5113ed0d45d6620a7ee736f864fa8349e0 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:30:44 -0400 Subject: [PATCH 08/53] spelling --- assets/api/capi/handlers/get_workflow_pipeline_profiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py index db5ec9c..9b395bd 100644 --- a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py +++ b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py @@ -43,7 +43,7 @@ def json_serialize_the_unserializable(val): def index_handler(event, context): - """Handler for the GET of the profiles used in a workflow (ariflow dag). + """Handler for the GET of the profiles used in a workflow (airflow dag). :param event: The event object that contains the HTTP request and json data. 
From 98a99a08a6c717b755fe6f2c7c2268cc239146a7 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:33:05 -0400 Subject: [PATCH 09/53] moved dapregistry module to registry as it now has 2 classes --- capeinfra/pipeline/{dapregistry.py => registry.py} | 0 capeinfra/swimlanes/private.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename capeinfra/pipeline/{dapregistry.py => registry.py} (100%) diff --git a/capeinfra/pipeline/dapregistry.py b/capeinfra/pipeline/registry.py similarity index 100% rename from capeinfra/pipeline/dapregistry.py rename to capeinfra/pipeline/registry.py diff --git a/capeinfra/swimlanes/private.py b/capeinfra/swimlanes/private.py index 3e90708..74cfda0 100644 --- a/capeinfra/swimlanes/private.py +++ b/capeinfra/swimlanes/private.py @@ -22,7 +22,7 @@ get_vpce_api_invoke_policy, ) from capeinfra.pipeline.airflow import MwaaEnvironment -from capeinfra.pipeline.dapregistry import DAPRegistry, WorkflowMetaRegistry +from capeinfra.pipeline.registry import DAPRegistry, WorkflowMetaRegistry # TODO: ISSUE #145 This import is to support the temporary dap results s3 # handling. 
From 3fede032ce48c704ca5f8b28d9bf6cefe6ec53a9 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 8 May 2026 14:50:58 -0400 Subject: [PATCH 10/53] (untested) speculative additions to get profiles for workflow api endpoint to handle reading from the workflw and dap registries and return response --- .../get_workflow_pipeline_profiles.py | 107 ++++++++++-------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py index 9b395bd..df18108 100644 --- a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py +++ b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py @@ -4,42 +4,45 @@ from decimal import Decimal from botocore.exceptions import ClientError -from capepy.aws.dynamodb import PipelineTable -from capepy.aws.utils import decode_error - - -# TODO: need to add some abstraction of this to capepy. it's repeated here and -# in get_object_etls at least -def bad_param_response(): - """Gets a response data object and status code when bad params are given. - - :return: A tuple containins a response data object and an HTTP 400 status - code. - """ - return ( - {"message": ("Missing required query string parameters: dagId")}, - 400, - ) - - -# TODO: this should probably go elsewhere. issue is you can't json serialize -# Decimal values, and some of the values coming back from dynamo in the -# pipeline profile spec are Decimal. So this shims them to floats. -def json_serialize_the_unserializable(val): - """Serialize a value (e.g. Decimal) that is otherwise not json serializable. - - Right now this just handles Decimal, but can be updated as needed. - - :param val: The value to serialize. - :return: the serialized value. - :raises: TypeError if even this function cannot serialize. - """ - if isinstance(val, Decimal): - # this results in a reduction of precision which can cause issues. 
In - # our case (for now at least) it's ok, but we may want to consider other - # mechanisms like string conversions or forcing some rounding. - return float(val) - raise TypeError(f"Value {val} of type {type(val)} is not json serializable") +from capepy.aws.dynamodb import PipelineTable, WorkflowMetaTable +from capepy.aws.utils import ( + bad_params_response, + decode_error, + json_serialize_the_unserializable, +) + +# # TODO: need to add some abstraction of this to capepy. it's repeated here and +# # in get_object_etls at least +# def bad_param_response(): +# """Gets a response data object and status code when bad params are given. +# +# :return: A tuple containins a response data object and an HTTP 400 status +# code. +# """ +# return ( +# {"message": ("Missing required query string parameters: dagId")}, +# 400, +# ) +# +# +# # TODO: this should probably go elsewhere. issue is you can't json serialize +# # Decimal values, and some of the values coming back from dynamo in the +# # pipeline profile spec are Decimal. So this shims them to floats. +# def json_serialize_the_unserializable(val): +# """Serialize a value (e.g. Decimal) that is otherwise not json serializable. +# +# Right now this just handles Decimal, but can be updated as needed. +# +# :param val: The value to serialize. +# :return: the serialized value. +# :raises: TypeError if even this function cannot serialize. +# """ +# if isinstance(val, Decimal): +# # this results in a reduction of precision which can cause issues. In +# # our case (for now at least) it's ok, but we may want to consider other +# # mechanisms like string conversions or forcing some rounding. 
+# return float(val) +# raise TypeError(f"Value {val} of type {type(val)} is not json serializable") def index_handler(event, context): @@ -60,23 +63,35 @@ def index_handler(event, context): else: dag_id = qsp.get("dagId") - if not dag_id: + if dag_id is None: resp_data, resp_status = bad_param_response() else: - # TODO: below here is what we do for one profile. need to mod - # for a list of profiles after getting the dap ids from - # the workflow meta table + # TODO: need new capepy as it has the workflow table class + workflow_table = WorkflowMetaTable() + wf = workflow_table.get_workflow_by_id(dag_id) # get a reference to the registry table - ddb_table = PipelineTable() - - dap = ddb_table.get_pipeline(pipeline_name, version) + dapreg_table = PipelineTable() resp_data = [] resp_status = 200 - if dap: - resp_data = dap["profile"] - print(f"resp_data: {resp_data}") + + # TODO: handle a change in status code due to potential issues + # in pid for loop? what can go wrong there? + + for pid in wf["pipeline_ids"]: + dap = dapreg_table.get_pipeline_by_id(pid) + if dap: + resp_data.append(dap["profile"]) + else: + # TODO: What other errors to handle here? + resp_data = [ + { + "message": f"Could not find pipeline profile for pipeline with id {pid} " + } + ] + resp_status = 404 + break # And return our response as a 200 return { "statusCode": resp_status, From 8f0b474943f1c756dfe63dcc8737e8f6e9cffd9f Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:23:59 -0400 Subject: [PATCH 11/53] added config for 1st 4 workflow endpoints. 
fixed longstanding typo --- Pulumi.cape-cod-dev.yaml | 44 +++++++++++++++---- ...t_airflow_dags.py => get_workflow_dags.py} | 0 2 files changed, 35 insertions(+), 9 deletions(-) rename assets/api/capi/handlers/{get_airflow_dags.py => get_workflow_dags.py} (100%) diff --git a/Pulumi.cape-cod-dev.yaml b/Pulumi.cape-cod-dev.yaml index 962a2c6..30572b2 100644 --- a/Pulumi.cape-cod-dev.yaml +++ b/Pulumi.cape-cod-dev.yaml @@ -769,9 +769,9 @@ config: # TODO: memory and timeouts for these functions need # some love handlers: - - id: "get_airflow_dags_handler" + - id: "get_workflow_dags_handler" name: "getdags" - code: "assets/api/capi/handlers/get_airflow_dags.py" + code: "assets/api/capi/handlers/get_workflow_dags.py" layers: - capi-all funct_args: @@ -779,7 +779,7 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "getdags Lambda Funnction" + description: "getdags Lambda Function" memory_size: 128 timeout: 3 - id: "get_workflow_pipeline_profiles_handler" @@ -792,7 +792,33 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "getdagprofiles Lambda Funnction" + description: "getdagprofiles Lambda Function" + memory_size: 128 + timeout: 3 + - id: "post_workflow_run_handler" + name: "postdagrun" + code: "assets/api/capi/handlers/post_workflow_run.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: "postdagrun Lambda Function" + memory_size: 128 + timeout: 3 + - id: "patch_workflow_run_handler" + name: "patchdagrun" + code: "assets/api/capi/handlers/patch_workflow_run.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: "patchdagrun Lambda Function" memory_size: 128 timeout: 3 # TODO: remaining airflow handlers @@ -806,7 +832,7 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "getdaps Lambda Funnction" + description: "getdaps 
Lambda Function" memory_size: 128 timeout: 3 - id: "get_dap_profile_handler" @@ -819,7 +845,7 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "getdapprofiles Lambda Funnction" + description: "getdapprofiles Lambda Function" memory_size: 128 timeout: 3 - id: "get_dap_status_handler" @@ -832,7 +858,7 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "getdapstatus Lambda Funnction" + description: "getdapstatus Lambda Function" memory_size: 128 timeout: 3 - id: "get_dap_logs_handler" @@ -845,7 +871,7 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "getdaplogs Lambda Funnction" + description: "getdaplogs Lambda Function" memory_size: 128 timeout: 3 # TODO: if not used as part of workflows (probably @@ -860,7 +886,7 @@ config: runtime: "python3.10" architectures: - "x86_64" - description: "submitdaprun Lambda Funnction" + description: "submitdaprun Lambda Function" memory_size: 128 timeout: 3 - id: "get_raw_objstore_authz_handler" diff --git a/assets/api/capi/handlers/get_airflow_dags.py b/assets/api/capi/handlers/get_workflow_dags.py similarity index 100% rename from assets/api/capi/handlers/get_airflow_dags.py rename to assets/api/capi/handlers/get_workflow_dags.py From a853c51fe95c76cd5686b4f48c8a96fb48029253 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:25:51 -0400 Subject: [PATCH 12/53] added eps for triggering and halting workflows. 
fixed error schema for existing workflow eps (some errors can return either a detail string or a detail object) --- assets/api/capi/capi-openapi-301.yaml.j2 | 304 ++++++++++++++++++++++- 1 file changed, 295 insertions(+), 9 deletions(-) diff --git a/assets/api/capi/capi-openapi-301.yaml.j2 b/assets/api/capi/capi-openapi-301.yaml.j2 index b8525c7..b8aab7f 100644 --- a/assets/api/capi/capi-openapi-301.yaml.j2 +++ b/assets/api/capi/capi-openapi-301.yaml.j2 @@ -1321,21 +1321,40 @@ paths: additionalProperties: true total_entries: type: integer + "400": + description: Airflow Bad Params (400) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" "401": - description: Airflow Unauthorized + description: Airflow Unauthorized (401) content: application/json: schema: - $ref: "#/components/responses/AirflowAuthnAuthzError" + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" "403": - description: Airflow Forbidden + description: Airflow Forbidden (403) content: application/json: schema: - $ref: "#/components/responses/AirflowAuthnAuthzError" - + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "404": + description: Airflow Not Found (404) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" "422": - description: Unprocessable Content + description: Unprocessable Content/Validation Error (422) content: application/json: schema: @@ -1345,7 +1364,7 @@ paths: Server error while fetching workflow details. 
x-amazon-apigateway-integration: httpMethod: "POST" - uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_airflow_dags_handler'] }}/invocations" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_dags_handler'] }}/invocations" passthroughBehavior: "when_no_match" timeoutInMillis: 29000 type: "aws_proxy" @@ -1402,6 +1421,21 @@ paths: items: type: object additionalProperties: true + "404": + description: Workflow/pipeline Not Found (404) + content: + application/json: + schema: + # this is a CAPE 404, not one that came from + # airflow, so we define our own schema + description: + Could not find specified workflow or a + pipeline specified as part of the workflow + type: object + properties: + detail: + type: string + {# TODO: what other responses here? #} "500": description: Server error while fetching workflow details. @@ -1428,15 +1462,267 @@ paths: passthroughBehavior: "when_no_match" timeoutInMillis: 29000 type: "mock" + /workflows/trigger: + post: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id for a workflow to trigger. + required: true + requestBody: + description: Trigger an airflow DAG run. + required: true + content: + application/json: + schema: + type: object + # this format will be defined by the needs of the + # DAG and cannot be defined in a more specific + # manner here + additionalProperties: true + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: + An object describing the submitted DAG run. + Format described in the Airflow REST v2 API + docs for POST /api/v2/dags/{dag_id}/dagRuns + (200 response). 
+ items: + type: object + # this is not our format to define, so + # we'll just say it's any object format + additionalProperties: true + "400": + description: Airflow Bad Params (400) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "401": + description: Airflow Unauthorized (401) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "403": + description: Airflow Forbidden (403) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "404": + description: Airflow Not Found (404) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "409": + description: Airflow Conflict (409) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "422": + description: Unprocessable Content/Validation Error (422) + content: + application/json: + schema: + $ref: "#/components/responses/AirflowUnprocessableContentError" + "500": + description: + Server error while triggering workflow run. + x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['post_workflow_run_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + /workflows/halt: + patch: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id for the workflow being halted/failed. 
+ required: true + - in: query + name: dagRunId + schema: + type: string + description: + The dag run id for the workflow being halted/failed. + required: false + requestBody: + description: Halt an airflow DAG run. + required: false + content: + application/json: + schema: + type: object + properties: + note: + type: string + description: An optional note to add to the halted DAG run. + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: + An object describing the halted/failed DAG + run. Format described in the Airflow REST + v2 API docs for PATCH + /api/v2/dags/{dag_id}/dagRuns/{dag_run_id} + (200 response). + items: + type: object + # this is not our format to define, so + # we'll just say it's any object format + additionalProperties: true + "400": + description: Airflow Bad Params (400) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "401": + description: Airflow Unauthorized (401) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "403": + description: Airflow Forbidden (403) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "404": + description: Airflow Not Found (404) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "422": + description: Unprocessable Content/Validation Error (422) + content: + application/json: + schema: + $ref: "#/components/responses/AirflowUnprocessableContentError" + "500": + description: + Server error while halting/failing 
workflow run. + x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['patch_workflow_run_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" + {# + TODO: + - add: + - get dag run (/api/v2/dags/{dag_id}/dagRuns/{dag_run_id}) + - for status of the dag run + - get dag run task instances (/api/v2/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances) + - for status of the dag run tasks. 
+ - support a qsp for running tasks (or maybe any valid task state + https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/utils/state/index.html#airflow.utils.state.TaskInstanceState) + + #} # reusable components that follow OpenApi 3.0.1 spec components: responses: - AirflowAuthnAuthzError: - description: The 401/403 response format from the Airflow REST API + + AirflowErrorDetailStrResp: + description: + The error response format from the Airflow REST API returning a + detail string type: object properties: detail: type: string + AirflowErrorDetailObjResp: + description: + The error response format from the Airflow REST API returning a + detail object + type: object + properties: + detail: + type: object + # generally follows RFC 7807: + # https://datatracker.ietf.org/doc/html/rfc7807 + # but needs to be able to handle additions + additionalProperties: true AirflowUnprocessableContentError: description: The 422 response format from the Airflow REST API type: object From 12a8eca73ff2d3a8758a8aaec8893b3f096f1c5d Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:50:55 -0400 Subject: [PATCH 13/53] made the mwaa config a single env mapping instead of a list. if we end up wanting more than one env we should probably make the move to either a managed ec2 instance or managing our own instead of MWAA hosted --- Pulumi.cape-cod-dev.yaml | 77 ++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/Pulumi.cape-cod-dev.yaml b/Pulumi.cape-cod-dev.yaml index 30572b2..7a7371a 100644 --- a/Pulumi.cape-cod-dev.yaml +++ b/Pulumi.cape-cod-dev.yaml @@ -765,6 +765,7 @@ config: - "ETL_ATTRS_DDB_TABLE" - "CRAWLER_ATTRS_DDB_TABLE" - "CANNED_REPORT_DDB_TABLE" + - "MWAA_ENVIRONMENT" # env_vars: TODO: add env vars for this API if needed. 
# TODO: memory and timeouts for these functions need # some love @@ -1329,9 +1330,10 @@ config: # and no compute environments will be deployed compute: # `environments` (mapping, optional) - # Environments for aws batch and mwaa (aairflow) are configured + # Environments for aws batch and mwaa (airflow) are configured # in this section. There are mappings for both types of - # environments, and both mappings contain lists of mappings. + # environments, with the airflow mapping being singular and the + # batch environments being a list of mappings. # # Both types have some shared config keys and then keys specific # to that type. Shared keys are as follows: @@ -1346,9 +1348,8 @@ config: # config file. At least one subnet must be defined here. # environments: - # Each mwaa mapping in the list has the - # following schema in addition to the shared keys mentioned - # above: + # The mwaa mapping has the following schema in addition to + # the shared keys mentioned above: # * `dag_path` (string, required) # The path in the CAPE meta assets s3 bucket where dags # will be stored for airflow to load. 
MWAA syncs these @@ -1388,39 +1389,39 @@ config: # here: # https://www.pulumi.com/registry/packages/aws/api-docs/mwaa/environment/ mwaa: - - name: airflow-env - dag_path: airflow/dags - airflow_version: 3.0.6 - airflow_config: - core.default_task_retries: 2 - core.parallelism: 20 - celery.worker_autoscale: 5,5 - environment_class: mw1.small - subnet_types: - - compute - ingress_subnet_types: - - compute - - vpn - extra_env_args: - min_workers: 2 - max_workers: 10 - logging_configuration: - # NOTE: INFO is the lowest level supported in MWAA - dag_processing_logs: - enabled: True - log_level: INFO - scheduler_logs: - enabled: True - log_level: INFO - task_logs: - enabled: True - log_level: INFO - webserver_logs: - enabled: True - log_level: INFO - worker_logs: - enabled: True - log_level: INFO + name: airflow-env + dag_path: airflow/dags + airflow_version: 3.0.6 + airflow_config: + core.default_task_retries: 2 + core.parallelism: 20 + celery.worker_autoscale: 5,5 + environment_class: mw1.small + subnet_types: + - compute + ingress_subnet_types: + - compute + - vpn + extra_env_args: + min_workers: 2 + max_workers: 10 + logging_configuration: + # NOTE: INFO is the lowest level supported in MWAA + dag_processing_logs: + enabled: True + log_level: INFO + scheduler_logs: + enabled: True + log_level: INFO + task_logs: + enabled: True + log_level: INFO + webserver_logs: + enabled: True + log_level: INFO + worker_logs: + enabled: True + log_level: INFO # Each batch mapping in the list has the # following schema in addition to the shared keys mentioned # above: From 3285922c9a6b898d4f51effff194cec0e7116e57 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:53:30 -0400 Subject: [PATCH 14/53] changed to use correct env var for the mwaa environment. 
fixed a var name --- assets/api/capi/handlers/get_workflow_dags.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/api/capi/handlers/get_workflow_dags.py b/assets/api/capi/handlers/get_workflow_dags.py index f41ec2c..de37443 100644 --- a/assets/api/capi/handlers/get_workflow_dags.py +++ b/assets/api/capi/handlers/get_workflow_dags.py @@ -1,6 +1,7 @@ """Lambda function for handling a get of airflow workflow details.""" import json +import os import boto3 from botocore.exceptions import ClientError @@ -21,9 +22,8 @@ def index_handler(event, context): :param context: Context object. """ - # TODO: this will be used in a number of endpoints. we can pass it in or add - # it to capepy or something else. - env_name = "ccd-pvsl-airflow-env-mwaa-env" + env_name = os.getenv("MWAA_ENVIRONMENT") + # TODO: add this to capepy mwaa_client = boto3.client("mwaa") @@ -41,7 +41,7 @@ def index_handler(event, context): api_path = f"{api_path}/{dag_id}" # TODO: ensure this comes back as python boolean - include_all = qsp.get("includeDisabled", False) + include_disabled = qsp.get("includeDisabled", False) request_params = { "Name": env_name, From 3bd1e5d577bfdb0045b2b35b59abb67aadf4902f Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:55:06 -0400 Subject: [PATCH 15/53] removed code moved to capepy. changed to handle 404 for both workflows and daps. fixed comments. 
--- .../get_workflow_pipeline_profiles.py | 77 ++++++------------- 1 file changed, 23 insertions(+), 54 deletions(-) diff --git a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py index df18108..2416549 100644 --- a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py +++ b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py @@ -1,7 +1,6 @@ """Lambda function for handling a get of pipeline profiles used in a workflow.""" import json -from decimal import Decimal from botocore.exceptions import ClientError from capepy.aws.dynamodb import PipelineTable, WorkflowMetaTable @@ -11,39 +10,6 @@ json_serialize_the_unserializable, ) -# # TODO: need to add some abstraction of this to capepy. it's repeated here and -# # in get_object_etls at least -# def bad_param_response(): -# """Gets a response data object and status code when bad params are given. -# -# :return: A tuple containins a response data object and an HTTP 400 status -# code. -# """ -# return ( -# {"message": ("Missing required query string parameters: dagId")}, -# 400, -# ) -# -# -# # TODO: this should probably go elsewhere. issue is you can't json serialize -# # Decimal values, and some of the values coming back from dynamo in the -# # pipeline profile spec are Decimal. So this shims them to floats. -# def json_serialize_the_unserializable(val): -# """Serialize a value (e.g. Decimal) that is otherwise not json serializable. -# -# Right now this just handles Decimal, but can be updated as needed. -# -# :param val: The value to serialize. -# :return: the serialized value. -# :raises: TypeError if even this function cannot serialize. -# """ -# if isinstance(val, Decimal): -# # this results in a reduction of precision which can cause issues. In -# # our case (for now at least) it's ok, but we may want to consider other -# # mechanisms like string conversions or forcing some rounding. 
-# return float(val) -# raise TypeError(f"Value {val} of type {type(val)} is not json serializable") - def index_handler(event, context): """Handler for the GET of the profiles used in a workflow (airflow dag). @@ -67,7 +33,6 @@ def index_handler(event, context): resp_data, resp_status = bad_param_response() else: - # TODO: need new capepy as it has the workflow table class workflow_table = WorkflowMetaTable() wf = workflow_table.get_workflow_by_id(dag_id) @@ -76,23 +41,27 @@ def index_handler(event, context): resp_data = [] resp_status = 200 - # TODO: handle a change in status code due to potential issues - # in pid for loop? what can go wrong there? - - for pid in wf["pipeline_ids"]: - dap = dapreg_table.get_pipeline_by_id(pid) - if dap: - resp_data.append(dap["profile"]) - else: - # TODO: What other errors to handle here? - resp_data = [ - { - "message": f"Could not find pipeline profile for pipeline with id {pid} " - } - ] - resp_status = 404 - break - # And return our response as a 200 + if wf is None: + resp_data = [ + {"detail": f"Could not find workflow with id {dag_id} "} + ] + resp_status = 404 + else: + for pid in wf["pipeline_ids"]: + dap = dapreg_table.get_pipeline_by_id(pid) + if dap: + resp_data.append(dap["profile"]) + else: + # TODO: What other errors to handle here? + resp_data = [ + { + "detail": f"Could not find pipeline profile for pipeline with id {pid} " + } + ] + resp_status = 404 + break + + # And return our response however it worked out return { "statusCode": resp_status, "headers": { @@ -119,8 +88,8 @@ def index_handler(event, context): code, message = decode_error(err) msg = ( - f"Error during processing of submitted data analysis pipeline for " - f"queuing. {code} {message}" + f"Error during fetch of workflow pipeline profiles. 
" + f"{code} {message}" ) return { From 6b33b8740121d053db118bb19150513f9ca92e32 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:55:56 -0400 Subject: [PATCH 16/53] added policy enum and supported policies for mwaa env --- capeinfra/pipeline/airflow.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/capeinfra/pipeline/airflow.py b/capeinfra/pipeline/airflow.py index 313834f..2f782d4 100644 --- a/capeinfra/pipeline/airflow.py +++ b/capeinfra/pipeline/airflow.py @@ -1,6 +1,7 @@ """Abstractions for Apache Airflow.""" import json +from enum import Enum import pulumi_aws as aws from pulumi import Input, Output, ResourceOptions @@ -23,6 +24,11 @@ class MwaaEnvironment(CapeComponentResource): to change or have a new set of resources defined. """ + class PolicyEnum(str, Enum): + """Enum of supported policy names for this component.""" + + invoke_api = "invoke_api" + @property def default_config(self): return {"dag_path": "airflow/dags", "extra_env_args": {}} @@ -275,6 +281,24 @@ def __init__( } ) + @property + def policies(self) -> dict[ + str, + list[aws.iam.GetPolicyDocumentStatementArgsDict], + ]: + if self._policies is None: + self._policies = dict[ + str, + list[aws.iam.GetPolicyDocumentStatementArgsDict], + ]() + self._policies[self.PolicyEnum.invoke_api] = [ + { + "effect": "Allow", + "actions": ["airflow:InvokeRestApi"], + } + ] + return self._policies + # TODO: feels this should be able to pass exactly one role, or be able to # pass exactly one role to exactly one named batch compute env. 
# TODO: ISSUE #338 From d86fdf4b730c2805db0069333fd5d83b4c3eef98 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:57:36 -0400 Subject: [PATCH 17/53] changed cape rest api to handle old style resource grants as well as new style (resource provided policies) --- capeinfra/resources/api.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/capeinfra/resources/api.py b/capeinfra/resources/api.py index 07d51d5..f643d43 100644 --- a/capeinfra/resources/api.py +++ b/capeinfra/resources/api.py @@ -42,7 +42,8 @@ def __init__( spec_path: str, stage_suffix: str, env_vars: Mapping[str, Output[str] | str], - resource_grants: dict[str, list[Output]], + legacy_resource_grants: dict[str, list[Output]], + policy_statements: list[aws.iam.GetPolicyDocumentStatementArgsDict], vpc_endpoint: aws.ec2.VpcEndpoint, domain_name: Output, *args, @@ -95,7 +96,7 @@ def __init__( self._ids_to_lambdas = {} self._configure_logging() - self._create_api_ep_lambdas(resource_grants) + self._create_api_ep_lambdas(legacy_resource_grants, policy_statements) self._create_api_authorizer_lambdas() self._create_aws_proxy_roles() self._render_spec() @@ -152,7 +153,8 @@ def _configure_logging(self): def _create_api_ep_lambdas( self, - res_grants: dict[str, list[Output]], + legacy_res_grants: dict[str, list[Output]], + policy_statements: list[aws.iam.GetPolicyDocumentStatementArgsDict], ): """Create the Lambda functions acting as endpoint handlers for the API. @@ -170,15 +172,23 @@ def _create_api_ep_lambdas( # read from DynamoDB in another, this role's policy must have both # those grants). This may not be the long term implementation. 
# TODO: ISSUE 245 + + # TODO: migrate policies into each policy granting resource + all_policy_statements = aggregate_statements( + [ + Output.all( + grants=legacy_res_grants, + ).apply(lambda kwargs: get_api_statements(**kwargs)) + ] + + [policy_statements], + ) + self._api_lambda_role = get_inline_role( f"{self.name}-lmbd-role", f"{self.desc_name} {self.config.get('desc')} lambda role", "lmbd", "lambda.amazonaws.com", - # TODO: migrate policies into each policy granting resource - Output.all( - grants=res_grants, - ).apply(lambda kwargs: get_api_statements(**kwargs)), + all_policy_statements, "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", ) From 046f94b73de0f34f3aeda3833128de7f583cc11b Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 12:58:34 -0400 Subject: [PATCH 18/53] removed base class mwaa env tracking (this is private swimlane only now) --- capeinfra/swimlane.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/capeinfra/swimlane.py b/capeinfra/swimlane.py index 68c79fc..dab0bf3 100644 --- a/capeinfra/swimlane.py +++ b/capeinfra/swimlane.py @@ -10,7 +10,6 @@ # TODO: ISSUE #145 this import is only needed for the temporary DAP S3 handling. # it should not be here after 145. from capeinfra.datalake.datalake import CatalogDatabase -from capeinfra.pipeline.airflow import MwaaEnvironment from capeinfra.pipeline.batch import BatchCompute, BatchJobDefinition from capeinfra.pipeline.ecr import ContainerRepository from capeinfra.resources.certs import BYOCert @@ -80,13 +79,6 @@ def __init__( # to. e.g. self.az_assets["us-east-2b"]["inet_nat_gw"] is the internet # facing nat gateway for az "us=east-2b" self.az_assets = dict[str, dict[str, Any]]() - # TODO: there's maybe a common base class that could be made for batch - # and mwaa (and later additions too) environments. I don't really - # see how the envs are used after being created, so i don't yet - # know if keeping all compute envs together is a good idea or not. 
- # For now we'll just keep them separate until we show airflow - # working. - self.mwaa_compute_environments = dict[str, MwaaEnvironment]() self.batch_compute_environments = dict[str, BatchCompute]() self.job_definitions = dict[str, BatchJobDefinition]() self.albs = {} From 39b42e87a67273e282f69f714a72f66e7b7c7b73 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 13:00:15 -0400 Subject: [PATCH 19/53] moved all mwaa env managent to private swimlane. changed mwaa env config from list of mappings to singular mapping. starting to move caperestapi perms to resource provided perms. added MWAA_ENVIRONMENT env var --- capeinfra/swimlanes/private.py | 64 +++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/capeinfra/swimlanes/private.py b/capeinfra/swimlanes/private.py index 74cfda0..2f3d0a2 100644 --- a/capeinfra/swimlanes/private.py +++ b/capeinfra/swimlanes/private.py @@ -8,6 +8,7 @@ import pulumi_aws as aws from pulumi import Config, Output, ResourceOptions, warn +from pulumi_aws.iam import GetPolicyDocumentStatementArgsDict from pulumi_synced_folder import S3BucketFolder import capeinfra @@ -101,6 +102,8 @@ def __init__(self, name, *args, **kwargs): aws_config = Config("aws") self.aws_region = aws_config.require("region") + self.mwaa_compute_environment = None + # will contain a mapping of env var labels to resource names and types. # these may be used in api configuration to state the need for a # particular env var to be passed into the api lambda handlers. 
by @@ -212,6 +215,9 @@ def _deploy_api(self, api_name): resource_grants = {} for ev in self.apis[api_name]["spec"].get("env_vars", []): env_vars.setdefault(ev, self._exposed_env_vars[ev]["resource_name"]) + + # TODO: this is the old style policy management we should be moving + # away from res = resource_grants.setdefault( self._exposed_env_vars[ev]["type"], [] ) @@ -221,6 +227,25 @@ def _deploy_api(self, api_name): if self._exposed_env_vars[ev]["resource_name"] not in res: res.append(self._exposed_env_vars[ev]["resource_name"]) + # TODO: this is the new style policy statements we should be moving to + policy_statements = [] + + policy_statements.append( + self.mwaa_compute_environment.mwaa_environment.arn.apply( + # adding invoke rest api perms for the `Op` default role in + # airflow. Need Op as we will be configuring runs in + # addition to triggering (if no config, we'd be able to use + # User role) + lambda arn: add_resources( + self.mwaa_compute_environment.policies[ + MwaaEnvironment.PolicyEnum.invoke_api + ], + f"{arn}/Op", + arn, + ) + ) + ) + self.apis[api_name]["deploy"] = CapeRestApi( f"{self.basename}-{api_name}-api", api_name, @@ -228,6 +253,7 @@ def _deploy_api(self, api_name): self.api_stage_suffix, env_vars, resource_grants, + policy_statements, self.api_vpcendpoint, self.apigw_domainname.domain_name, config=self.apis[api_name]["spec"], @@ -248,25 +274,27 @@ def create_airflow_compute_environment(self): # environments. but we support more if needed. for now we assume they # will have the same set of policies. 
- for env in self.config.get( - "compute", "environments", "mwaa", default=[] - ): - name = env.get("name") + mwaa_cfg = self.config.get( + "compute", "environments", "mwaa", default=None + ) + + if mwaa_cfg is not None: + name = mwaa_cfg.get("name") env_subnets = dict[str, aws.ec2.Subnet]() - for st in env.get("subnet_types"): + for st in mwaa_cfg.get("subnet_types"): env_subnets.update(self.get_subnets_by_type(st)) ingress_subnets = dict[str, aws.ec2.Subnet]() - for st in env.get("ingress_subnet_types"): + for st in mwaa_cfg.get("ingress_subnet_types"): ingress_subnets.update(self.get_subnets_by_type(st)) - self.mwaa_compute_environments[name] = MwaaEnvironment( + self.mwaa_compute_environment = MwaaEnvironment( f"{self.basename}-{name}-mwaa", vpc=self.vpc, subnets=env_subnets, ingress_subnets=ingress_subnets, - config=env, + config=mwaa_cfg, aws_region=capeinfra.data_lakehouse.aws_region, # TODO: add policy attachments extra_policy_statements=None, @@ -275,16 +303,28 @@ def create_airflow_compute_environment(self): # now configure the new mwaa environment to be able to pass the # batch roles to batch # TODO: ISSUE #338 - self.mwaa_compute_environments[ - name - ].configure_batch_compute_pass_role( + self.mwaa_compute_environment.configure_batch_compute_pass_role( [bce for bce in list(self.batch_compute_environments.values())] ) - self.mwaa_compute_environments[name].configure_batch_job_def_policy( + self.mwaa_compute_environment.configure_batch_job_def_policy( [bjd for bjd in list(self.job_definitions.values())] ) + self._exposed_env_vars.setdefault( + "MWAA_ENVIRONMENT", + { + "resource_name": self.mwaa_compute_environment.name, + # TODO: this is just to keep the same interface we were + # using for exposed env vars and res grants before + # moving to resource provided policies. 
We should + # ideally never care about type anymore once we have + # moved everything to that pattern and should get rid + # of this + "type": "untyped", + }, + ) + def create_env_rds_instance(self): """Creates the CAPE environment RDS instance.""" From 92ed38c33be5ec4168da3f36a31c5bf649500a91 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 13:01:07 -0400 Subject: [PATCH 20/53] untested ep handlers for triggering and halting a workflow run --- assets/api/capi/handlers/halt_workflow_run.py | 98 ++++++++++++++++++ assets/api/capi/handlers/post_workflow_run.py | 99 +++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 assets/api/capi/handlers/halt_workflow_run.py create mode 100644 assets/api/capi/handlers/post_workflow_run.py diff --git a/assets/api/capi/handlers/halt_workflow_run.py b/assets/api/capi/handlers/halt_workflow_run.py new file mode 100644 index 0000000..135fc06 --- /dev/null +++ b/assets/api/capi/handlers/halt_workflow_run.py @@ -0,0 +1,98 @@ +"""Lambda function for handling a patch to halt an airflow dag run.""" + +import datetime +import json + +import boto3 +from botocore.exceptions import ClientError +from capepy.aws.utils import decode_error + + +def index_handler(event, context): + """Handler for the PATCH to halt an airflow dag run. + + This endpoint is a proxy to the airflow + /api/v2/dags/{dag_id}/dagRuns/{dag_run_id} endpoint. Done as a lambda + instead of direct integration so we can massage data as required. + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. + """ + + # TODO: this will be used in a number of endpoints. we can pass it in or add + # it to capepy or something else. 
+ env_name = "ccd-pvsl-airflow-env-mwaa-env" + + # TODO: add this to capepy + mwaa_client = boto3.client("mwaa") + + try: + req_params = {"dagId", "dagRunId"} + + qsp = event.get("queryStringParameters") + + if qsp is None: + resp_data, resp_status = bad_param_response(list(req_params)) + else: + dag_id = qsp.get("dagId") + dag_run_id = qsp.get("dagRunId") + body = json.loads(event["body"]) + + if not all([dag_id, dag_run_id]): + resp_data, resp_status = bad_param_response(req_params) + else: + update_mask = ["state"] + note = body.get("note") + req_body = {"state": "failed"} + + if note: + update_mask.append("note") + req_body.update({"note": note}) + + request_params = { + "Name": env_name, + "Path": f"/dags/{dag_id}/dagRuns/{dag_run_id}", + "Method": "PATCH", + "QueryStringParameters": {"update_mask": update_mask}, + "Body": req_body, + } + + response = mwaa_client.invoke_rest_api(**request_params) + + # no matter the status code of the response we can return the same + # thing. the difference in 200 vs non-200 is that the json will contain + # an error string under the key "detail" instead of workflow data in + # the non-200 case + return { + "statusCode": response["RestApiStatusCode"], + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. 
+ "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps(response["RestApiResponse"]), + } + except ClientError as err: + code, message = decode_error(err) + + msg = ( + f"Error during halting/failing of airflow dag run. " + f"{code} {message}" + ) + + return { + "statusCode": 500, + "body": msg, + } diff --git a/assets/api/capi/handlers/post_workflow_run.py b/assets/api/capi/handlers/post_workflow_run.py new file mode 100644 index 0000000..57521cb --- /dev/null +++ b/assets/api/capi/handlers/post_workflow_run.py @@ -0,0 +1,99 @@ +"""Lambda function for handling a post to trigger an airflow dag.""" + +import datetime +import json + +import boto3 +from botocore.exceptions import ClientError +from capepy.aws.utils import decode_error + + +def index_handler(event, context): + """Handler for the POST to trigger an airflow dag. + + This endpoint is a proxy to the airflow /api/v2/dags/{dag_id}/dagRuns + endpoint. Done as a lambda instead of direct integration so we can massage + data as required. + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. + """ + + # TODO: this will be used in a number of endpoints. we can pass it in or add + # it to capepy or something else. 
+ env_name = "ccd-pvsl-airflow-env-mwaa-env" + + # TODO: add this to capepy + mwaa_client = boto3.client("mwaa") + + try: + req_params = {"dagId"} + + qsp = event.get("queryStringParameters") + + if qsp is None: + resp_data, resp_status = bad_param_response(list(req_params)) + else: + dag_id = qsp.get("dagId") + dag_params = json.loads(event["body"]) + + if not dag_id: + resp_data, resp_status = bad_param_response(req_params) + else: + + # TODO: we can add some additional run params that airflow + # supports: + # - specific run id (probably don't want people using this + # usually if ever) + # - note (freetext string) + # - run_after (if we want to get into scheduling from + # users) + # - there are others like data_interval_[start|end] that + # are used internally in dags that process data of + # specific intervals + request_params = { + "Name": env_name, + "Path": f"/dags/{dag_id}/dagRuns", + "Method": "POST", + "Body": { + "conf": dag_params, + "logical_date": datetime.datetime.now().isoformat(), + }, + } + + response = mwaa_client.invoke_rest_api(**request_params) + + # no matter the status code of the response we can return the same + # thing. the difference in 200 vs non-200 is that the json will contain + # an error string under the key "detail" instead of workflow data in + # the non-200 case + return { + "statusCode": response["RestApiStatusCode"], + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. 
+ "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps(response["RestApiResponse"]), + } + except ClientError as err: + code, message = decode_error(err) + + msg = f"Error during trigger of airflow dag run. {code} {message}" + + return { + "statusCode": 500, + "body": msg, + } From 39d7b3db5f52ed937e48e476b0b3cabe7ddcf516 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 13:35:06 -0400 Subject: [PATCH 21/53] added pipelineRunnable: false to bactopia base dev profile. --- assets/analysis-pipelines/bactopia/bactopia-base-dev.json | 1 + 1 file changed, 1 insertion(+) diff --git a/assets/analysis-pipelines/bactopia/bactopia-base-dev.json b/assets/analysis-pipelines/bactopia/bactopia-base-dev.json index 3777c80..6dc21a9 100644 --- a/assets/analysis-pipelines/bactopia/bactopia-base-dev.json +++ b/assets/analysis-pipelines/bactopia/bactopia-base-dev.json @@ -2,6 +2,7 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia", "pipelineId": "bactopia-bactopia-base-dev", + "pipelineRunnable": false, "pipelineDescription": "Execute Bactopia development release", "project": "bactopia/bactopia", "version": "dev", From 116defdfc6ab014c704662399d580dfd20eae2a6 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Tue, 12 May 2026 16:13:15 -0400 Subject: [PATCH 22/53] updated capepy to 2.3.0 --- ..._workflow_run.py => patch_workflow_run.py} | 0 ...-any.whl => capepy-2.3.0-py3-none-any.whl} | Bin 10810 -> 11581 bytes assets/lambda-layers/capepy/capepy_layer.zip | Bin 13516 -> 14551 bytes 3 files changed, 0 insertions(+), 0 deletions(-) rename assets/api/capi/handlers/{halt_workflow_run.py => patch_workflow_run.py} (100%) rename assets/lambda-layers/capepy/{capepy-2.2.0-py3-none-any.whl => capepy-2.3.0-py3-none-any.whl} (52%) diff --git a/assets/api/capi/handlers/halt_workflow_run.py b/assets/api/capi/handlers/patch_workflow_run.py similarity index 100% rename from 
assets/api/capi/handlers/halt_workflow_run.py rename to assets/api/capi/handlers/patch_workflow_run.py diff --git a/assets/lambda-layers/capepy/capepy-2.2.0-py3-none-any.whl b/assets/lambda-layers/capepy/capepy-2.3.0-py3-none-any.whl similarity index 52% rename from assets/lambda-layers/capepy/capepy-2.2.0-py3-none-any.whl rename to assets/lambda-layers/capepy/capepy-2.3.0-py3-none-any.whl index eb2968957090ce247a3db37e149a8f57312fb280..48fd793ca8d3e3b8daa038ffef5fe2db49ebcafc 100644 GIT binary patch delta 4490 zcmZ{ncTf{byT${AA|>?RdnW{tE=@|1BGL>Xy-5)eLNB2cLJa{#DFO*il-@+7sFZ*p zAX25b1BxIBNcHl4bI(0TXYS7I>>vBQ@9ggEZ=ZMD&0kxCObm#C3;+Ot?5guZ(I5x` z6s>Wc0{SZ*5kX7>07P)2qd5>eRyMvXiVRVkcVT0wgV6895oLkEvDA^5iqGZOwKvV$w=6fo?QD z<(<1PMW@qN7(g44`&U)ARy&?|zHWH3B4f~A)~GRglV|m7*!Q@Eb_f{pNp(a}SAv0- zB9C!~!uc7)C8#@ZzzV$EsFny_#L*Y+WWe6e=_qU(>tx zd64(kklaL-OR*g9X*xRP`*3#Ui*fIr`Y*pRRN;VuQ584auFGbx+3(zG?S!5B3%`w)ka=3zmowSP<2_E>=e~IWG1elK~IwK zpl*q9#YvTjgN6dsDmPZ(sSUiop_pxSfQMDVsT>T2Hha8JPlgT|mr)|+vLUej?lR~k zjVEbF47BFf(%J|qQ~3#R9+F9k?j26kEzAB$fV&$0)&;S&Nrs@@ze}qj&xb^IIVlf} zehFtSUJ$Lc)@)#l)74s5-NWrNMuo*`71O#(aOH4_e*WZdb2uqc=aV+dvFPR@sw(;M z0ybK?hAOjoY!EL-NLjgk)2@JyEW1=3+wd-1EdmslvlZcCnK_0Gkxd%O8GWk#MEhgv zfSm=gomVsA@&%$Vpd+`)anL2gxh?@dH;H-HB@URT?v9sBhw?Q|@%a%XM+;HmLBsQy z!UMnELit>CTGx6f+nv_E*!e{I*imUX$t~Y>w>p?8W)MCDpwfiOCjE zR5HmXFbn`O4(7y!x4hU#=w1GN)YzOulDQ`v+xB1(?n+H^*LQ99x*KEe+CdoI?2?80| z??h91sHt@oaJdjL>}x3mxxu@J<)Jnv2#ugdY4=h#ll-XXlEH zK)^!GLF5@n{SKSttC9r@x%DCE6zR&f;Ph+*1~+4fTeNT`(sNrQZ&6bV1}TBcC2*=~ zI~XajKd%7X>-3YkH0(4q%)xuMz9f}=EwhKhp>$@{)NQ^6E5@DMr)Eg@wN5qGK9#EV zT`cBvk9k&iy|fyOd?K7YcZcKl-RhDN!~~yLyY%(emC$nQW(`x7zutVPN?(M3kSVm! 
z;&%G}P=&E#FDF|=bbcBj9{W8)?d&KjLa}&7$HhU}5_F=&w(yp|6@vE0Bh)^S*4+$r z)YR9f_Xylr;77F~KFE;BR<)==j;J`nmqst&k+^ZKtn8&H91H#0$En0RC?4N7N03IX zh!SZ_we&SGA{_MH3LvbFBimj#qONZ>AKbbm5*E)4x8yi7lbvQZ?8y89aoabV`N>M1 zZM@fi$3MaPX7yoC$8)IClYHjJ?>p=8qlI(tI|qE}%6j>7kwrzUy1u<8XAL%ugWmUd zEo@(!I`st~GBv^8%6lf@q}5$}Y7pOuSYu}G@bOyj#^iop?p!GOLHkq4qA!)!9)?Wl z4m*tV?i4PaUQ%)g8w6wXHhx9J2SeO}F0a^g!U0BfOv5afV+WJ0;IxTf<|9cYAooy% zeTArjp_-Nj>CW~i#!JYD0XOoyoP8CaZ|F?aUB053$;bY+?Dr)Cg}0bUcH*{&V^OlD z0nP(yfYLSR;z8fvZKIK#WEV&PZ9FA!BfJ_Ksj!3;(B)g!8Fz^QfLT(EBMlqE;)d^> zG|l{>IB461T{@n!kkWC+DKK6|Ts=8V2fTw1+OxdM=2IEzPn;Yy0zk zPIo<74i5E^wRejb{&4WcK{pmL=x5MVWZh!vou@Tj{WRE0x=V(go%4cTdbc$q-N5*x zG0PB6MGy3d7Tjm@2_WHx2p4b+0WgH~ve)~xA(wq-0(A>@I{2n`*vcO#*$6yW;%xC& z?8{^-gSq2omlFK7#C`=2xOAgvFxd-!7RGEvN99n?_quJsK>d^8Lmc9V=6cF^Uuo^- z(EAp~rlIfN)Xm4}7r~hxN(r}8zn-e?8RU=Do(AHEk(s6_o>^7xp{fI;_eY3JXUGlh zh71v*I$6n%M+?^G;*}ar)!7%`A+CB`k#3tdMc$Qk#+i2N{#%5Q$!k+HFj5gqu19>x zDXIr+MHaO=u7>VP_i+gKyrPT5s8c5Z$m*6n@=|Ul{(8Z*PP`9ii z$qU1y*Ihe>xw&!3rXebw_%gzIg5%Mxr)NdfWWdlBW(FP`Zov?J8nBw0$( zA9fzhrK3uoZ19&e$}o;4(u)2O<=NQc#0RnWM%gIJfX-g!KBPi-Uf1FB#B0Jhx>B`L zb!*ZzR2+9#K?#CQc}-~t_6pW*qk)q3fK?0ii5G}2rggK=@t^J_Tatc1gc8qhXSW{ ziM(IN8{%{p*-zfN4~1upU3;nr)T$frYH4?sX*Xju?gf0zfb&RHs)E{m52n|w3V)H| z3LU<$cm=nWVt!E>gY9-|JyoB&BM5${(bRhq0=fck?W%m^zAEdS9sOMWh(jf1q+~IE zyle=0nyAg%HeN6ljlqK~VP|$XMOHVSom8PYX$GQ*Tw}VssI>v?jf*o*L8amL&bIJ+ z!G-zb$l!Bh-{gATzTR4f(x9MgQ@t<3l5R+rXx)x70rM&JdL8DSa=opKXEYySc?OGJ zVuLDAdi*cBWHrgE6} zie`q5GR}yES#njUGfnY1so9)YrUAL+znleU9DC-p1vp;w4NTxXkJn|{cuRa(IZSuT zA=VE0adSwMSyINrRx{9acdHgh)Y}`B$+q?LY}W^+P+QyinsKnXhb0iJPv#Z|lUhb> z_NS2@^l(;=>|&rHa4K%dsOX{n`rOiNbmUVTgvOkZx8Cz)dG+~%e6Y+1aA-gj{_sT5 zer)JPI~GQ~)0n~JC2U#W5yIASPyXZLJrt4p36kiS!rkji%mV6nWb3F!So^w3Mg&3Y=2I(1gAO|vxJ#_tM{owCLQhnBCvJS zoO67Iu6fM&He4nityZ$#Dd|*!pQs4#f$g-*Qb! 
z%g}<9W%+W8zz(moSt_2X&aQJ0LEPsUS#=3NQ}n!<;)Wb=ohnLg(dy@vK@tX^m-^3G zGjZ#%#)su{_176t`ag6WIBEM(YgO{?b%`A%5_@UgnK!!@#$$D;B@sfv&TggHPgxf9 z%JyqEl#Y1yWXnt}26ExU=1+pK@T_5UxZNUTvt{)`Hj_X1te{>fbCFD0mBzaQCXl!m zGcSw%@ojB`TG>yF$YmWr^Ip-(D?i)}{FTpak&zTX$GG>(fNzFSzx=b=;LR^;=s*D= zl_M7?(tRIEz2ezB6AP~LJSnjBPF$H7B7VNH_KT1c*2Z=y)Cbxfn9Zuzv1~WJ5th8E*=4JNe>@4U+`V1j*cPKpbB}kG!1dL zTEzgd<+;Vn=;>tS zYk5zHZWx{WoNRZ?dz`ZI;30JahUfbjpnAa4(6SDyga zfW`^h7;1JX=XM=vlK=ovzj_Bae~ZG|F}j4GUMqj@MA#S_vs)WL7tQlz@{z$E=K1voE&9m?K(BunB^VECr`SFM?xj>% z5R7cA{0u5StAq_{htY`t*!i*W6tW_G@+G?DqSjLRQ?cjxmpARD-nSo8fvO>z>5p)T zY`;9}7T7+Vxx6?uEuCAQAWwlqDy3s0sq4PHFL%Z4I z%*6`B`)l!QMSdh$;w)=Z!O9v@Sh|3)E$b_@YcU>>4XcR~*5(Wck&DSa)>&u1o_u;+ z!Xz@RHrn2k@oRT%Y}$qj&jXO9+ZY@XSm2xMu|MU$nN$>|ZG`D*n}1fOF2~Y)Y4G}0 zsW&qdh_!`{MTJF)3H=6DNbh5L)IOBe#m+=2hop4R^W910YN4n$iX$pS`ld zJHo^YeMaBDgLMzNFUp2cv{AH5^T8Yr(K}r%$q}c(L}h@6G@WQyba#8g&g^JVLtvKr zsUqF6a(H~0dVBMmvBV0Fewhzg>-O%KXmJ&HpM4DT-z)^w<}(qF=4_t|NYU&3DVr--e4RA;EBo z@N+_~xpY8@gaAM@1pvVK_w?0>i$POh>_mh(p)sxF7w{{EAo-s;Zhev z`rmSV!z_siibDT1vA;FAn4qi8{MXX{b~HvzhA6ct^iOjCiRfYeEuvqP5Ai2e|A_Cf u{9F7R^nWu3Hq^hVV{E4L=h^-V-Tx&eAQbsMDTZE*lbq;^O#!k0i2er+0qDE{ delta 3722 zcmZ{n1ys~eyT^ZYH%NExE+q}3A8 zBu1BXkSR;he2vZyXJsDD{Ve%@vdKE_HX{E|pTbrl{&pK#T~u-CHa@ADwWahy@iWnrqhN?uEX~2L|CpB}>8De&+*Xh6%!@qavN>`}?9lq`5i_ zL=ZPBQnW?LL~_2+_}(?(F_O}IqpJncIO3#Y>ow2wOYDAdpQ3kw%B=Z2U&4A}BCd!+ zc|wY4`TayNs7!ANyDf(somFper-`{!UrIKW0H`Om3#|8SUak*USJk$!)~FAxuA$q; zeV<4X%9p|Jo18{kx0K7aB3a;z12Umb{H)k_r15QY;tyksAx!U(+@T&FLh3@03Dr5` z%05;fkr`sH&$Tbl;gB*(;ranm$~RF5#Lqh!7rgO7;tWIs#+aP>4t@13y+@%-Cl8N^ zX2>mueiwxV+ZzfQP8LyH9zP?rv{$JHU7E?&j@X@pUb_)@ShkO6`&>~gqCU)-^AZ;4 z3b3(#v%d1hUUPaNsPu-a{fo3yJ=>1WJQ@F%j`(iHnbw^%JwIow@}?5->#um@uOg!z zY$U})p!o-t@l%{VdwNx7I(0GloGeHrb8WZ0sJvqU&bGG2Tq4o;-VJ6u;#TJSgn3d+`fWTIEJ^3Kptj680#<-|@ z(?H_Y`g3R~!uq76J9g=ZZ3MlK@LVpzV47bPSToLP8CP^2;lyBMy-_%@%FjfP(6PB* z0?V^2txI}6?Wyq9FmwUetuB4C1rOZp4K53bDYd(G{JaY0DKbhn?N9Z2zMi#bUjI9< zwVqt;K>UM819z*CjAuqGHUD&r0-eu9?C20l%{QMEX;Mse%*=D-bW=%-3lt%!i8AkEAQg 
zSjq|rh+Kp0Vedc+AynY8kVpKs%YM%0D`+FXNv!L+svoVeft_T-$2$&?zyg0V1C5Rn z>9YVEqmO$Z{S8iYj)+&YE=R?wW^cI)g4U0BA^D%I#*c_>8q1aju@2GJ{L2S%jc#bg zn;B*?Uu-77w`*w(qdg$4eYX`+6jh4 ztDiTSbq$&j1_b8Q-(k?|T|B8vm{d(oFe3w&Yxy zf4qwh7F2%uNsRQxy}-4BP{~1;w>d|b_i-?5=hE}i^aL5sOa6x6{6bP&^F8JCJU!N= zk2*hZ->W)pnI8JOW;!DIZH6%|CV$1&zh;3tJ{7t82_>@D>SA2QW57kpL`tjUOKtp< z=FfbLj9fl$Ap~k_i9ZwK0X5ka_4pSZnmQjJ1XRv*LWJ-Dpco$j7%sB6y)EW8#$UqL z&s)OR$HmQC9OECH)a3DAk`nG0Oq2`3!v|GEDG?v5bh$s%GCs5$ji~aA@v_WvfINIY=NWjH)ff2bSP|)g*j{sDJZ`;h`IOKdzv|ZOh;yW+ z`@{*yjpj<{f9R; zi;)dfpbVLOwK$Z@WF#-I63Fsr_iJocV|VRPp>If2&~gUSRi} zuHEC6&V&^$9DfHJs}naw^XnEzMYQe#$)QXX`PH9lSF0_V2FI*60x92fTtX?@zp%R; z?#fdomrmlG%(=r?psg=8R1e60?NWHZP&nTs?|a!+_yf{DB)vY?qE?KyZ$Q+>PF|JmmB0I z*Z{)?EH$IkrkY(rt(nBDm|iDli7bkvkz28i!vbW~-L}`FRCaEtx02N5D`g8SVbgE4^)unT5`K_o0oz}-oPd9tZ zy3%%fkZFPjO0XZ|L9Vegox>m@B+Wo^?|LHRCR zsv33JKs%cqw{Cj*0cQ_YYnjAf7pe$dGuwDKN{8>yEZZd6Wo(WRn~6AgZytMmV%idI z8)G!l{tU1Bov(iKJajgCa*SP40R;?)u!KRG_Y%sxydOL(p_XjRII*}7jTlg426;d^ z@`f|~*!Rbl!M-yJUK%lukBsXkvtnzL4trNP6>{;a_g38o z6zp6xT(2jr@L4S|Ub#q6yQ#YB)A)~z`yi`DzADMS0%UA$vr4r6E0=aO$D?Zqxr<9y z-OHvZ+pAKf(HxoQ5ATnma}8eIR?G&a($|W)^9iz-NwwpBKUEZrn81u%23%soB&w_Y zfw{I0%!hUHB7e#f008IzzaC?_hM}=$`5>|)4+W?W|Foev3e&oBM8KqpQvKp<=gg%| zGg5P0Qqc2&O`()3bC&(SY}xuwCqD$c{cUOtlpeWuH9Qa0j%2Rz?cwr^jJl@x5`)s2 ze}6`PP?lp-nDV@dGz$S%(r&?ymhao2*py?$ae3swp32HZS=>~A7m%6aj6QbM43+@; zho!tBdDT|--C^Bdc^CL(TDoMJ*RNU{y|I56<4gc@cmI;;wj+I~rlPWOQ>NCyrYmcQXkNktiE%&Vd_@_Z ze%>51e_Pqgopn(w*nxyOKb`T{BKf_$;E|OpC#Dy?P1=tEAy1}g+1(g~+@pkUn*?VU z;&Ry9ES?LAiskn`jY3Mm*)sKwGWHbSJsp{)9!lEYr?VWxoHz0*P>ZZ#OG(#K7SdA96|56`M2|c6++~7L=?=}CeM!)CM=Ka6t!b*v< zL4d#E{2do6@Xt7`uPC3<-{kzq8IMNrpU!`9{ofp+bcrr>5&$SJ>hXnYLWTc5`WG4k BZ*Twr diff --git a/assets/lambda-layers/capepy/capepy_layer.zip b/assets/lambda-layers/capepy/capepy_layer.zip index b23b6ba9a263f16e333be844da9a0d5821586e53..05a182aeec6b7d9abce3e24aa0972c87260d1abc 100644 GIT binary patch delta 6137 zcmb7I2|UzY_aA0#VeI?9C(9UQ&z5Cm$=)FQ7ACvw#vWrgNLjKkDQhHTsq7JwHM@us 
z5wiBD=XqXvpI7z2^SQs7`F!sAo^#Ld-19y6ys{g10c)xPLFWL+BP{6Q^#m|I$Gu3P z{qgkh!Utf08Z7H6KrPqnDgJBWRpvijn8fpk3+KfJ|48)@WB-2epfP~_7xZ-T0KnhM z0st&9bRmW+M;}hO{_*_f2GLZQ3cok}2WBAVRmd+e$&YNsJ(?`A@3BZfENlS4j{$v% zsfav7X2rnBX;zFpVDTEf{|Z)dm+Dl!S5_EJkH(Wn)Jp#FN33Y-WxEK2al6}c``hB} zw?5}_JX7Jh$=Iy&$!({1r?(=gOjO>URk!R(l7%{@e3T|^q3S-%_ziwi#o7!v61S8O z#YysYoh-xGqy6^{U84=DANkUqZs_ppdsxy&Ous1xDSc|61$#yK8Z+B*NPi{(nU@L;i3mN3h1 zY(mtRo}{8j1BjLF==qAFM2L1z(%VofT=UC9Ov(yK-X3`FE5F#Mbd(UmId-7;ry#{4 zVZXTY%!G~+NR#heuSF3_I**`84LNg%psU7$5vZ zxIf)q0d#ReEJ=N%6JG32Xr{)w*n-&I>E5K6L7guU@`A57=fj4G?Q!CQV5L{4=X-G| z3wDq!sS%Xoq%ZU2Y!Xek*qKV)Usxfh7jX&JuaU@#utr2QSx9vBee|Wtn&d7rl&ho- zQIemLTFYCd2=ED!&mytrWlW~$e)rbZXmg0S+%cw)e#*vHwV2Une za*%J$qjh8oNfak&(IA**TVN_{ePrGj)y(`xkP4f|g@ucyi*R-!ogWW`uYGsWDo4z+ z7Z~$qV79Xcv}L{Aad$DKpQCQVz1LAsaX3zEe-B=aL# z1KuBOhdaSvk=gL1@9=s|OSnAf5nZimyTx)%c%~`dUpV`VzFgs`*ix5F=}L?M+n6s? z-g4s78kk%Gr07h$VJaK-yf=KAt@ewqTM5cLr#k`9Xh3-A+~Pil2RWTqAfcT!Qo1Rf z(F%inU0(E-jeAp%2&~1mu!Lvr%0cO*QFly>PVp=$TN=DgsP0sSOYbVt8E zAG`Wjo-5Q3{GDe?0#X|N?qvQD6o(xx&-RCcJLQZ9|+6Fx+ zznD{!8*z^-a$Jl4>a~*W-a%G}I)RH%X1xmxt7Ww%KRkb396ROyS}?;jk;&qVizG$i zT5^&s_rwT*C-{q>^xjs0pIFwcqLrD1K6powcJc+;6KDdW6Dj=~r~I;qxtxj$nVrYN zBpa$0`C1TLsJKQFx<$wUIYg>IeTi+uI6J!*8@A2)eLYVy*i3BusxD5zEH_YrzoxB{ z0%@jVlMbb+>RooY6mW5=dgIC=kc%hYSD$`MM`(mfts(v^)Mj0M^cxLPlEzxQmTQ>d z<&w?hhKDfmdudcvUsm3`Z%ytav^H2{C6v>I{WCLZl$=^M_bQF5x)8`4t9r&ywJq8_ zHz^;(Ux?U;=*uaxN;!w3~P_V4LlT|ckvoj>M-OHp2HSe8u8RhAbeGml z3N+T;)0mEMal4e(Wa%U#_HaRQu>7!|cr-2d!;JGsKUAmQV5EhqdoD^S$IY@M29Psv znbqZV%)BQgg6_aor850h$l9Az{{F+0-*#M1Xj5Ba#o15U<0>s&xNFDh>%6P!@5cx z=9{(?$T{%Ub6W&j>phuPCyGVt@=>azsBo+LH*NhMQZ@!Fkm0cxV zmTU%3ueRMlTN@x-tCa^R%0`;*&3Jk{3Ypr3a=qU8Xr>h0<*eG0X;`C=NR=Ncx$kKp z&?HDl$8bOO=R#oGT26cCyq%!yp38YQ)Er8bn>C0~J1t9ljp{sDLjhJy9^-#)C zY+xN;p)I(3+lc+4I71CWtSz20A8wmBHXY_Ff8o1lhgCBYMT}0Gbk@_L&D<)0F}zZ$ z1$n6Kcy8u>m3trc#YsS6#`}hzhPL<1=j9VYDw*z-F8o|ih@K6XwRExhD~y2hdLrVr zQOsjf3f;vU>aVtthnCPw3YBr^Im?Ck8tzOQ>hcuHJ}yZ*KzLazFZtUn8f7AiNHyY3 
zWL%dpp+k7c;ZZowd40w^tP#V6H|I0;%95?sY{hTn*{<~f*v6GFE z%6|&+5gEzgWb~f8?dNTim?_beFyx~WbiH^hCxHTRUlLZ5vl!b0?Sznb#>ViSM`C>t z@oshGT@v2fwq6!s(tt$GK+eV9_I^E*`_|ri6vn=ZynU^2|AEfZCuP@gcjbl8i5zuN zQDC;t)&kf^``B!fUZN!IL~+X!}|^R|>0d)UKN)W~Z3Oet!s zQz0wOZ2z!!uGSUW6)hKGFqm=S;P%Y@fkKJ!xKMHyA3bbGDxl{asTUOsRN6R1r8Q-( zO4kD#SZZfGZFw+Lp8Q^J2P*Q6u8c5V`SbS<2!C%hS`@{2hZ9Z^mjrHh{;Nsz0`eMB*srQH}I%BMMO>VAo z41l8PwOhI4eflUy&->^zmc&vHvsy?S?H9#>7)3trdB$zqC)c`}<2iYRG3@jFFt(8!H?L19z03N-S1>d|I(xa1&!Vt%u%fT@Ysm0>bL>Mk*;oqx$N4fONYe|O zgG;zU4N>=usjVdWa#Jq8EHl#eSjeF)Hy^P~A{4R-Y#^=U%zkoiJMm%pdB;xe>oSAv&(E?=2qJP14I5L(=6#qAHsyJ} zFzDzE#M%k+Z`B)rN`jGM&Fw7dTnK_5IMTXe>)Dr5m|d;mG+HuvEfmj|vX@@L89#+9 zAw`TRgtLdw2Tcg&e*HYZKqTQT543vUIr>V>!XeF92l9zkXNm%+bDUzWUe$_q6jP<( zole)~kJ1Sq>>xsOMurI6^$4O_`_|E5NU{A+x;`R&cCZ`k;lliP=FHu66KU0bZ6+>f z3gdGWEk%v*9E_y#X>B)&P7}0`x+g;a=)MO(nzF|S4Rn0a&`55ovpAHnOY&Tx05Mjx zbYTaU{=VMRz>&y^AuYxWp~j=^ijGdpcjquLSNgu4lN`VJ=x(#-eT9qzkP5zWPPxC3 z{&hvto`jT#xT{Nu0fMTX`UOE^{17Rs4d747<0y9=83VvOejE>lW3#mWb^Z}{me!7L z)^7igtkM87_C*i@0LHukfa)>orL$4hR4yxO=qUawyHs7J^dca%u&9Q zJ8_}L1^OQ@{I84qqrRt=IkJGTnSE*e^LwQ4WU&lDAcdNj_Stet`4m>ZyLbf;EikLR zgI3GjNCTPg4c$@IqMCSLDNi;u321d~B7k&)>+qMiKxsb!PR(4a08y1e+GOKZ5*L%P?f9uewHx!H^O?@rxmbyY z+8jlt-0)a-E@PT{9lRhr=z_suHcfS$+4+N^HJUL?)|NCfW6ayQxk!ZsQ+tX}&B4Jj z3zEz?!TL7+?jGq*DR%3_wu`qj0~FNJEwvL5@@0gnTMt#A)#o6nD8V!}wA7N+;*^lj zE)TX;R@sQSN?dat#bVOjJs&UT+<~H)Wwz%AQ6?lMk8eme6jj;W?3z>>^Gb_z(EO%B z5DqAKS9Q-qHN%U9DjBJ6Q|MVeXBjDLD_pbzqK=*yUr3`Xszmr{8YJwI)ve^(dfnir zMT!}T8N@iEH@nF!4yJ{%qTt+uAT{E~1nY$6y0De8zFUyzAAovGr#^$NCJD$`V?kqgJd0P_`&`WAA&J38G$GLD)dcWYUGOC zaZEdVyWenfJPKW`zj{Ic6(pLfrzMX?{u%fH>c4*y16JgY(f$TQ*jfA@ko{==aqsLO zwBv8eQ;6XJ09p%!3N6EjCG|f$YiFCr{xc9dkdL434*<~peAHMga>5wLYWX>T?~osD zEO~wdwqw%&jpNXb{Kw#Un8)C}Xkv2E`9Jab4;4ZXW4<$0oRlg*^~s(=bY%ZPLM41| zw;6O)+<*Z9k-uGbls^D@L8f^Bu@fCAKydaBPt>E!1ZaVA;1juh0ir*W4$BS=o< z1W52MA36UI>_53{0RW<>>_?FQg9rX4*2$UpISwU%4j(yT>SHV({ePwCU%$W;&z!0u zPnlW#7xZ6L=O;w_Cok<3QJz8*rvG86(Z3A622)g2`(f>wPW|7GP&nlX5W^3A(|_Ur 
z#DKp#H76c9#eb*v5d4=8o3nNNhY$ZOMoy99DOGNKKeqFi^P_}WnSN^8ANQL90J5Xc Nr=uqHTgdUh{{z2ej5YuO delta 5235 zcmb7|bzGEL7snrlp}Rx6XGm$Vn1P{N5Rj6Pkd%;S2nD5K6a?v1N}(aY0J>L`{El_WOeug@Gvm)z!fRF#pvS09XN3 z9)=31=2{Q=QTfLXqG&*J|L*uVW&lV-@E@3zXkT&Bg%#K&5-+-f1pw)^r~?oJd1Ui9 z13j#-<}@!(TDPx93J)BUNmp^i_H{P*D0PSdn+SUI;KSm1Zk;l4Q-T^io;#eTi;Lq? zQC{=K7;fyxk@~RI{frJ_UpEIQ6Tk_U+Y~dn-pnrWPy;FNmPOmkX=c@}+hS9^1a~&0 zQ9#axI9B-Ya(orjHLKnvw>O$B_U}`CIzkn8^{)XGZnbI>f=))t$y zEY6;(65V*Cm5NPRUawyVE<(J_S5n)EBK4ctgeR8fl6nOq8QJdfu<7FBtGi~dL#mUe zyYEaYBSsHo3{#<;WBHmo^ctQ4WYQz6?<0_#a%sFz42t^o=J#ND>Y61A%(nLFbFSGt zK`-eIp9N9H(XeG+y@IWvN( z{kl4fmaytm6$OawE+Y|rr$MGiO#4kwBtd5nkzV}=w&>-Yn7Ukovm?S-^DjpH&XQ~J zUKOt!oP<|9kc3~WD!?0y2H>&H9LpW6r>lbCGKv;uvQ<81~yH-xKT}00=FXaAXLwlm;)SJxZqrn5t%9Mk}f(oo(XIw_5=B9XKj3%Yvg5<MEtt>1&a{gB+x@{$+%`Qnk zur>@`bNn2vptwlXYg>;G+dXCD`@|l*#i)5W&Fu^%@p-&Xo+fvcHD#S?%;I^bapEzp|H7%Im~4h&S#<^k%w>u47v51FMOuctlUs&ER+S z=W09laV8lY&*`}CWM9fb>7`1M@oxb`QTEngrVRtCwLCUReE>!*MnkB?1+Y3)!wk2N#dEjlT^=j*t zm-?6VUF^lw)1i?vX=`?zCU-U=S+9+Ucd<-s3g>!JmSHCB^E;6>_WDwnALxa@H68uX ztgu?%+Fc-e4b!1>`>IzwLtO&A`^6xa`JD_TOKLuq0^Q7_Pn$W>h94vhepsv5)~f^d z_Q@i@MxoF-yI&bSDw7bchr|Qs%e&@n8(t^yn!38c#|>YE7*W>_rCwTpbzR?&^X#M7 z{J0Nq_$>7V2=&@NOW!@cUB_!;Q*c<26fMd~?WOVF!#|-h%Sl|-$#F?!xAo2XjnXd- z<9+XzbO(gqPf#X?XDyg{mCq1IB`7Vv*5p}gwAC(UQD-8g!X;I9C)OtYOmdXdeuZb0 z1D*=aQBb~^5@-FFz{-KIZXf{Q#7C84oJKi|g4rFe zx|s<$dwok&zvJ(c1%ily3y10cTo?e5hXzQXbDJ>ANemx7P?*0l^NX!@d?7@L@bJUR zfM9@-G6970uS!*z-;h$?G9Qd0H5BEkwmp3AE$2^Igbia6?B#qE@+ee9D*twA!v;Q1 zUb}wZSDCfhFf@DOdz+>`FFNlnftXIe zf1S=9_-s`i;wW#tzGO|g*LcdTpozJQL4m+#`?+v^N^l=&6;g$0BKgaj;YUETO? z9j%-M|4mZ-S~btW#y<8W0AS1q0PrK2=>Ne~m1Gg>7ZE?_|8J6hIS!K-2|lp(5zMiV zp8E%=23k?%ahKE(Ixsny4(y*WIXYTS6Ox4fqmO6%5~ys}o=#mXAtD&SAn2RIva6gE zBE+>AtPnRC;DAaU?q{uKuio%!pHeZuv(Os7pisx&LQiMS43T@c#!wulvV+9=C>n}? 
z?lbIMnGsd*knxHS;R^<8O(~0q=BI<~SW3l{aBSijH`uZj)r9-10I_#%l8+q-UPo47 zJ0ya_@{Hbzc{DIKx|I5}$8%j-h=HmH4iy!*$br|}a!}tnL$kE0t~b_bC>rEm zm8=#0sw<;6H}jO*V?*K^o7hLl75sdZx5f;Ln9_O`W}aQ4C-^wL)0##wm0)+^N<>XR z2Oe>|+4;~58<*gXIF(t_Y24Dz%k=gohfQr# znOwZQ+lJq=j;@55Q$6XOPvcKf9ot&--xNNZTVG8%=UXr}4Rsff+#@|en%#}ceK@qt zot9AG;`L$Cwsmd(8J{7U!Yk@~tDS{yi5neCiJa=vyq^So?II>x`!xycAeH)+IxmfV z4h{o)L^5_tOq7OIW9QD%H7Q=ubf2Lv9kg80x6#{cTy^mP4sOX*PzgNAxxjT^Z{_(Q z8OV!9tX8N^+d!UwBKXju9yxSpU$-I5EL>}(`94P3b9c44Y3O9w=+G%4Nll>NpEeLm zvlU&`=6dsfKCw{KgMC9!XmAgVh6O2(Uwr_Ic(G^X=RP6nA{%aXTf1^JHKHOe>Wn2R zw&S7UQdtn6bl3pyj?B3zs{~q0XS240s_xb<%}>*sf}oeNNPdzOaikQJ;>QY7ywt`H z`S|3lQ0)iZTsM??@PgkT?1I9~-0BmPl419xU*eZ=8={J~1A8%J*yFvJO1NS)z>Mp- zBPf1y*{z0k)~um*%dInvk{K9fTZ{HRlIC^~>@G$xuo=%#o)(rIOtAZ7i2Nl z{X(2O2OfRXq%>*wf;#nv+;CVqm^m+h(V=jRu(>o|M4ut~@RnzxtMr%GyS36$v(Oq! zXi%(Q^>BqRi|EG{=Hn4P$*T~di{9q?=pu8pH^E1H6BWQf^E>fP)NvX=K!^~l?_Ay+ zEVeLIT|m{H+daDHp^9v_XSP4y@OP>YOpws`)p@p>6fcj<$04?H9#PhmM-L@ZQwYEI z8nTs4wBhQRz!yB?JZ$7mP30T|zi`YcMP*($x>;OwkQc)Q0N5|D>iHRve-jysdX9#R=B*;$__!TzaJYC z8909~C=;m|LR0MC!Q>GVdS3OBv!?R&iv#?f!gQUS_>@}QRC#0x=^F3uyr;#!X_2!4 zvMv+<>pd~iP{Ygc=ibTjHu`&3a(;q9cfYVJB(u!eqCK$v9qSC6XhWMA&GI=Tt)~|E z%5tK0TAdx&$;&KUx1`wcc(Zge!OE-Kfl&-`{j2M9uKdDRHoJC4n6;YLZxI|gVvA=R_SSeBmz<2P<4{O?0 zv#!l89M+|3^r=0w^iiJ`ydya5p)_4wNT!xKrIfXP*4Tk=R>98_hbAkD^6M=AjqAI{ zc8t!Q$g8;B&Pd4JG161^6kHCWTvv7c(sJt3>6;8wxcK<9y6%N42_oo|)wCXLNj^v3 z8<-^Si(TI)Ga7O}tYMQR=2=1&;4a0_Buz-1txOmKf-6cUH{qDm8*GPfNAK!>6nL{N zZ&X3PK;Ey#3(k}Jf?8$3n)@O@9Ky-+sjMO=<^}`rpt2)P$H96QBh4fj?mcHv$E(bJ zjV%7F+)9(lpcnY_9+svG28an0eF{gS*XhwQ{99)F{=flfzkiVe4X~q6XD>G!Cr5OO z!usRX`xmM5lZLMPR~W%D*b(hJ%-+@<4V3-wMCxP25CA}FVbJiL)Hl?O5G_^%Y~-66 zCpts^cL#WnIUtJGXj&KoT>sAbc66^fZE*HTTK$J16#R30C=FpL_P^P*f1ZPH4HXC* z^Z&M?Ut;1fhrK8$E?9U>NVey*hCfL;T=um2~00DA&5iJV&f?R07!lB z6*?>d*?}uWKX?=}CjkADyZ|763^Y&r0|Zw6se(`D-7zVA4BATZ6N;9{M86-UU-w$5 zG_j*~W>=;h)+M8vtaF;cGd+xs4%= zvKOa6$%Gusd&f{BkRPZ32FjDIlRpyHk9>8E27>;e)$>!K3M7e`|8!l5h3*FckfZ+w M= Date: Thu, 14 May 2026 12:59:54 -0400 
Subject: [PATCH 23/53] fixed bad query param name. removed useless word from error msg --- assets/api/capi/handlers/get_workflow_dags.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/api/capi/handlers/get_workflow_dags.py b/assets/api/capi/handlers/get_workflow_dags.py index de37443..b2da6ac 100644 --- a/assets/api/capi/handlers/get_workflow_dags.py +++ b/assets/api/capi/handlers/get_workflow_dags.py @@ -35,7 +35,7 @@ def index_handler(event, context): dag_id = None if qsp is not None: - dag_id = qsp.get("dag_id") + dag_id = qsp.get("dagId") if dag_id is not None: api_path = f"{api_path}/{dag_id}" @@ -80,7 +80,7 @@ def index_handler(event, context): code, message = decode_error(err) msg = ( - f"Error during fetch of workflow data from airflow queuing. " + f"Error during fetch of workflow data from airflow. " f"{code} {message}" ) From 54cc8ff56a568cb2aeac089563bfa2295957ed34 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 13:01:04 -0400 Subject: [PATCH 24/53] fixed missing param to function call. changed get_pipeline_by_id to get_pipeline per v2.4.0 of capepy --- .../capi/handlers/get_workflow_pipeline_profiles.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py index 2416549..f4a0a2d 100644 --- a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py +++ b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py @@ -5,7 +5,7 @@ from botocore.exceptions import ClientError from capepy.aws.dynamodb import PipelineTable, WorkflowMetaTable from capepy.aws.utils import ( - bad_params_response, + bad_param_response, decode_error, json_serialize_the_unserializable, ) @@ -19,18 +19,20 @@ def index_handler(event, context): :param context: Context object. 
""" + req_params = {"dagId"} + try: headers = event.get("headers", {}) qsp = event.get("queryStringParameters") if qsp is None: - resp_data, resp_status = bad_param_response() + resp_data, resp_status = bad_param_response(list(req_params)) else: dag_id = qsp.get("dagId") if dag_id is None: - resp_data, resp_status = bad_param_response() + resp_data, resp_status = bad_param_response(list(req_params)) else: workflow_table = WorkflowMetaTable() @@ -48,7 +50,7 @@ def index_handler(event, context): resp_status = 404 else: for pid in wf["pipeline_ids"]: - dap = dapreg_table.get_pipeline_by_id(pid) + dap = dapreg_table.get_pipeline(pid) if dap: resp_data.append(dap["profile"]) else: From c72d4ca076a2d955d4646ef0bce34b0a16a60fde Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 13:02:00 -0400 Subject: [PATCH 25/53] changed to use env var for mwaa env name --- assets/api/capi/handlers/patch_workflow_run.py | 5 ++--- assets/api/capi/handlers/post_workflow_run.py | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/assets/api/capi/handlers/patch_workflow_run.py b/assets/api/capi/handlers/patch_workflow_run.py index 135fc06..1acbc20 100644 --- a/assets/api/capi/handlers/patch_workflow_run.py +++ b/assets/api/capi/handlers/patch_workflow_run.py @@ -2,6 +2,7 @@ import datetime import json +import os import boto3 from botocore.exceptions import ClientError @@ -20,9 +21,7 @@ def index_handler(event, context): :param context: Context object. """ - # TODO: this will be used in a number of endpoints. we can pass it in or add - # it to capepy or something else. 
- env_name = "ccd-pvsl-airflow-env-mwaa-env" + env_name = os.getenv("MWAA_ENVIRONMENT") # TODO: add this to capepy mwaa_client = boto3.client("mwaa") diff --git a/assets/api/capi/handlers/post_workflow_run.py b/assets/api/capi/handlers/post_workflow_run.py index 57521cb..bb41567 100644 --- a/assets/api/capi/handlers/post_workflow_run.py +++ b/assets/api/capi/handlers/post_workflow_run.py @@ -2,6 +2,7 @@ import datetime import json +import os import boto3 from botocore.exceptions import ClientError @@ -20,9 +21,7 @@ def index_handler(event, context): :param context: Context object. """ - # TODO: this will be used in a number of endpoints. we can pass it in or add - # it to capepy or something else. - env_name = "ccd-pvsl-airflow-env-mwaa-env" + env_name = os.getenv("MWAA_ENVIRONMENT") # TODO: add this to capepy mwaa_client = boto3.client("mwaa") @@ -39,7 +38,7 @@ def index_handler(event, context): dag_params = json.loads(event["body"]) if not dag_id: - resp_data, resp_status = bad_param_response(req_params) + resp_data, resp_status = bad_param_response(list(req_params)) else: # TODO: we can add some additional run params that airflow From dcad8ecb72e9fa433ece5c93155921a79d950e8f Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 13:05:50 -0400 Subject: [PATCH 26/53] removed range key for dap registry (it's now by id). added secondary index to dap registry for name/version queries (version range key moved here). 
changed marshalling for pipelineRunnable to bool instead of string --- capeinfra/pipeline/registry.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/capeinfra/pipeline/registry.py b/capeinfra/pipeline/registry.py index d5eebb6..a07d366 100644 --- a/capeinfra/pipeline/registry.py +++ b/capeinfra/pipeline/registry.py @@ -107,8 +107,7 @@ def create_dap_registry_table(self): # probably be much cheaper to go that route if we have a # really solid idea of how many reads/writes this table needs billing_mode="PAY_PER_REQUEST", - hash_key="pipeline_name", - range_key="version", + hash_key="pipeline_id", attributes=[ # NOTE: we do not need to define any part of the "schema" here # that isn't needed in an index. @@ -125,6 +124,25 @@ def create_dap_registry_table(self): "type": "S", }, ], + global_secondary_indexes=[ + aws.dynamodb.TableGlobalSecondaryIndexArgs( + name="PipelineNameVerIndex", + key_schemas=[ + aws.dynamodb.TableGlobalSecondaryIndexKeySchemaArgs( + attribute_name="pipeline_name", + key_type="HASH", + ), + aws.dynamodb.TableGlobalSecondaryIndexKeySchemaArgs( + attribute_name="version", + key_type="RANGE", + ), + ], + projection_type="ALL", + non_key_attributes=[], + read_capacity=0, + write_capacity=0, + ) + ], opts=ResourceOptions(parent=self), tags={ "desc_name": ( @@ -191,15 +209,12 @@ def load_pipeline_assets(self): f"{self.name}-{stem}-ddbitem", table_name=self.analysis_pipeline_registry_ddb_table.name, hash_key=self.analysis_pipeline_registry_ddb_table.hash_key, - range_key=self.analysis_pipeline_registry_ddb_table.range_key.apply( - lambda rk: f"{rk}" - ), item=Output.json_dumps( { "pipeline_name": {"S": profile["pipelineName"]}, "pipeline_id": {"S": profile["pipelineId"]}, "pipeline_runnable": { - "S": profile["pipelineRunnable"] + "BOOL": profile["pipelineRunnable"] }, "version": {"S": profile["version"]}, "project": {"S": profile["project"]}, From 2c308fe926bae18cbb7f420d07a43bdcd4f654ed Mon Sep 17 
00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 13:19:30 -0400 Subject: [PATCH 27/53] added vpc config and vpc access role for lambdas. they need to be in (and have access to) the same vpc as the mwaa environment to invoke it's api. --- capeinfra/resources/api.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/capeinfra/resources/api.py b/capeinfra/resources/api.py index f643d43..65b49d5 100644 --- a/capeinfra/resources/api.py +++ b/capeinfra/resources/api.py @@ -10,7 +10,7 @@ from collections.abc import Mapping import pulumi_aws as aws -from pulumi import AssetArchive, FileAsset, Output, ResourceOptions, log +from pulumi import AssetArchive, FileAsset, Output, ResourceOptions import capeinfra from capeinfra.iam import ( @@ -47,6 +47,11 @@ def __init__( vpc_endpoint: aws.ec2.VpcEndpoint, domain_name: Output, *args, + # TODO: the vpc config was added hastily here as we need the + # lambdas to be deployed in the vpc to have access to MWAA. + # may not need changing, but the design should be thought more + # about here. + lambda_vpc_cfg: aws.lambda_.FunctionVpcConfigArgs | None = None, **kwargs, ): """Constructor. @@ -74,6 +79,8 @@ def __init__( requests will pass. domain_name: The domain name (e.g. api.cape-dev.org) on which this API will reside. + lambda_vpc_cfg: If specified, the vpc config for lambda endpoint + functions. authorizer_path: Optional path to the source file for a lambda authorizer for the API. If not provided, no authorizer will be configured for the API. 
@@ -87,6 +94,7 @@ def __init__( self.spec_path = spec_path self.api_vpcendpoint = vpc_endpoint self.domain_name = domain_name + self.lambda_vpc_cfg = lambda_vpc_cfg # this will map the ids (string ids) from the config to a tuple of # (function name, Lambda Function Resource) so we can fill in the @@ -180,7 +188,7 @@ def _create_api_ep_lambdas( grants=legacy_res_grants, ).apply(lambda kwargs: get_api_statements(**kwargs)) ] - + [policy_statements], + + policy_statements ) self._api_lambda_role = get_inline_role( @@ -189,7 +197,13 @@ def _create_api_ep_lambdas( "lmbd", "lambda.amazonaws.com", all_policy_statements, - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", + [ + "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", + # TODO: This is needed to make use of the endpoints requiring vpc + # access to things like MWAA. we probably want to limit it + # to those lambdas longer term + "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole", + ], ) # make functions from the configuration and save the mapping of the @@ -238,6 +252,7 @@ def _create_api_ep_lambdas( ), memory_size=funct_args.get("memory_size", 128), timeout=funct_args.get("timeout", 3), + vpc_config=self.lambda_vpc_cfg, ) # update our mapping of function ids from the config to the name and From f2349063c9cab5cec3480e911bec830ecb743141 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 14:44:54 -0400 Subject: [PATCH 28/53] added (untested) endpoints for getting a workflow run, workflow tasks, and a workflow run's task instances --- assets/api/capi/capi-openapi-301.yaml.j2 | 322 +++++++++++++++++- assets/api/capi/handlers/get_workflow_run.py | 93 +++++ .../get_workflow_run_task_instances.py | 93 +++++ .../api/capi/handlers/get_workflow_tasks.py | 92 +++++ 4 files changed, 595 insertions(+), 5 deletions(-) create mode 100644 assets/api/capi/handlers/get_workflow_run.py create mode 100644 assets/api/capi/handlers/get_workflow_run_task_instances.py 
create mode 100644 assets/api/capi/handlers/get_workflow_tasks.py diff --git a/assets/api/capi/capi-openapi-301.yaml.j2 b/assets/api/capi/capi-openapi-301.yaml.j2 index b8aab7f..fa80c03 100644 --- a/assets/api/capi/capi-openapi-301.yaml.j2 +++ b/assets/api/capi/capi-openapi-301.yaml.j2 @@ -1602,11 +1602,9 @@ paths: v2 API docs for PATCH /api/v2/dags/{dag_id}/dagRuns/{dag_run_id} (200 response). - items: - type: object - # this is not our format to define, so - # we'll just say it's any object format - additionalProperties: true + # this is not our format to define, so + # we'll just say it's any object format + additionalProperties: true "400": description: Airflow Bad Params (400) content: @@ -1699,6 +1697,320 @@ paths: https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/utils/state/index.html#airflow.utils.state.TaskInstanceState) #} + /workflows/run: + get: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id for a workflow run being fetched + required: true + - in: query + name: dagRunId + schema: + type: string + description: + The dag run id for a workflow run being fetched + required: true + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: + An object describing the workflow run. 
Format + described in the Airflow REST v2 API docs + for GET + /api/v2/dags/{dag_id}/dagRuns/{dag_run_id} + # this is not our format to define, so + # we'll just say it's any object format + additionalProperties: true + "400": + description: Airflow Bad Params (400) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "401": + description: Airflow Unauthorized (401) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "403": + description: Airflow Forbidden (403) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "404": + description: Airflow Not Found (404) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "422": + description: Unprocessable Content/Validation Error (422) + content: + application/json: + schema: + $ref: "#/components/responses/AirflowUnprocessableContentError" + "500": + description: + Server error while fetching workflow run details. 
+ x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_run_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" + /workflows/tasks: + get: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id to get the tasks for + required: true + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: an object containing information on all tasks in the DAG + properties: + tasks: + type: array + description: An array of objects describing the tasks of the DAG + items: + type: object + description: + An object describing one task. 
Format + described in the Airflow REST v2 API docs + for GET + /api/v2/dags/{dag_id}/tasks + # this is not our format to define, so + # we'll just say it's any object format + additionalProperties: true + total_entries: + type: integer + description: the number of items in the tasks array + "400": + description: Airflow Bad Params (400) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "401": + description: Airflow Unauthorized (401) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "403": + description: Airflow Forbidden (403) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "404": + description: Airflow Not Found (404) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "422": + description: Unprocessable Content/Validation Error (422) + content: + application/json: + schema: + $ref: "#/components/responses/AirflowUnprocessableContentError" + "500": + description: + Server error while fetching workflow tasks. 
+ x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_tasks_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" + /workflows/taskinstances: + get: + parameters: + - in: query + name: dagId + schema: + type: string + description: + The dag id to get the task instances for + required: true + - in: query + name: dagRunId + schema: + type: string + description: + The dag run id to get the task instances for + required: true + responses: + "200": +{# + TODO: need the response headers (i.e. cors) +#} + description: "Success" + content: + application/json: + schema: + type: object + description: + An object containing information on all + task instances in the DAG run + properties: + tasks_instances: + type: array + description: + An array of objects describing the + task instances of the DAG + items: + type: object + description: + An object describing one task instance. 
Format + described in the Airflow REST v2 API docs + for GET + /api/v2/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances + # this is not our format to define, so + # we'll just say it's any object format + additionalProperties: true + total_entries: + type: integer + description: the number of items in the task_instances array + "400": + description: Airflow Bad Params (400) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "401": + description: Airflow Unauthorized (401) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "403": + description: Airflow Forbidden (403) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "404": + description: Airflow Not Found (404) + content: + application/json: + schema: + oneOf: + - $ref: "#/components/responses/AirflowErrorDetailStrResp" + - $ref: "#/components/responses/AirflowErrorDetailObjResp" + "422": + description: Unprocessable Content/Validation Error (422) + content: + application/json: + schema: + $ref: "#/components/responses/AirflowUnprocessableContentError" + "500": + description: + Server error while fetching workflow run task instances. 
+ x-amazon-apigateway-integration: + httpMethod: "POST" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_run_task_insatnces_handler'] }}/invocations" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "aws_proxy" + options: + responses: + "200": + $ref: "#/components/responses/200OptionsCors" + x-amazon-apigateway-integration: + responses: + default: + statusCode: "200" + responseParameters: + method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" + method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" + method.response.header.Access-Control-Allow-Origin: "'*'" + requestTemplates: + application/json: "{'statusCode':200}" + passthroughBehavior: "when_no_match" + timeoutInMillis: 29000 + type: "mock" # reusable components that follow OpenApi 3.0.1 spec components: responses: diff --git a/assets/api/capi/handlers/get_workflow_run.py b/assets/api/capi/handlers/get_workflow_run.py new file mode 100644 index 0000000..1f90140 --- /dev/null +++ b/assets/api/capi/handlers/get_workflow_run.py @@ -0,0 +1,93 @@ +"""Lambda function for handling a get of an airflow workflow run.""" + +import json +import os + +import boto3 +from botocore.exceptions import ClientError +from capepy.aws.utils import bad_param_response, decode_error + + +def index_handler(event, context): + """Handler for the GET of one or all airflow workflow runs of a DAG. + + This endpoint is a proxy to the airflow + /api/v2/dags/{dag_id}/dagRuns/{dag_run_id} endpoint. Done as a + lambda instead of direct integration so we can massage data as required. + + This endpoint does not return any CAPE specific data such as the pipeline + profiles of the pipelines in the workflows. That is a separate API call. + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. 
+ """ + + env_name = os.getenv("MWAA_ENVIRONMENT") + + # TODO: add this to capepy + mwaa_client = boto3.client("mwaa") + + req_params = {"dagId", "dagRunId"} + + try: + qsp = event.get("queryStringParameters") + + if qsp is None: + resp_data, resp_status = bad_param_response(list(req_params)) + else: + dag_id = qsp.get("dagId") + dag_run_id = qsp.get("dagRunId") + + if None in [dag_id, dag_run_id]: + resp_data, resp_status = bad_param_response(list(req_params)) + + else: + api_path = f"/dags/{dag_id}/dagRuns/{dag_run_id}" + + request_params = { + "Name": env_name, + "Path": api_path, + "Method": "GET", + } + + response = mwaa_client.invoke_rest_api(**request_params) + resp_data = response["RestApiResponse"] + resp_status = response["RestApiStatusCode"] + + # no matter the status code of the response we can return the same + # thing. the difference in 200 vs non-200 is that the json will contain + # an error string under the key "detail" instead of workflow data in + # the non-200 case + return { + "statusCode": resp_status, + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. + "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps(resp_data), + } + except ClientError as err: + code, message = decode_error(err) + + msg = ( + f"Error during fetch of workflow run data from airflow. 
" + f"{code} {message}" + ) + + return { + "statusCode": 500, + "body": msg, + } diff --git a/assets/api/capi/handlers/get_workflow_run_task_instances.py b/assets/api/capi/handlers/get_workflow_run_task_instances.py new file mode 100644 index 0000000..f2f9faf --- /dev/null +++ b/assets/api/capi/handlers/get_workflow_run_task_instances.py @@ -0,0 +1,93 @@ +"""Lambda function for handling a get of a workflow run's task instances.""" + +import json +import os + +import boto3 +from botocore.exceptions import ClientError +from capepy.aws.utils import bad_param_response, decode_error + + +def index_handler(event, context): + """Handler for the GET of all task instances for an airflow workflow run. + + This endpoint is a proxy to the airflow + /api/v2/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances endpoint. Done as + a lambda instead of direct integration so we can massage data as required. + + This endpoint does not return any CAPE specific data such as the pipeline + profiles of the pipelines in the workflows. That is a separate API call. + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. 
+ """ + + env_name = os.getenv("MWAA_ENVIRONMENT") + + # TODO: add this to capepy + mwaa_client = boto3.client("mwaa") + + req_params = {"dagId", "dagRunId"} + + try: + qsp = event.get("queryStringParameters") + + if qsp is None: + resp_data, resp_status = bad_param_response(list(req_params)) + else: + dag_id = qsp.get("dagId") + dag_run_id = qsp.get("dagRunId") + + if None in [dag_id, dag_run_id]: + resp_data, resp_status = bad_param_response(list(req_params)) + + else: + api_path = f"/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances" + + request_params = { + "Name": env_name, + "Path": api_path, + "Method": "GET", + } + + response = mwaa_client.invoke_rest_api(**request_params) + resp_data = response["RestApiResponse"] + resp_status = response["RestApiStatusCode"] + + # no matter the status code of the response we can return the same + # thing. the difference in 200 vs non-200 is that the json will contain + # an error string under the key "detail" instead of workflow data in + # the non-200 case + return { + "statusCode": resp_status, + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. + "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps(resp_data), + } + except ClientError as err: + code, message = decode_error(err) + + msg = ( + f"Error during fetch of workflow run data from airflow. 
" + f"{code} {message}" + ) + + return { + "statusCode": 500, + "body": msg, + } diff --git a/assets/api/capi/handlers/get_workflow_tasks.py b/assets/api/capi/handlers/get_workflow_tasks.py new file mode 100644 index 0000000..bb3f146 --- /dev/null +++ b/assets/api/capi/handlers/get_workflow_tasks.py @@ -0,0 +1,92 @@ +"""Lambda function for handling a get of an airflow workflow's tasks.""" + +import json +import os + +import boto3 +from botocore.exceptions import ClientError +from capepy.aws.utils import bad_param_response, decode_error + + +def index_handler(event, context): + """Handler for the GET of all tasks for an airflow workflow DAG. + + This endpoint is a proxy to the airflow /api/v2/dags/{dag_id}/tasks + endpoint. Done as a lambda instead of direct integration so we can + massage data as required. + + This endpoint does not return any CAPE specific data such as the pipeline + profiles of the pipelines in the workflows. That is a separate API call. + + :param event: The event object that contains the HTTP request and json + data. + :param context: Context object. + """ + + env_name = os.getenv("MWAA_ENVIRONMENT") + + # TODO: add this to capepy + mwaa_client = boto3.client("mwaa") + + req_params = {"dagId"} + + try: + qsp = event.get("queryStringParameters") + + if qsp is None: + resp_data, resp_status = bad_param_response(list(req_params)) + else: + dag_id = qsp.get("dagId") + + if dag_id is None: + resp_data, resp_status = bad_param_response(list(req_params)) + + else: + api_path = f"/dags/{dag_id}/dagRuns/tasks" + + request_params = { + "Name": env_name, + "Path": api_path, + "Method": "GET", + } + + response = mwaa_client.invoke_rest_api(**request_params) + resp_data = response["RestApiResponse"] + resp_status = response["RestApiStatusCode"] + + # no matter the status code of the response we can return the same + # thing. 
the difference in 200 vs non-200 is that the json will contain + # an error string under the key "detail" instead of workflow data in + # the non-200 case + return { + "statusCode": resp_status, + "headers": { + "Content-Type": "application/json", + # TODO: ISSUE #141 CORS bypass. We do not want this long term. + # When we get all the api and web resources on the same + # domain, this may not matter too much. But we may + # eventually end up with needing to handle requests from + # one domain served up by another domain in a lambda + # handler. In that case we'd need to be able to handle + # CORS, and would want to look into allowing + # configuration of the lambda (via pulumi config that + # turns into env vars for the lambda) that set the + # origins allowed for CORS. + "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "OPTIONS,GET", + }, + "body": json.dumps(resp_data), + } + except ClientError as err: + code, message = decode_error(err) + + msg = ( + f"Error during fetch of workflow run data from airflow. 
" + f"{code} {message}" + ) + + return { + "statusCode": 500, + "body": msg, + } From 4b086839346d4f5cc51f70d866b47eb80dfb1957 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 14:45:22 -0400 Subject: [PATCH 29/53] added config for new workflow endpoints --- Pulumi.cape-cod-dev.yaml | 41 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/Pulumi.cape-cod-dev.yaml b/Pulumi.cape-cod-dev.yaml index 7a7371a..3b55227 100644 --- a/Pulumi.cape-cod-dev.yaml +++ b/Pulumi.cape-cod-dev.yaml @@ -822,7 +822,46 @@ config: description: "patchdagrun Lambda Function" memory_size: 128 timeout: 3 - # TODO: remaining airflow handlers + - id: "get_workflow_run_handler" + name: "getdagrun" + code: "assets/api/capi/handlers/get_workflow_run.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: "getdags Lambda Function" + memory_size: 128 + timeout: 3 + - id: "get_workflow_tasks_handler" + name: "getdagtasks" + code: "assets/api/capi/handlers/get_workflow_tasks.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: "getdagtasks Lambda Function" + memory_size: 128 + timeout: 3 + - id: "get_workflow_run_task_instances_handler" + name: "getdagruntaskinsts" + code: "assets/api/capi/handlers/get_workflow_run_task_instances.py" + layers: + - capi-all + funct_args: + handler: "index.index_handler" + runtime: "python3.10" + architectures: + - "x86_64" + description: + "getdagruntaskinsts Lambda Function" + memory_size: 128 + timeout: 3 - id: "get_daps_handler" name: "getdaps" code: "assets/api/capi/handlers/get_daps.py" From 5a954d9dc3737137e0ee22220bfb7d21765a74c7 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 14:48:28 -0400 Subject: [PATCH 30/53] wired up a vpc endpoint for the airflow env (so we can hit the api in the vpc). 
added pulumi call to get the aws account id --- capeinfra/swimlanes/private.py | 61 ++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/capeinfra/swimlanes/private.py b/capeinfra/swimlanes/private.py index 2f3d0a2..5e663c4 100644 --- a/capeinfra/swimlanes/private.py +++ b/capeinfra/swimlanes/private.py @@ -101,6 +101,7 @@ def __init__(self, name, *args, **kwargs): aws_config = Config("aws") self.aws_region = aws_config.require("region") + self.aws_account_id = aws.get_caller_identity().account_id self.mwaa_compute_environment = None @@ -231,17 +232,26 @@ def _deploy_api(self, api_name): policy_statements = [] policy_statements.append( - self.mwaa_compute_environment.mwaa_environment.arn.apply( + Output.all( + arn=self.mwaa_compute_environment.mwaa_environment.arn, + name=self.mwaa_compute_environment.mwaa_environment.name, + ).apply( # adding invoke rest api perms for the `Op` default role in # airflow. Need Op as we will be configuring runs in # addition to triggering (if no config, we'd be able to use # User role) - lambda arn: add_resources( + lambda kwargs: add_resources( self.mwaa_compute_environment.policies[ MwaaEnvironment.PolicyEnum.invoke_api ], - f"{arn}/Op", - arn, + # TODO: this isn't the arn of the environment or of the + # execution role. rather it seems to be the arn of + # the Op role in for airflow. anyway, if not specified + # like this (previously was using the env arn with + # `/Op` at the end) it fails. 
Need a good way to + # construct this + f"arn:aws:airflow:{self.aws_region}:{self.aws_account_id}:role/{kwargs['name']}/Op", + f"{kwargs['arn']}", ) ) ) @@ -259,6 +269,17 @@ def _deploy_api(self, api_name): config=self.apis[api_name]["spec"], desc_name=f"{self.apis[api_name]['spec']['desc']}", opts=ResourceOptions(parent=self), + lambda_vpc_cfg=aws.lambda_.FunctionVpcConfigArgs( + # vpc_id=self.vpc.id, + # TODO: hijacking the sec group here, need to either make this + # one a default for a lot of things or create a new one + security_group_ids=[ + self.mwaa_compute_environment.security_group.id + ], + subnet_ids=[ + sn.id for sn in self.get_subnets_by_type("compute").values() + ], + ), ) def create_airflow_compute_environment(self): @@ -295,7 +316,8 @@ def create_airflow_compute_environment(self): subnets=env_subnets, ingress_subnets=ingress_subnets, config=mwaa_cfg, - aws_region=capeinfra.data_lakehouse.aws_region, + aws_region=self.aws_region, + aws_account_id=self.aws_account_id, # TODO: add policy attachments extra_policy_statements=None, ) @@ -314,7 +336,7 @@ def create_airflow_compute_environment(self): self._exposed_env_vars.setdefault( "MWAA_ENVIRONMENT", { - "resource_name": self.mwaa_compute_environment.name, + "resource_name": self.mwaa_compute_environment.mwaa_environment.name, # TODO: this is just to keep the same interface we were # using for exposed env vars and res grants before # moving to resource provided policies. 
We should @@ -325,6 +347,29 @@ def create_airflow_compute_environment(self): }, ) + # need this VPCE in order to be able to hit the airflow rest api + self.mwaa_env_vpcendpoint = aws.ec2.VpcEndpoint( + f"{self.basename}-{name}-mwaa-vpce", + vpc_id=self.vpc.id, + service_name=f"com.amazonaws.{aws.get_region().region}.airflow.env", + vpc_endpoint_type="Interface", + private_dns_enabled=True, + subnet_ids=[ + s.id + for _, s in self.get_subnets_by_type( + SubnetType.COMPUTE + ).items() + ], + # TODO: hijacking the sec group here, need to either make this + # one a default for a lot of things or create a new one + security_group_ids=[ + self.mwaa_compute_environment.security_group.id + ], + tags={ + "desc_name": f"{self.desc_name} MWAA Environment endpoint", + }, + ) + def create_env_rds_instance(self): """Creates the CAPE environment RDS instance.""" @@ -403,7 +448,9 @@ def create_workflow_meta_registry(self): self._exposed_env_vars.setdefault( "WORKFLOW_REG_DDB_TABLE", { - "resource_name": self.workflow_meta_registry.workflow_meta_ddb_table.name, + "resource_name": ( + self.workflow_meta_registry.workflow_meta_ddb_table.ddb_table.name + ), "type": "table", }, ) From 857d66d36b634a4f23604e3c3d86892079d79bc2 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Thu, 14 May 2026 14:49:08 -0400 Subject: [PATCH 31/53] got away from hard coded aws account number --- capeinfra/pipeline/airflow.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/capeinfra/pipeline/airflow.py b/capeinfra/pipeline/airflow.py index 2f782d4..c2b6b08 100644 --- a/capeinfra/pipeline/airflow.py +++ b/capeinfra/pipeline/airflow.py @@ -55,7 +55,7 @@ def base_role_policy_statements( "effect": "Allow", "actions": "airflow:PublishMetrics", "resources": [ - f"arn:aws:airflow:{self.aws_region}:767397883306:environment/{self.mwaa_env_name}" + f"arn:aws:airflow:{self.aws_region}:{self.aws_account_id}:environment/{self.mwaa_env_name}" ], }, { @@ -70,7 +70,7 @@ def 
base_role_policy_statements( "logs:GetQueryResults", ], "resources": [ - f"arn:aws:logs:{self.aws_region}:767397883306:log-group:airflow-{self.mwaa_env_name}-*" + f"arn:aws:logs:{self.aws_region}:{self.aws_account_id}:log-group:airflow-{self.mwaa_env_name}-*" ], }, { @@ -113,7 +113,7 @@ def base_role_policy_statements( ], # our mwaa env uses an aws-managed set of keys, so we # have to explicitly deny our own account's keyspace - "not_resources": "arn:aws:kms:*:767397883306:key/*", + "not_resources": f"arn:aws:kms:*:{self.aws_account_id}:key/*", "conditions": [ { "test": "StringLike", @@ -132,6 +132,7 @@ def __init__( subnets: dict[str, aws.ec2.Subnet], ingress_subnets: dict[str, aws.ec2.Subnet], aws_region: str, + aws_account_id: str, extra_policy_statements: ( list[aws.iam.GetPolicyDocumentStatementArgsDict] | None ) = None, @@ -156,6 +157,7 @@ def __init__( self.name = f"{name}" self.mwaa_env_name = f"{self.name}-env" self.aws_region = aws_region + self.aws_account_id = aws_account_id # get in a var so we aren't typing super long lines anywhere ma_bucket = capeinfra.meta.automation_assets_bucket From fd3b8675116a24dff0567a7bc8dc374158fa0245 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 10:56:06 -0400 Subject: [PATCH 32/53] update capepy wheel --- ...-any.whl => capepy-3.0.0-py3-none-any.whl} | Bin 11581 -> 12108 bytes 1 file changed, 0 insertions(+), 0 deletions(-) rename assets/lambda-layers/capepy/{capepy-2.3.0-py3-none-any.whl => capepy-3.0.0-py3-none-any.whl} (62%) diff --git a/assets/lambda-layers/capepy/capepy-2.3.0-py3-none-any.whl b/assets/lambda-layers/capepy/capepy-3.0.0-py3-none-any.whl similarity index 62% rename from assets/lambda-layers/capepy/capepy-2.3.0-py3-none-any.whl rename to assets/lambda-layers/capepy/capepy-3.0.0-py3-none-any.whl index 48fd793ca8d3e3b8daa038ffef5fe2db49ebcafc..fee97c4843b185e5a1e1c02440fce81b83ca57b2 100644 GIT binary patch delta 3642 zcmZ9PWmMFQ7R5($Kstt@yIYWM7(`TB8l*)!1SRAz3^g>23_UQw9lE5Ekd}@?Qlv$s 
zr6exCd*7$$J73OPzrEJ^wD(@?bXbnsrUa7meW+@t0fRtacv9c80+x@wm*l8Ax6P=2 zI5^^mi**nlb(bx3Ey)O4H!G7iXeoL)P4SlTRHy##l;VMPuzq+#DpIS`@?LBF@VO&m^g@X+f(KGg*(BkEDfCm_?ps3^=SuAQps`c!jHA?vXeO28D!F#Dj&}Ytw_j2qgC)Ue+z`CB!abhtR-3LrQoZ{nP9^I*QLks(1P-hH(;}#J5A~0ntmOzRjPZ+Zjr}PL*mPEX3V#t&({QM_M|m|+c3Tzwz^kpfd}2wR1@S>FsXyz-CNJ+~>HVNi1bT^);WM8JVO3 z3b+#~A49q|{84mi_EJ?i7A6(3E7F}35-VG(1PQ$clid^bUn}Ggv!i+cg{yk|C5*#7 zWu%gmwx7}ez%|=UM!zZD?iG$*T0^KtYv9TP+_$2(<_MFlT@S4+KqN{SB`OOv&m-cA zyLl@qgo1(@jhduZdPsf9%4ZSyLe$SQ7289hdXMufQr4c2hy^Qw}Nyum17T;=xu zASgS+eswltpc$;9Yb4O>^s!!UjFwhP;&sQA)itOtmE=8Ep_6eop2`oL(>5lmh0ZZ} z=ui6>2PB_otxiOGypcK#aoO5~wI-yNXmn&^eM&p4=CSdJ{92Iv6$JYI;L46 z@4#Bawv(hqbDqD`UT`9l)qm{O7IZcj{excu*t};*-6OfE|MWra}a7Pa8buaCK2{*ABMbpw})$;Z?MY!KaWU*2zQ?rGA+lZ&c zjXNZJcJB@dMIT%&5cyW2%d<%%h8N0%k}BBJ-0-Vo)Enlrx5c>o-^_{J=x3ZHNncL& z#(R)eHyLt)l_MhM>D0#p9WMCFT#Nv8&%qNt`10J$66*N$3t-r3e*~^8iNfs52L`ru zt-UpsvidkQLO~hWQzpw)p1>WJ3mRYK)YxN%cnpS{s({_rNTNBfB}6q1Qq{Lvv zo*NV89}l~#xSESFJGyle`O+N=Whbh`o?@2!+^7fuJRYB};hmhBO zORc}H1~F9|OJ{q3FHz2|pfLa)qUXGuHUk-1O~F+T5F-@b+4amSFhesm_Xj0DTOCUq zS6OnDb;w2M8|SI)a>(WZ94q~W_c}Wy_HKn379SpmYqjBfMJR#RJQZzzNB?*J?ska2+?YhXbAdgYoS08S@`U zpL}5fR%(CEu1>%YKoM(;fC1|-#Go8i>patl=}nSzly4ZM;VO6q(Xi8aSLha>-<^}P zQf{y0+j^bTI`hLrW4`?K#KWU~B<;<%K&wVo-RG;E_R5FynK|Ar8{X`8u_5+h=cSjt zNeT~^q~BztVF~DX%=wz{Sa~g`%Zzm-om2|wEZr{XXBsG_PwTl!CC0X?ege;&=R6jG zu{?&0=RZT^+bDz~K;QG@m2BChOS3Xr?u78S$mg%^Z>)OJUC+h-5a?6n2JIh|FXFqR z>YN6Sy3~|p*Z)er%I^vF=vfmvyaVj+yBzQAl5bR0><}bD^Q)hqFzk^pyM-Z`W&zF& zuImks;FcEFH*v>8*;;j!Nkwr|>)!`f6G4T)+zW=g>M=wcG7CBGL*9zi^vdWtZ6v~=U^{2zvJ(eFOzPvfnZ|)!2tzbWl^m`$p-GSpFpj95- zK3c-*MqaL-n`{FYhCE8Gtn3n#l4roy?rQd1n+z*JYl+8$s4`VXbs3!_es4lNC@5@b z?2>n7awM;<#GG)yKH23FziFLP$U%p-46e&M4qtWVz%IQFsw|cMTjjNZvB{5R%0pCz zU(oMc*vQ^Sy9l$d#Udnqci3Y;%X?&Moi{03lu|5oo)LHzt6A_}(b%ibt2Fld+t2H_ zV)X?9UA7PB9;dE}GWO`O8wSkL)qQsQVeV>n^>x=z^$x(tF-XdgJ(Crkj2j&8u>n}z;U^q=w*xvD?lZPa#}03SSA8Za>2gPkRJeQhhu282;7Tru=% ztA}WOav_;UUhyF39u3^u`n$i1*NMAnsiJo&E&h5z7fKu})5@k}+Cg>K_T#WNWSmp8 
zY|Mq$iZ@NG+b8Viovxiu7kf;udV01S;XJZx$P@^k_Q&(<^UJ551^?t%j}M7BgU0uz zDAg)T>K^KOh1KBmORJfpbH@_!o0?i>&k#0FKXM$zgL3e0{T}jIaSXi+(G7ClfAAg^ zY4;%W0r7g4(t1aWrEk+}iAiIqp0Lat-@Pq7i1AN`*g=)Sj0u{q*+FV+ScV2nqEx(j z5k;@??HMR42^&pO)=56d1U117YnG(d_ z$TlFo+-fo0Yr$H~+4wwT)-%w?;zy`54GD7)|0zC&UA0i>xrtrU+L%HlVoRoU zP-^M^3F*>3>E>^wa&PHBM}-w#AM~By$OskQl!RS+=S#g*#WL?N->KXI+G$T7*M+4C>Z#+C>~CMQ)Kxg9{Qd`_8)}U8g~OY)@=qS8^W`d#0jG za$U^ne2YX~2`~_pkC3O;!tG$UPMXW*E5hn_)mED5H%Kla)c?F=$Q~gRylOBKDy)Ks zCP!A?Wk7lh3*nhjA#?sA*EGl_VP1j%FXDg9^xr8CheQ6TWdRXYib=*lP=JUMg&or$ ds7pkY;9qkhk3=98)Ixt~bfVluQX>Ce{|`)D&FKID delta 3107 zcmZ8jcRUo18$O&-I2m`GJwh_h9p#K{QfOEmvUM`b-s9}f+3rY2;mmAjlq9N)BZrdg z)!|DS5wd@Me}Db@z5l$Q_jx{__pj%9-uI*ZYnzOqv!`j82u3CVAVMfJS_orp>%D;F ziCVsc8oth0)n`1L5mJ-@9k|g}%V!vgsujoct-52aC*2?E9PO$01g)1!zP={=K)CDL z5C47YKD9BXUPaeK+@ijX;A{%fO}=HcM0zGRd0&Cc;6~rRRrC3ir93+z-UE! zpM#PqDQQ{s+mEoHaS82k*cXfjRnkz7hm$oIJkIL!oaf1BgO3N2&^V{`uEo$9q}W>x zabqzGF=J1|I`eAX26mz$0~Tr|H5p$=rcL#UUGG;Osd6n+7CT7GO#V4QsI2|uwYHZ+ zYaGpjnGF!W#PhN^=_!d^)t4JU5kE$xy%d1J5Z}%-g#L&z7*&o)Z@@gw>>Y#1J0@~f zu~{$Wwn#V|a{)Ex)mRR2V;&ga-|C^b5`!41(uaL2b&z1s?jqOG`+C={5yb z6~OLjBDLJ@b+F&R1D?lSC|3$WZFQ9yjh^#hPLDCFyFN#wGDFJu#d5I_Rj&7?I_NBi z-SZ;V0qIV;o)a%4tU8&0ocR=#rwP*eRfaX?)?f&qm?p91G5Sjm~y!ZJV*viQKV6ion2D zfW`_XqdC-vFg;*_C3}!dz27k#-igE$WD`|y5$H_(LEygZnDExxEldt)vJ`OHI#z5? zakgk{j24c;b%}mP@G?Pt#iYgh*cX-x6wUxy&7O?7mLOfI45^<#El^s4yG{pjhAfNO zMksaGnBM{yBIIP5tnQ|Lxzj2nX4Z1)f{JYt{&0pjcP)G2mu9J#VP~dn7o29|R{~!! 
zFq=R+I`zyI{Fw8{IA3h0jM~}1**%G|xROzxttyF8FG~`ST0GuM^hSN;M#~oL%LUG< z`92>|*=YR`Bz8x6{!K=ta`8_qor-bfN}pEUTB@SNM7T)lYSzjoh({l2;KRS|pq>1d znz$NIKl6<3C*{$z%SUv9JOa0)+1)iY-xLVD((!ERssy0}-V9te>h`Os zmfgILIj)YLrY6&>(DvizgWJSQG^bUK-pEvy_dwr5&PQ;#OLi#>s z>c6rbSTjam)bt^_Y4E6&u3&$3;8tcDR{J^ym)c+RdEGB$rUx~h?oP>Vh}<%FuyOP6 zyp6CNws;>02SFyIukILa|5HfplMoOPj*LKILM%XM(|3Cum(PBaFHp&83^`&=Q>%xi z5sZ1zH{j@K=}N4}s&?+IjxGvbY^0nZq^a*{sv=NR0l3@ggE%qiG%?Ahc(lD_mVb@d z#FgR<#x=EU*MpI9M-D0J@*mp1lIou+)O*hsiMz!-Z@5%aO~mF&Cr#cKym_a(m^y+H z_iR_Z)V2^>ZquS|rvA^{!K$>`fI&o|Lzam1PhWL##b!1^TV{F;ARGHLLi2DpDgs%w zVBqSgW(C?e;GcQN-3HI}qF^)!nH%&1opi2U<8}{RnvuZ0zziaol&V_Q;k)cYuoKgl z@0rlT3k#cR342nfTczr;j>x^67R;Ea1sMi?h1L&EV2tB6bO9VnqON*giMq7XvVHx8 zL0UE=+)8lQTxpEgq$A@O9KB^a{u|0cxUt!D+b_XJuX-oDqsB-zFQ1q6b8RtTcjn0J z_O^JM+SPpJ$fr-ChTh~`hfTJmKCk;5mUb`AoIeEbK$=nSE_)=DDr(*9u3N`2Kw~EC zDe=1Rhn0VytR1WR81*ExM4zbAd_zVI2OK|%trgB4oUn-m8wV3}mrkPt`a;}*uCD~L z!vUsTkO97v;q6fq@NEu`Q3zlDT?y03+mcwZZSef)Apz`6)1f9`(beUo=5fzf2T9vfeAuK@rHa$?-+L~ z2e=LdFSf&f>GkXI$|Ef8bOOybR_aR`$mF05{*~W{8xL_R_4S>v!F?^=e1XJkEa)(l z!u)biD$91aP$hK(k1+}fU>AW8$?P~RPR>n4M?SN~XirLcU9CyFtTkPb4?_&XLj9vC zJNuFj!~M1GL=@v1DIMY|ZPnNj!ryWC^5@yRI0miJ_50cFxf)83PZ43a%q>>msfdi` z`$g{cRHu#i+_P?7XljOC{Q7u&f^{|r-noJ@(l}K2EAH!}6Vr)_b{bfrxlD`G;o{E73QJ5! 
za-JQZtGYZf=hFLnTZ_P@5=ev7mx@f?1)nrx+}k$Yy8>7~I7$esGGcIte&Ldgvviv7g$$K22%W?GZv?>4T3- z{Kjn{A_ma-upHr@24glqU#H&v7lZZI#p|z&?b)!z6PwQ530jz_^^UGw2%Ug{HL{1T zEa;Wp4}uXFcAAdknwsyy}!th^EzM*10RmT0GLW@eyw(@ej1=w(xhHXvdCNR#!#A=i{Pqcsqy-}>I8Scgysb`3u`Q(ogtPYR9M zsQ2fJ7R?)dYsmo*g4r> zYrkfm!55_Wzeg7z*ITK5Df0OA{cU@R*G*q`P&Hg9?Qto=CzqoYwH3fyUKE;|CUTi4 z*PHVLDW!q7wZ+cMbC%i?6~x^7jjYH)vd*9ByvgZnKe3lFTVZma7EgQX!$f4vvN4ma ztdl{dotL(QzA`@(gK>v1S&tM$Thbjb9FJ~7CtSq4^SSNlAF&YiGaWp@Ke}RLQ-Z}1DU)`1;J&n>g#dp7$eqN@f%tt;ke*LP%ix&cdw(|3-^Ql5$>%Px- zuWd+hNLTw2A0kup1A?2EOCG~QcTyWs_ILE$hrf?)EjJTgZJGehktU zX`T8Izthj{d_1Sjn2eG^08P$yX5P!}YEM|37z%0%%+fkQa_y;w$A@XPx4a!rtPt!$ z3=(bfCxn?GHXo~+AC$d%9G7{1U~87Xu0y?@EAbc)m&^H|-y1(HXF*@ggqN1rq|ao< zyUFX&UkBlz|C!d$;aBA)FZ_Q5{}IxEt28Ev=dVCkQP2W)^Zo_JD` Date: Fri, 15 May 2026 15:25:24 -0400 Subject: [PATCH 33/53] set the airflow api endpoints to timeout at 10 sec instead of 3 as they take a little longer than others --- Pulumi.cape-cod-dev.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Pulumi.cape-cod-dev.yaml b/Pulumi.cape-cod-dev.yaml index 3b55227..5a33bfc 100644 --- a/Pulumi.cape-cod-dev.yaml +++ b/Pulumi.cape-cod-dev.yaml @@ -782,7 +782,7 @@ config: - "x86_64" description: "getdags Lambda Function" memory_size: 128 - timeout: 3 + timeout: 10 - id: "get_workflow_pipeline_profiles_handler" name: "getdagprofiles" code: "assets/api/capi/handlers/get_workflow_pipeline_profiles.py" @@ -795,7 +795,7 @@ config: - "x86_64" description: "getdagprofiles Lambda Function" memory_size: 128 - timeout: 3 + timeout: 10 - id: "post_workflow_run_handler" name: "postdagrun" code: "assets/api/capi/handlers/post_workflow_run.py" @@ -808,7 +808,7 @@ config: - "x86_64" description: "postdagrun Lambda Function" memory_size: 128 - timeout: 3 + timeout: 10 - id: "patch_workflow_run_handler" name: "patchdagrun" code: "assets/api/capi/handlers/patch_workflow_run.py" @@ -821,7 +821,7 @@ config: - "x86_64" description: "patchdagrun Lambda Function" 
memory_size: 128 - timeout: 3 + timeout: 10 - id: "get_workflow_run_handler" name: "getdagrun" code: "assets/api/capi/handlers/get_workflow_run.py" @@ -834,7 +834,7 @@ config: - "x86_64" description: "getdags Lambda Function" memory_size: 128 - timeout: 3 + timeout: 10 - id: "get_workflow_tasks_handler" name: "getdagtasks" code: "assets/api/capi/handlers/get_workflow_tasks.py" @@ -847,7 +847,7 @@ config: - "x86_64" description: "getdagtasks Lambda Function" memory_size: 128 - timeout: 3 + timeout: 10 - id: "get_workflow_run_task_instances_handler" name: "getdagruntaskinsts" code: "assets/api/capi/handlers/get_workflow_run_task_instances.py" @@ -861,7 +861,7 @@ config: description: "getdagruntaskinsts Lambda Function" memory_size: 128 - timeout: 3 + timeout: 10 - id: "get_daps_handler" name: "getdaps" code: "assets/api/capi/handlers/get_daps.py" From 4bd6e61f50ba62d940f9d03f7d0c1bb1cbd0061b Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:26:24 -0400 Subject: [PATCH 34/53] fixed path and handler alias for workflows/run/taskinstances endpoint --- assets/api/capi/capi-openapi-301.yaml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/api/capi/capi-openapi-301.yaml.j2 b/assets/api/capi/capi-openapi-301.yaml.j2 index fa80c03..4baceef 100644 --- a/assets/api/capi/capi-openapi-301.yaml.j2 +++ b/assets/api/capi/capi-openapi-301.yaml.j2 @@ -1898,7 +1898,7 @@ paths: passthroughBehavior: "when_no_match" timeoutInMillis: 29000 type: "mock" - /workflows/taskinstances: + /workflows/run/taskinstances: get: parameters: - in: query @@ -1990,7 +1990,7 @@ paths: Server error while fetching workflow tasks. 
x-amazon-apigateway-integration: httpMethod: "POST" - uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_run_task_insatnces_handler'] }}/invocations" + uri: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/{{ handlers['get_workflow_run_task_instances_handler'] }}/invocations" passthroughBehavior: "when_no_match" timeoutInMillis: 29000 type: "aws_proxy" From 9b872c696087c3b8161e3d99868e800281816d63 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:27:23 -0400 Subject: [PATCH 35/53] added query perms for the new dap registry index (in a way that grants perms on all indexes in tables used by api, which will be fixed later) --- capeinfra/iam.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/capeinfra/iam.py b/capeinfra/iam.py index cbedf65..ae872e0 100644 --- a/capeinfra/iam.py +++ b/capeinfra/iam.py @@ -313,10 +313,18 @@ def get_api_statements( "actions": [ "dynamodb:DescribeTable", "dynamodb:GetItem", + "dynamodb:Query", "dynamodb:Scan", ], "resources": [ f"arn:aws:dynamodb:*:*:table/{t}", + # TODO: we now have an index that needs to be queried, which + # doesn't get only the table name but also the index + # name here. just opening to all indices right now as + # we're moving away from the `get_api_statements` way + # of doing things toward resource provided policies + # and need to handle this in that switch. 
+ f"arn:aws:dynamodb:*:*:table/{t}/index/*", ], }, ) From 50ac539501b58cbfc51358b38fcff46cdea85a1a Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:28:35 -0400 Subject: [PATCH 36/53] updated to capepy 3.0.0 --- assets/lambda-layers/capepy/capepy_layer.zip | Bin 14551 -> 15078 bytes .../lambda-layers/capi-all/requirements.txt | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/lambda-layers/capepy/capepy_layer.zip b/assets/lambda-layers/capepy/capepy_layer.zip index 05a182aeec6b7d9abce3e24aa0972c87260d1abc..0023c1763f1d940e154e67cb7b55f334649a8630 100644 GIT binary patch delta 4083 zcmZXX2UJtb*2e>(gh($jl+cSHErN8U1`!oPks_c7ibyYlQY1oXp~I05fq*ykCQXnc zU3wJ+L^_C6rAfK?l<&Rkdwz4)taawM&+IvC*8FGhJ?^&bNrS!)CWC^0HJC|h91Vg} zRGQNMXOBa&fqxNQo<9j7`X^aX_=5myBx?VK06$WCpn}BemqP$XiuzyEQ>fsZ;u=N> z2=tL3I3=~j+`jHIB}d=5VnYAL+6EjT-ax+HoIlMwB_nK6qYSOoRCK&QB#_Tv68ED~ ziXYX0cykjfT6X32E6x6$W7`m&Lm$4Eg6bLRqJNnJ!GrUj$&ONWurA(6RHADiiU_H> zIK%veNXOpwBCs$bJ(mi2xTrjpXkg7__KwrKI)6(KBj2-Yq{EZZo$UO}L7@+xScE&$1m_;Dd1HVUG}Z-GRx46&1;RIyU%D$+7Gtd{zm!uEao7ycBj23ZUbL^*G92v5Iv zH$ND2EOU+;OdrgyjK|}b?upnLxU~-#bPK4nkWh^7@ksPT_;rR7^v&*dgBnAHs+KT* zkGx`s!pNk~XIv!AgezSqlE-1|peIG|p+R)F5587VQyU5{=m}PXYMhviu$HXPJ9(00 zbE&$5Xnn}UK@huP`3XX8%>5N5KkEM5=3f{d1GxToY}M^$nBFQ-b*& zL#Qu_dCX;Vi(4_i`p8?d@)*T!8rxIE!_>~|v1ylTBBNKGVD)5#OIlr|RI}sA4AM5E zI%k7Imd*PWW#OVE4Wg7U)O^52P&Nw`(TaHcuo_fL&9p#WspWwT``)KiI+!#{(e>o? z5rm%#J{9UTShuschFpJcPkF<^LBvZ3K%L~aeTqr%D#qoxZ!eg&a8k=sGxMd3@$Q$c zHou&hjM9$3$j=u%n?(=3!#3v|ce}u)hHZzM!J&vY4RSzCL?2)oUN$6S(W)h;L*R^} z*0Pd)Y(`{-SjjAoB?8@xF)X%!wdtK2cyD$ru%iZYMaSSm?fthEa(zrpQj$*_hRlya z<#AN6hzgCYV-fW3khHo!F-?3L`kDvRU)-=v{<(5d>HaDPf81eh3(DyI^qVEoUmDRziLzdzX}yf&ME-QVO*?j5433b-eFbY!ZZY zBULnc*mJjoIuw6Oc!s9yc8XkXCAtO-53+M5RSiikMFn{#S=&025L!_->0){6&C z8tm*A%PN%|j4H2q!HDjKDRor58uG%G@Kze_?3+3^UVMYMJd?{x-?dgtYYa5Z<7-?! 
z;H4WkHVZ~pOWLkrUVIfd=~W}tC}eC}sNZR%dadz6sPUfgKr+Ik@5vH;EFJ$vND{Mn zNuS;wC3QMtESc)eGWcNOW#M4vvpb%*_SnON+&o7vAS3J&V->aef;!e4*f8l@cXKN~ zPp@2m?udrR%pP^W6jlr6DAzLmel%dFx+Z{ici7E=(BAr62$O6WlCv~N;_;yoYh>Ar zXJSby6KJmvbT|!6LC`5PaQZJVM9_`&TgNyxuXTuoY#vNdxE13IQlWv}69wMUg`DyB z;F3_a$`7e4;(YDT#zjx`l6In{4~JSK9H~pH^?4!6fkE=jYJFbT2ZH&w1{i$H=1pDf z^!Vsh;@91e82!e3+mQ0yV8VvHm)Gm2x#z}G=5M=tXz9FK@@3fyBKiE&LH(0F>gx!Y zV`qTz6^Q*DRS3^9GH<$onO)UVN*qP$v@lTccB`qFw>n366a6(Ms^PrDV7+6Qx1HWNoT|ajSSBR9n#FUhX*+43lF$j+!^VkIHU>KVHa9> zvgmGYZ#~4+>oVwWKvUl`ry@-JVeR|0ZL0Q%c_*QGVQO#v?u+?z;MsBXJ2Y;kd#kv znR!X3BOgsu#>Y0WQZXRW8A{92q)9;>nfL7Djd7WaPQ&w?e+$h$1bIn^5*q?aEDa zt;hHz`a!UpKdkb|X9icfT6Iq36fESxLsulXp1V9>et(|*+LobUWr~!BM)%WhTtIQ$z)ED^#e*#s{BfYqnGs5NIHmJak~v6x zg#m}*2PIQX|v$kRT zb#;wqp|Eh3EE=9!@^I(;I?c4bKaOn-!;{2&yuj`A`ZeNN_*aos&2qZvoN%f6&mFT- zpzQAsS=~()cd2G*&yoo>xDtnyxa800{v(gaVvn3{T_kQ9QGvk!WHHP@F&s*cVrb>3 z2kPN6;GbUudvG<(qj(`3c6&KiTkefW!6`{*a=*lWMw9TxcR9*`A0q@NHYOWiKPwmb zG=1>w$%$?$u3W2Qf{QuZ%ZpaFJ;Mi!=p(4>HJaqUb8@q^D9j!8gdfR^X1RBu+ZoAf zux*oeoA;8|U12<^Ndd6p`7EL`>q?cA>Z_r#EHOjXt`|8N>@w`NfT{Qx9peOfB!@w} zaIDWL%ftmA7-wOktt_v#LfYTiQDmGZJ)yFCboOztJ@3($dIpP~gloZxt*oATeH?Wh zB9lD31#?*u6e$JOSqk|zyL!U`^N!QkqaL5^=+(6MtQB%?1-Uq5rmO1_&77BVAE7U5u((M_v_I=He_!jCIY(S}A}rs=j43*Ehc zoT7Tx7wSdX-OMIo32WVs8!0Y5L-PXBA!2mmejivdPr@{^J}VkJmpkW$9WQ@7elA{- z<=J%S+W3vQIWg82Z7zM!apv;(_rI9hnH+sub5|k32y%Bq&!-M^;Up4#oIQh7KcV`> zcVA^B>i4kC*N7D;c<7%qMLHt8Ls&;N?oe3*gXY2JU2G4*6uwaIHAaf8?%)%fi{ChS zhy~=4nC3+?jYgO^P8^j4)X3G=TnsC8(!abtDem@s`AslDD%v1eTfui`0adH=atT+0mi1WP6i89k~&H=N!>z!eT%uosH25_TSn1nU;W9gyO$VFo0c-s$v82 zf{unq*yrvF2?(SD27%N760jJV*e|6ui6Jr9VzSX7vRU6BFuRFn4K8#nN>z0~Ld~1| zn`s?aVUxc?O0p!S3s8qiC@`N4aGtObe_*(u)bp^$>NDOz^q_q^e*H0>+H{8D$^0YF zu!K*%v=$Mk;o1pFmb9#ja<&1Yp7gCpPa9{Fn* zZZqoDQ(DEgyBbbf43m2_###I^dlhrETW?P6y4!Tj{cYE9yjos+P&c@F)SCHN;_|M5 z=EZZA)aH7Th<=WUq!@m~&MEj~PTeuI5lVLfHl4BYP`(?EiY+~MI|9DnG(K%Klkt9o ze4zJ2Q_rt9aZnFXISSWzHKWsiS!wY2lFifE^~ZTj@%kR`8JY);;9VZQin17^2NO>T 
zuXtltHvPkza`>|C>fODp%5P5#rg7FLM+Ys@NJFxs2pbR9M4NWK z1vzitK^@7xiE0@N$xXL4Wp>LEvm+xUx{5fNPn>o(YscFjJ|QouSE*$Td3HjAzD`2Q z3I3;+v^Xm&K_FmVg74RL94`X{%8(Rirwb`5@h1wDRQMBplGOeaX(RQ1qrfv?moFLq zMlhh}(&PWGCofCIf*sF+0DTfsz*U+ID59bQ$fX1UA!#;nI{kk*8l1rNpLa4~NBSDr z8}@tba#{3jCH!S}uo~M>4i|<0|DZ#D7IflexDjW;=BEx8DL^0%5d3T(eo6!MGNAhZ zW}nH&QN{$U!FlHG-^ybKsAPH1RuY$G2QzW~wxJPlihukAwX$sBxc{;K5~2jzFAV~^ Yaxie9#P6w1Vk|(RoFK&_^4EX-3xUKpkpKVy delta 3522 zcmZWs2{hDQA09Jf$ujma_N9<4W5!@C*+OJV)>Zh~Ot21MP6uw%!Y9j;LiT%*Z9`>VAXS%#h*)?m^@B`!L-|%zY68Y}FlaJ>`B& z^Ke(GH|R^5=wp4!8v@<>pZ)iS_J$f`>J_y-M9k~kiOxn)trQczrRKZBQ`cqKbRHD> zv-;=$SxwaHAPY}7431T1^gGBIH76|#ef%8uH7=n80e^>69TL@*;$UUS79}=X60%@S?WVAcpiRBK;vR6ps9O1{C@^ zMrRBakVGg+9tP0{| zVSu!wMhZQ&!SYLJqR|!^QyswMXehS))N6llV4HJ3`D}$;2xhapTyKoUgEk{ZukO+u zWr!A9u_v5|hbpnXDAUAdJM5ehtOx6rNL1Nq zHu1#iYR#)|l&y0{g~e$VvtE-D$l;Ut@XpV6drYd~X4){{EY@8@Rr>7_X1I!yTyF7) zLHs!&W7X;f`$9H4VyPsl=><_O0uq+965(!{If4(7yET+Ed{;YP`)z8Uy#=+sXDe|2 z=q1j-Gxv^DziWg`LqfpR7~$R%Nx*yN?s!zXo=D5Mh!0g#v=|c^GB`~r+VWX1Qph!D zz1HY#x7NN9JDtcLJ1h$hYy+sSkTY9CeTdV8=J+0uo-*&}w7OQ21fpc3k_nN@&>zCt zpA!?_c7GF>%bFqvUbc=E-j$s#-h599$0c_Qy+v}-aQh023->cEEEUR}0yCKYG3r{1 zb{PVty?wJlZUgT)?Z@fTE#~N9R2X8u^PY{6lBBV`n*PqJjbGTbRsO7^Z4%*NhAVF^ zXW^S#nU`)?mSi`AV(3?jSkN(@L_0e5&J}*1^T#=#Yax%>*+1Dijxj%I&awtD2IbQJjcphpc@2LTH6etA?q{pU+-br_TnwMix0_3p#)GRpG4M z$RTP-Ouq+6#(s@ZJJ^YeKo>9QxH>9ZLiTicW}dURBeJ~6IJMWb4Hp8PH1+k_-2<0q zPA5OWy++c=Rkx`ic9{6#$3_od&|n1?7BS@mmA>%QPX+%c2(X%S)s1*lEE#t;w!nMVQi zb14DHtBJdjE$+gLMRQ|?)>6f$WH~?)xyEc($&^0Q`J)4VQKi9-t_R_bqBGOGk->+? 
z-bszRFMH}aN`rzt30y?6$8$SxU*LH|Jg4~(>>eg|jz_OHZHX%1 zwYQm0!j!;>lF=}tMGkpR#k!L2YuZ*sXG>hXIEdr-x!7QTV_ozOIF-+|MnrtTpq`$fOrDkR2kSSJ73=G}9&`4$K7|F6^y#o+7@7Iy-ZZ+cr~Fkz z>ja!$NC1-{Vpw9^VR341GCK0EEly)f%Mbr>1A-$BAq zuQg{tJ;f~>J41LnuPVHqy_yVCANz7Wrz1~Q?)DF4n2DMB>T^ZGu>!xy-QJq?iQa40 zZ3|5;@N?t0CngzYa|k^Kg^acWcHiH_f>UuW#jr55FO4^=@waw1e^A+BHK&{&W!3#5 z@;n?FpQTo^+9mB=Ngiz+uKN}@zUV}AY^agWDbrG}&gyV(dvt{^rZXkq4(6&NP0E#j zQEzJ=xKs*laC+}TWRk;1cd~Vem9*3EX3G!KFhV#q#za#!av1h{Eh$P`h8DE2`D5G) zl|5N!xl}S!omb~7j{2ojWc6{t0ovn23KsvSef&;pn^rGB5)TYMEcKhPfePwy$A{$# z^fnkU`uRHb?LBy1Z(Z``afv-6o^))}l{ZNV6SCgck_w^X<+VolGL{EDvinTbW264K zw_w~AQ8Ur1V+cxvB zcOIb*4I?-RNS_o&hCupBO$iCAo?r(--v7MIewLB-UmUb~OR$KUiGQ^LvP=a4sDl6i zO~MF|%*pf+9D#e-RH^|;m_JlwID5eML4(>F4AyRY;ZdSVEa!9$-#=AR>kMoTjajev zXNwlfAG>XEg=>1TNsE1K2GHa8gb_}LbTF*$fHzVqEvSZ8RlWxmA5>umw8K~=zpZ_n zxrR$u$c-OSla!|RlI${imX5_PtXsc2Q5 z1Sb8AxE=Q+vr{qdh$ZXM67JRv$FoOc8{CsFB2Nq0?SQxFNZMHr9-N=MV`I~nRD^Co zEU_a2frU8lT=&g!x8+-RqO^?&PajO*D_2LsdX5bqKPvU&f;Fjgm+lXW#_1%x7$@Z)@E!R~#tD3Pl`e$?}LkcoHO}-m5CITux zH0L`T6os&IO zl1;&+0FBM7>XsYjk8a0hof+Jm1=e+{bg(5J9a5PXPyq!%|Lqj6p0o@A0O0^}`q%AC z5JvKyq>}~`{u|ChD*uMpkb1viE4j=6!Gtyx3*nud*NKic%It4^{NH;7ZTU=~!zqFj z3PGS%&d)i{l*Lq^NPvGy5W>+`Kos9E$$w+8lEO)-^Gdux vF#rE}2SFIk?*RyXN<6^W- Date: Fri, 15 May 2026 15:28:52 -0400 Subject: [PATCH 37/53] updated to capepy 3.0.0 --- capeinfra/meta/capemeta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capeinfra/meta/capemeta.py b/capeinfra/meta/capemeta.py index 23088a6..f66148f 100644 --- a/capeinfra/meta/capemeta.py +++ b/capeinfra/meta/capemeta.py @@ -179,7 +179,7 @@ def __init__(self, assets_bucket: VersionedBucket, **kwargs): ) self.bucket = assets_bucket - capepy_whl = "capepy-2.2.0-py3-none-any.whl" + capepy_whl = "capepy-3.0.0-py3-none-any.whl" self.object = self.bucket.add_object( f"{self.name}-object", key=capepy_whl, From dd9e20a96716ce3e4d2c5681b86c616dbd881bff Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:29:46 -0400 Subject: 
[PATCH 38/53] added ignore for changes to the gsi for dap registry table as pulumi keeps reporting changes. --- capeinfra/pipeline/registry.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/capeinfra/pipeline/registry.py b/capeinfra/pipeline/registry.py index a07d366..a760f4d 100644 --- a/capeinfra/pipeline/registry.py +++ b/capeinfra/pipeline/registry.py @@ -143,7 +143,20 @@ def create_dap_registry_table(self): write_capacity=0, ) ], - opts=ResourceOptions(parent=self), + opts=ResourceOptions( + parent=self, + # TODO: this is a bazooka against an ant. the GSI keeps showing + # changes in the for of adding `__defaults: []` in the GSI + # and each entry in the key_shemas as well as removal of + # the previously specified hash_key and range_key values + # (which were replaced by key_schemas recently). this + # despite the table being removed and rebuilt totally + # since removal of the `[hash|range]_key` fields (meaning + # they should no longer be involved at all). so we're + # ignoring all GSI changes reported from the server. which + # is bad if we make a real change...need to fix + ignore_changes=["global_secondary_indexes"], + ), tags={ "desc_name": ( f"{self.desc_name} Analysis Pipeline Registry DynamoDB Table" From 2360cd76c0ed00a3a8ea662742f7b8883b789e1f Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:30:25 -0400 Subject: [PATCH 39/53] adde query perms to database read policy (previously only had getitem) --- capeinfra/resources/database.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capeinfra/resources/database.py b/capeinfra/resources/database.py index 3d8900c..37e7cd5 100644 --- a/capeinfra/resources/database.py +++ b/capeinfra/resources/database.py @@ -83,6 +83,7 @@ def policies(self) -> dict[ "actions": [ "dynamodb:DescribeTable", "dynamodb:GetItem", + "dynamodb:Query", # TODO: arguably we don't need to include scan here. # scan reads the whole table (or a bunch of the # table) at once. 
we grant it currently for the From 3c1dbf3f65f77fe08636ddbf3d388be59a18023e Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:32:57 -0400 Subject: [PATCH 40/53] removed code that has been moved to capepy. handled change in capepy interface for getting pipeline by name/version --- assets/api/capi/handlers/get_dap_profile.py | 66 +++++++-------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/assets/api/capi/handlers/get_dap_profile.py b/assets/api/capi/handlers/get_dap_profile.py index 9d1f2f9..ca8c17d 100644 --- a/assets/api/capi/handlers/get_dap_profile.py +++ b/assets/api/capi/handlers/get_dap_profile.py @@ -1,49 +1,14 @@ """Lambda function for handling a post of a new analysis pipeline run.""" import json -from decimal import Decimal from botocore.exceptions import ClientError from capepy.aws.dynamodb import PipelineTable -from capepy.aws.utils import decode_error - - -# TODO: need to add some abstraction of this to capepy. it's repeated here and -# in get_object_etls at least -def bad_param_response(): - """Gets a response data object and status code when bad params are given. - - :return: A tuple containins a response data object and an HTTP 400 status - code. - """ - return ( - { - "message": ( - "Missing required query string parameters: pipeline and version" - ) - }, - 400, - ) - - -# TODO: this should probably go elsewhere. issue is you can't json serialize -# Decimal values, and some of the values coming back from dynamo in the -# pipeline profile spec are Decimal. So this shims them to floats. -def json_serialize_the_unserializable(val): - """Serialize a value (e.g. Decimal) that is otherwise not json serializable. - - Right now this just handles Decimal, but can be updated as needed. - - :param val: The value to serialize. - :return: the serialized value. - :raises: TypeError if even this function cannot serialize. 
- """ - if isinstance(val, Decimal): - # this results in a reduction of precision which can cause issues. In - # our case (for now at least) it's ok, but we may want to consider other - # mechanisms like string conversions or forcing some rounding. - return float(val) - raise TypeError(f"Value {val} of type {type(val)} is not json serializable") +from capepy.aws.utils import ( + bad_param_response, + decode_error, + json_serialize_the_unserializable, +) def index_handler(event, context): @@ -54,29 +19,40 @@ def index_handler(event, context): :param context: Context object. """ + req_params = {"pipeline", "version"} + try: headers = event.get("headers", {}) qsp = event.get("queryStringParameters") if qsp is None: - resp_data, resp_status = bad_param_response() + resp_data, resp_status = bad_param_response(list(req_params)) else: pipeline_name = qsp.get("pipeline") version = qsp.get("version") if not pipeline_name or not version: - resp_data, resp_status = bad_param_response() + resp_data, resp_status = bad_param_response(list(req_params)) else: # get a reference to the registry table ddb_table = PipelineTable() - dap = ddb_table.get_pipeline(pipeline_name, version) + dap = ddb_table.get_pipelines_by_name(pipeline_name, version) resp_data = [] resp_status = 200 + if dap: - resp_data = dap["profile"] - print(f"resp_data: {resp_data}") + if len(dap) == 1: + resp_data = dap[0]["profile"] + else: # must be more than one, which is bad + resp_status = 409 + resp_data = { + "message": ( + f"More than one DAP returned for name " + f"'{pipeline_name}'@'{version}'." 
+ ) + } # And return our response as a 200 return { "statusCode": resp_status, From 872ebd559dfb15daabbf303f7c7478f53ad32113 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:34:08 -0400 Subject: [PATCH 41/53] removed code that was moved to capepy --- assets/api/capi/handlers/get_dap_status.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/assets/api/capi/handlers/get_dap_status.py b/assets/api/capi/handlers/get_dap_status.py index 07c8b8a..460efca 100644 --- a/assets/api/capi/handlers/get_dap_status.py +++ b/assets/api/capi/handlers/get_dap_status.py @@ -5,7 +5,7 @@ import boto3 from botocore.exceptions import ClientError -from capepy.aws.utils import decode_error +from capepy.aws.utils import bad_param_response, decode_error logger = logging.getLogger(__name__) @@ -13,18 +13,6 @@ batch_client = boto3.client("batch") -def bad_param_response(): - """Gets a response data object and status code when bad params are given. - - :return: A tuple containins a response data object and an HTTP 400 status - code. - """ - return ( - {"message": ("Missing required query string parameters: jobIds")}, - 400, - ) - - def index_handler(event, context): """Handler for the GET of status of analysis pipeline jobs. @@ -33,16 +21,18 @@ def index_handler(event, context): :param context: Context object. 
""" + req_params = {"jobIds"} + try: qsp = event.get("queryStringParameters") resp_status = 200 if qsp is None: - resp_data, resp_status = bad_param_response() + resp_data, resp_status = bad_param_response(list(req_params)) else: job_ids = qsp.get("jobIds") if job_ids is None: - resp_data, resp_status = bad_param_response() + resp_data, resp_status = bad_param_response(list(req_params)) else: response = batch_client.describe_jobs( jobs=[id.strip() for id in job_ids.split(",") if id] From 61d3a8c8e866a6659a3e366df807c5ba6d451c7a Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:36:15 -0400 Subject: [PATCH 42/53] defined variables early as some code paths didn't do that. --- assets/api/capi/handlers/get_workflow_pipeline_profiles.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py index f4a0a2d..713c2f6 100644 --- a/assets/api/capi/handlers/get_workflow_pipeline_profiles.py +++ b/assets/api/capi/handlers/get_workflow_pipeline_profiles.py @@ -25,6 +25,8 @@ def index_handler(event, context): headers = event.get("headers", {}) qsp = event.get("queryStringParameters") + resp_status = 200 + resp_data = {} if qsp is None: resp_data, resp_status = bad_param_response(list(req_params)) @@ -34,7 +36,6 @@ def index_handler(event, context): if dag_id is None: resp_data, resp_status = bad_param_response(list(req_params)) else: - workflow_table = WorkflowMetaTable() wf = workflow_table.get_workflow_by_id(dag_id) @@ -50,6 +51,7 @@ def index_handler(event, context): resp_status = 404 else: for pid in wf["pipeline_ids"]: + dap = dapreg_table.get_pipeline(pid) if dap: resp_data.append(dap["profile"]) From 07a371a5c2aa4880fd1db01abd1514adb2852421 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:37:20 -0400 Subject: [PATCH 43/53] fixed a bug in the api path to get tasks for dag runs --- 
assets/api/capi/handlers/get_workflow_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/api/capi/handlers/get_workflow_tasks.py b/assets/api/capi/handlers/get_workflow_tasks.py index bb3f146..a8fe703 100644 --- a/assets/api/capi/handlers/get_workflow_tasks.py +++ b/assets/api/capi/handlers/get_workflow_tasks.py @@ -42,7 +42,7 @@ def index_handler(event, context): resp_data, resp_status = bad_param_response(list(req_params)) else: - api_path = f"/dags/{dag_id}/dagRuns/tasks" + api_path = f"/dags/{dag_id}/tasks" request_params = { "Name": env_name, From 29e35c9e62803a2f12f56a250ff59309d8c55472 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:38:23 -0400 Subject: [PATCH 44/53] imported missing functions from capepy. fixed bug in the naming of queryparams in the airflow api invocation --- assets/api/capi/handlers/patch_workflow_run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/api/capi/handlers/patch_workflow_run.py b/assets/api/capi/handlers/patch_workflow_run.py index 1acbc20..132679c 100644 --- a/assets/api/capi/handlers/patch_workflow_run.py +++ b/assets/api/capi/handlers/patch_workflow_run.py @@ -6,7 +6,7 @@ import boto3 from botocore.exceptions import ClientError -from capepy.aws.utils import decode_error +from capepy.aws.utils import bad_param_response, decode_error def index_handler(event, context): @@ -53,7 +53,7 @@ def index_handler(event, context): "Name": env_name, "Path": f"/dags/{dag_id}/dagRuns/{dag_run_id}", "Method": "PATCH", - "QueryStringParameters": {"update_mask": update_mask}, + "QueryParameters": {"update_mask": update_mask}, "Body": req_body, } From ebc541fa19602ace55d6ba6828acc3612d4af293 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:42:10 -0400 Subject: [PATCH 45/53] fixed error in the timestamp format expected by airflow. imported missing functions from capepy. 
fixed bug in mixing status/data from bad_params_response and response from airflow --- assets/api/capi/handlers/post_workflow_run.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/assets/api/capi/handlers/post_workflow_run.py b/assets/api/capi/handlers/post_workflow_run.py index bb41567..a4f8a30 100644 --- a/assets/api/capi/handlers/post_workflow_run.py +++ b/assets/api/capi/handlers/post_workflow_run.py @@ -3,10 +3,11 @@ import datetime import json import os +import re import boto3 from botocore.exceptions import ClientError -from capepy.aws.utils import decode_error +from capepy.aws.utils import bad_param_response, decode_error def index_handler(event, context): @@ -34,6 +35,7 @@ def index_handler(event, context): if qsp is None: resp_data, resp_status = bad_param_response(list(req_params)) else: + dag_id = qsp.get("dagId") dag_params = json.loads(event["body"]) @@ -51,24 +53,35 @@ def index_handler(event, context): # - there are others like data_interval_[start|end] that # are used internally in dags that process data of # specific intervals + + # the logical date must be specified (but may be null) when + # triggering. we're specifying the value. *but* it wants ISO + # 8601 format ending in `Z` which isn't supported in python + # natively till v3.11. so this makes the time string then + # replaces the bad part with `Z` so the airflow api accepts it. + now_str = datetime.datetime.now().isoformat() + zstr = re.sub(r"\..*$", "Z", now_str) + request_params = { "Name": env_name, "Path": f"/dags/{dag_id}/dagRuns", "Method": "POST", "Body": { "conf": dag_params, - "logical_date": datetime.datetime.now().isoformat(), + "logical_date": zstr, # datetime.datetime.now().isoformat(), }, } response = mwaa_client.invoke_rest_api(**request_params) + resp_data = response["RestApiResponse"] + resp_status = response["RestApiStatusCode"] # no matter the status code of the response we can return the same # thing. 
the difference in 200 vs non-200 is that the json will contain # an error string under the key "detail" instead of workflow data in # the non-200 case return { - "statusCode": response["RestApiStatusCode"], + "statusCode": resp_status, "headers": { "Content-Type": "application/json", # TODO: ISSUE #141 CORS bypass. We do not want this long term. @@ -85,7 +98,7 @@ def index_handler(event, context): "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "OPTIONS,GET", }, - "body": json.dumps(response["RestApiResponse"]), + "body": json.dumps(resp_data), } except ClientError as err: code, message = decode_error(err) From 71dc5e13c0000ed7b80bca0e33956a49a4527c97 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:47:02 -0400 Subject: [PATCH 46/53] spelling errors --- assets/api/capi/capi-openapi-301.yaml.j2 | 2 +- capeinfra/pipeline/registry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/api/capi/capi-openapi-301.yaml.j2 b/assets/api/capi/capi-openapi-301.yaml.j2 index 4baceef..6713efc 100644 --- a/assets/api/capi/capi-openapi-301.yaml.j2 +++ b/assets/api/capi/capi-openapi-301.yaml.j2 @@ -1906,7 +1906,7 @@ paths: schema: type: string description: - The dag id to get the task instaces for + The dag id to get the task instances for required: true - in: query name: dagRunId diff --git a/capeinfra/pipeline/registry.py b/capeinfra/pipeline/registry.py index a760f4d..b54e31c 100644 --- a/capeinfra/pipeline/registry.py +++ b/capeinfra/pipeline/registry.py @@ -147,7 +147,7 @@ def create_dap_registry_table(self): parent=self, # TODO: this is a bazooka against an ant. the GSI keeps showing # changes in the for of adding `__defaults: []` in the GSI - # and each entry in the key_shemas as well as removal of + # and each entry in the key_schemas as well as removal of # the previously specified hash_key and range_key values # (which were replaced by key_schemas recently). 
this # despite the table being removed and rebuilt totally From 3b95424df12e06ac021572405d49ef27ec991444 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:49:26 -0400 Subject: [PATCH 47/53] fixed version of capepy --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e172a62..7e64e78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -./assets/lambda-layers/capepy/capepy-2.2.0-py3-none-any.whl +./assets/lambda-layers/capepy/capepy-3.0.0-py3-none-any.whl boto3>=1.0.0,<2.0.0 boto3-stubs>=1.0.0,<2.0.0 From d049ccc48cef34b69d2fe0c5e907343e31f2e4d3 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Fri, 15 May 2026 15:58:36 -0400 Subject: [PATCH 48/53] trying to make pyright happy feels so wrong --- capeinfra/swimlanes/private.py | 59 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/capeinfra/swimlanes/private.py b/capeinfra/swimlanes/private.py index 5e663c4..6374c52 100644 --- a/capeinfra/swimlanes/private.py +++ b/capeinfra/swimlanes/private.py @@ -231,30 +231,38 @@ def _deploy_api(self, api_name): # TODO: this is the new style policy statements we should be moving to policy_statements = [] - policy_statements.append( - Output.all( - arn=self.mwaa_compute_environment.mwaa_environment.arn, - name=self.mwaa_compute_environment.mwaa_environment.name, - ).apply( - # adding invoke rest api perms for the `Op` default role in - # airflow. Need Op as we will be configuring runs in - # addition to triggering (if no config, we'd be able to use - # User role) - lambda kwargs: add_resources( - self.mwaa_compute_environment.policies[ - MwaaEnvironment.PolicyEnum.invoke_api - ], - # TODO: this isn't the arn of the environment or of the - # execution role. rather it seems to be the arn of - # the Op role in for airflow. 
anyway, if not specified - # like this (previously was using the env arn with - # `/Op` at the end) it fails. Need a good way to - # construct this - f"arn:aws:airflow:{self.aws_region}:{self.aws_account_id}:role/{kwargs['name']}/Op", - f"{kwargs['arn']}", + if self.mwaa_compute_environment is not None: + mep = self.mwaa_compute_environment.policies[ + MwaaEnvironment.PolicyEnum.invoke_api + ] + policy_statements.append( + Output.all( + arn=self.mwaa_compute_environment.mwaa_environment.arn, + name=self.mwaa_compute_environment.mwaa_environment.name, + ).apply( + # adding invoke rest api perms for the `Op` default role in + # airflow. Need Op as we will be configuring runs in + # addition to triggering (if no config, we'd be able to use + # User role) + lambda kwargs: add_resources( + mep, + # TODO: this isn't the arn of the environment or of the + # execution role. rather it seems to be the arn of + # the Op role in for airflow. anyway, if not specified + # like this (previously was using the env arn with + # `/Op` at the end) it fails. 
Need a good way to + # construct this + f"arn:aws:airflow:{self.aws_region}:{self.aws_account_id}:role/{kwargs['name']}/Op", + f"{kwargs['arn']}", + ) ) ) - ) + + sgis = [] + if self.mwaa_compute_environment is not None: + # TODO: hijacking the sec group here, need to either make this + # one a default for a lot of things or create a new one + sgis = [self.mwaa_compute_environment.security_group.id] self.apis[api_name]["deploy"] = CapeRestApi( f"{self.basename}-{api_name}-api", @@ -270,12 +278,7 @@ def _deploy_api(self, api_name): desc_name=f"{self.apis[api_name]['spec']['desc']}", opts=ResourceOptions(parent=self), lambda_vpc_cfg=aws.lambda_.FunctionVpcConfigArgs( - # vpc_id=self.vpc.id, - # TODO: hijacking the sec group here, need to either make this - # one a default for a lot of things or create a new one - security_group_ids=[ - self.mwaa_compute_environment.security_group.id - ], + security_group_ids=sgis, subnet_ids=[ sn.id for sn in self.get_subnets_by_type("compute").values() ], From 9b7230f492c44ded0dfad03741e34cfe0bfcbf51 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Mon, 18 May 2026 10:13:28 -0400 Subject: [PATCH 49/53] fixed bactopia 3.2.0 base id --- assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json b/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json index 688fa5c..9f43ce0 100644 --- a/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json +++ b/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json @@ -1,7 +1,7 @@ { "pipelineType": "nextflow", "pipelineName": "Bactopia", - "pipelineId": "bactopia-base-v3.2.0", + "pipelineId": "bactopia-bactopia-base-v3.2.0", "pipelineRunnable": false, "pipelineDescription": "Execute Bactopia v3.2.0", "project": "bactopia/bactopia", From 9e62296f45a45def366ca89147a3a03f9c35bd42 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Mon, 18 May 2026 10:15:25 -0400 
Subject: [PATCH 50/53] removed pipelineRunnable from all profiles --- assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json | 1 - assets/analysis-pipelines/bactopia/bactopia-base-dev.json | 1 - assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json | 1 - assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json | 1 - assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json | 1 - assets/analysis-pipelines/bactopia/ont-bactopia-dev.json | 1 - 6 files changed, 6 deletions(-) diff --git a/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json b/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json index 9f43ce0..1bf3cdd 100644 --- a/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json +++ b/assets/analysis-pipelines/bactopia/bactopia-base-3.2.0.json @@ -2,7 +2,6 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia", "pipelineId": "bactopia-bactopia-base-v3.2.0", - "pipelineRunnable": false, "pipelineDescription": "Execute Bactopia v3.2.0", "project": "bactopia/bactopia", "version": "v3.2.0", diff --git a/assets/analysis-pipelines/bactopia/bactopia-base-dev.json b/assets/analysis-pipelines/bactopia/bactopia-base-dev.json index 6dc21a9..3777c80 100644 --- a/assets/analysis-pipelines/bactopia/bactopia-base-dev.json +++ b/assets/analysis-pipelines/bactopia/bactopia-base-dev.json @@ -2,7 +2,6 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia", "pipelineId": "bactopia-bactopia-base-dev", - "pipelineRunnable": false, "pipelineDescription": "Execute Bactopia development release", "project": "bactopia/bactopia", "version": "dev", diff --git a/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json b/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json index dc6e7ec..23bf167 100644 --- a/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json +++ b/assets/analysis-pipelines/bactopia/kraken2-bactopia-3.2.0.json @@ -2,7 +2,6 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia Kraken2", "pipelineId": 
"bactopia-kraken2-v3.2.0", - "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's Kraken2 workflow with the development release", "project": "bactopia/bactopia", "version": "v3.2.0", diff --git a/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json b/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json index 4991b26..10dd8c0 100644 --- a/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json +++ b/assets/analysis-pipelines/bactopia/kraken2-bactopia-dev.json @@ -2,7 +2,6 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia Kraken2", "pipelineId": "bactopia-kraken2-dev", - "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's Kraken2 workflow with the development release", "project": "bactopia/bactopia", "version": "dev", diff --git a/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json b/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json index a094d90..5806a08 100644 --- a/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json +++ b/assets/analysis-pipelines/bactopia/ont-bactopia-3.2.0.json @@ -2,7 +2,6 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia ONT Sample", "pipelineId": "bactopia-ont-v3.2.0", - "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's ONT sample sequencing workflow with v3.2.0", "project": "bactopia/bactopia", "version": "v3.2.0", diff --git a/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json b/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json index 456db56..a57ad44 100644 --- a/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json +++ b/assets/analysis-pipelines/bactopia/ont-bactopia-dev.json @@ -2,7 +2,6 @@ "pipelineType": "nextflow", "pipelineName": "Bactopia ONT Sample", "pipelineId": "bactopia-ont-dev", - "pipelineRunnable": true, "pipelineDescription": "Execute Bactopia's ONT sample sequencing workflow with the development release", "project": "bactopia/bactopia", "version": "dev", From 45eb156aac98db6a6332bce87dc1c3e8054706b6 
Mon Sep 17 00:00:00 2001 From: Micah Halter Date: Mon, 18 May 2026 10:27:58 -0400 Subject: [PATCH 51/53] hacky fix for pyright typing with new pulumi version --- capeinfra/meta/capemeta.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/capeinfra/meta/capemeta.py b/capeinfra/meta/capemeta.py index f66148f..c993b56 100644 --- a/capeinfra/meta/capemeta.py +++ b/capeinfra/meta/capemeta.py @@ -12,7 +12,6 @@ from boto3.dynamodb.types import TypeSerializer from pulumi import ( AssetArchive, - Config, FileArchive, FileAsset, Output, @@ -259,7 +258,11 @@ def __init__(self, **kwargs): name="cape-users", account_recovery_setting={ "recovery_mechanisms": [ - {"name": "verified_email", "priority": 1} + # HACK: pyright typing seems broken for identifying + # dictionary, this is a hacky fix for that + aws.cognito.UserPoolAccountRecoverySettingRecoveryMechanismArgs( + name="verified_email", priority=1 + ) ] }, admin_create_user_config={"allow_admin_create_user_only": True}, From 81200cb4fb4d000b85b63db7d3b8b90b8944c987 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Mon, 18 May 2026 10:44:58 -0400 Subject: [PATCH 52/53] removed pipeline runnable --- capeinfra/pipeline/registry.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/capeinfra/pipeline/registry.py b/capeinfra/pipeline/registry.py index b54e31c..21925b3 100644 --- a/capeinfra/pipeline/registry.py +++ b/capeinfra/pipeline/registry.py @@ -226,9 +226,6 @@ def load_pipeline_assets(self): { "pipeline_name": {"S": profile["pipelineName"]}, "pipeline_id": {"S": profile["pipelineId"]}, - "pipeline_runnable": { - "BOOL": profile["pipelineRunnable"] - }, "version": {"S": profile["version"]}, "project": {"S": profile["project"]}, "pipeline_type": {"S": profile["pipelineType"]}, From 02d7366abc62db567c0ba10c1a4e76ae9b144a24 Mon Sep 17 00:00:00 2001 From: Drew Pihera Date: Mon, 18 May 2026 11:17:29 -0400 Subject: [PATCH 53/53] removed duplicated options and obe todo --- 
assets/api/capi/capi-openapi-301.yaml.j2 | 28 ------------------------ 1 file changed, 28 deletions(-) diff --git a/assets/api/capi/capi-openapi-301.yaml.j2 b/assets/api/capi/capi-openapi-301.yaml.j2 index 6713efc..c004a7d 100644 --- a/assets/api/capi/capi-openapi-301.yaml.j2 +++ b/assets/api/capi/capi-openapi-301.yaml.j2 @@ -1669,34 +1669,6 @@ paths: passthroughBehavior: "when_no_match" timeoutInMillis: 29000 type: "mock" - options: - responses: - "200": - $ref: "#/components/responses/200OptionsCors" - x-amazon-apigateway-integration: - responses: - default: - statusCode: "200" - responseParameters: - method.response.header.Access-Control-Allow-Methods: "'OPTIONS,GET'" - method.response.header.Access-Control-Allow-Headers: "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'" - method.response.header.Access-Control-Allow-Origin: "'*'" - requestTemplates: - application/json: "{'statusCode':200}" - passthroughBehavior: "when_no_match" - timeoutInMillis: 29000 - type: "mock" - {# - TODO: - - add: - - get dag run (/api/v2/dags/{dag_id}/dagRuns/{dag_run_id}) - - for status of the dag run - - get dag run task instances (/api/v2/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances) - - for status of the dag run tasks. - - support a qsp for running tasks (or maybe any valid task state - https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/utils/state/index.html#airflow.utils.state.TaskInstanceState) - - #} /workflows/run: get: parameters: