From 718e371d1f5506ec3d0389f2af33df27b453976a Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 8 Dec 2025 10:44:22 -0800 Subject: [PATCH 1/5] -> Logging changes --- awsglue/transfer_to_gc/transfer_logs.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/awsglue/transfer_to_gc/transfer_logs.py b/awsglue/transfer_to_gc/transfer_logs.py index 5f68267..c31dc9c 100644 --- a/awsglue/transfer_to_gc/transfer_logs.py +++ b/awsglue/transfer_to_gc/transfer_logs.py @@ -1,9 +1,12 @@ import boto3 import time -import logging import pprint +from awsglue.context import GlueContext +from pyspark.context import SparkContext -logger = logging.getLogger(__name__) +sc = SparkContext() +glueContext = GlueContext(sc) +logger = glueContext.get_logger() region='us-east-1' ds_client = boto3.client('datasync', region_name=region) @@ -35,7 +38,7 @@ if is_stopped: # Unable to start the instance in 20 minutes--something might be wrong - logger.error("[ERROR] Unable to start instance in {} minutes! Exiting.".format(str(WAIT_MAX_20/60)) + logger.error("[ERROR] Unable to start instance in {} minutes! Exiting.".format(str(WAIT_MAX_20/60))) exit(1) logger.info("[STATUS] EC2 instance started.") From a83eb7280490f27d5dfde8b267ebb0c2dede79b8 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 8 Dec 2025 11:11:05 -0800 Subject: [PATCH 2/5] -> Logging changes --- awsglue/transfer_to_gc/transfer_logs.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/awsglue/transfer_to_gc/transfer_logs.py b/awsglue/transfer_to_gc/transfer_logs.py index c31dc9c..8fb88d1 100644 --- a/awsglue/transfer_to_gc/transfer_logs.py +++ b/awsglue/transfer_to_gc/transfer_logs.py @@ -1,12 +1,11 @@ import boto3 import time import pprint -from awsglue.context import GlueContext -from pyspark.context import SparkContext +import logging -sc = SparkContext() -glueContext = GlueContext(sc) -logger = glueContext.get_logger() +logging.basicConfig() +logging.getLogger().setLevel(logging.INFO) +logger = logging.getLogger(__name__) region='us-east-1' ds_client = boto3.client('datasync', region_name=region) From 9474e37b5e39b103e56182800517510fd369765e Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 8 Dec 2025 12:35:22 -0800 Subject: [PATCH 3/5] -> Yet another bug --- awsglue/transfer_to_gc/transfer_logs.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/awsglue/transfer_to_gc/transfer_logs.py b/awsglue/transfer_to_gc/transfer_logs.py index 8fb88d1..4335e84 100644 --- a/awsglue/transfer_to_gc/transfer_logs.py +++ b/awsglue/transfer_to_gc/transfer_logs.py @@ -27,6 +27,7 @@ state=get_inst['Reservations'][0]['Instances'][0]['State']['Name'] is_stopped = bool(state=='stopped') wait = 0 + start_time = time.time() while is_stopped and wait < WAIT_MAX_20: ec2_client.start_instances(InstanceIds=[instanceId]) time.sleep(WAIT_VM_INT) @@ -39,14 +40,17 @@ # Unable to start the instance in 20 minutes--something might be wrong logger.error("[ERROR] Unable to start instance in {} minutes! Exiting.".format(str(WAIT_MAX_20/60))) exit(1) + stop_time = time.time() logger.info("[STATUS] EC2 instance started.") + logger.info("[STATUS] Time to start VM: {}s".format(str(stop_time-start_time))) filters=[{'Name':'Name', 'Values':['aws_pub_logs_to_google'], 'Operator':'Equals'}] tasks=ds_client.list_tasks()['Tasks'] tasks=[tsk for tsk in tasks if tsk.get('Name',None) in task_list] task_result = {tsk.get('Name',None): 'incomplete' for tsk in tasks} for task in tasks: + start_time = time.time() wait = 0 task_desc=ds_client.describe_task(TaskArn=task['TaskArn']) is_unavail = bool(task_desc['Status'] == 'UNAVAILABLE') @@ -65,12 +69,14 @@ cur_desc=ds_client.describe_task(TaskArn=task['TaskArn']) is_avail = bool(cur_desc['Status']=='AVAILABLE') wait = wait+WAIT_TASK_INT + stop_time = time.time() if not is_avail: # Task unavailable after 60 minutes - it might be stuck logger.error("[STATUS] Task {} didn't become available in the time alotted ({} minutes) - possibly incomplete.".format(task.get('Name',None),str(WAIT_MAX_60/60))) task_result[task.get('Name',None)] = 'over time' else: - task_result[task.get('Name',None)] = 'complete' + task_result[task.get('Name',None)] = 'complete' + logger.info("[STATUS] Task {} completed in {}s.".format(task.get('Name',None), str(stop_time-start_time))) else: # Task never became available! logger.error("[STATUS] Task {} never became available after {} minutes--skipping.".format(task.get('Name',None),str(WAIT_MAX_10/60))) From f72df239a48d662b12822fd8b38fc89be87ed1e1 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 8 Dec 2025 12:50:54 -0800 Subject: [PATCH 4/5] -> Yet another bug --- awsglue/transfer_to_gc/transfer_logs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/awsglue/transfer_to_gc/transfer_logs.py b/awsglue/transfer_to_gc/transfer_logs.py index 4335e84..49d8e93 100644 --- a/awsglue/transfer_to_gc/transfer_logs.py +++ b/awsglue/transfer_to_gc/transfer_logs.py @@ -51,6 +51,7 @@ task_result = {tsk.get('Name',None): 'incomplete' for tsk in tasks} for task in tasks: start_time = time.time() + task_name = task.get('Name',None) wait = 0 task_desc=ds_client.describe_task(TaskArn=task['TaskArn']) is_unavail = bool(task_desc['Status'] == 'UNAVAILABLE') @@ -61,6 +62,7 @@ is_unavail = bool(task_desc['Status'] == 'UNAVAILABLE') if (task_desc['Status'] == 'AVAILABLE'): + logger.info("[STATUS] Task {} execution beginning.".format(task_name)) ds_client.start_task_execution(TaskArn=task['TaskArn']) wait = 0 is_avail = False @@ -72,14 +74,14 @@ stop_time = time.time() if not is_avail: # Task unavailable after 60 minutes - it might be stuck - logger.error("[STATUS] Task {} didn't become available in the time alotted ({} minutes) - possibly incomplete.".format(task.get('Name',None),str(WAIT_MAX_60/60))) - task_result[task.get('Name',None)] = 'over time' + logger.error("[STATUS] Task {} didn't become available in the time alotted ({} minutes) - possibly incomplete.".format(task_name,str(WAIT_MAX_60/60))) + task_result[task_name] = 'over time' else: - task_result[task.get('Name',None)] = 'complete' - logger.info("[STATUS] Task {} completed in {}s.".format(task.get('Name',None), str(stop_time-start_time))) + task_result[task_name] = 'complete' + logger.info("[STATUS] Task {} completed in {}s.".format(task_name, str(stop_time-start_time))) else: # Task never became available! - logger.error("[STATUS] Task {} never became available after {} minutes--skipping.".format(task.get('Name',None),str(WAIT_MAX_10/60))) + logger.error("[STATUS] Task {} never became available after {} minutes--skipping.".format(task_name,str(WAIT_MAX_10/60))) time.sleep(WAIT_BTW_TASK_INT) logger.info("[STATUS] Final task dispositions: ") From 6d0e961c89ed795c98d0f0e4f4800df724315a64 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 8 Dec 2025 12:58:09 -0800 Subject: [PATCH 5/5] -> Yet another bug --- awsglue/transfer_to_gc/transfer_logs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/awsglue/transfer_to_gc/transfer_logs.py b/awsglue/transfer_to_gc/transfer_logs.py index 49d8e93..4db1fcc 100644 --- a/awsglue/transfer_to_gc/transfer_logs.py +++ b/awsglue/transfer_to_gc/transfer_logs.py @@ -11,7 +11,7 @@ ds_client = boto3.client('datasync', region_name=region) ec2_client = boto3.client('ec2', region_name=region) r=1 -task_list = {'aws_pub_logs_to_google','aws_cr_logs_to_google','aws_two_logs_to_google'} +task_list = ['aws_pub_logs_to_google','aws_cr_logs_to_google','aws_two_logs_to_google'] VM_FILTER = [{'Name':'tag:Name', 'Values':['DataSync_for_Logs']}] ec='DataSync_for_Logs' WAIT_MAX_20 = (20 * 60) @@ -85,6 +85,6 @@ time.sleep(WAIT_BTW_TASK_INT) logger.info("[STATUS] Final task dispositions: ") - logger.info(pprint.pp(task_result,width=10)) + logger.info(pprint.pformat(task_result,width=10)) ec2_client.stop_instances(InstanceIds=[instanceId])