From 90348169454a0aa4c5cf7af782208730eb299ec1 Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Mon, 18 Aug 2025 13:21:47 +0000 Subject: [PATCH 1/7] Use queries instead of views in summary.py --- .../source/lib/python/rocpd/summary.py | 187 +++++++++--------- 1 file changed, 99 insertions(+), 88 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 816b50b2637..324e27000f7 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -60,7 +60,7 @@ def get_temp_view_names(connection: RocpdImportData) -> List[str]: return [ v[0] for v in execute_statement( - connection, "SELECT name FROM sqlite_temp_master WHERE type='view';" + connection, "SELECT name FROM sqlite_temp_master WHERE type='view'" ).fetchall() ] @@ -72,30 +72,34 @@ def get_temp_view_columns(connection: RocpdImportData, view_name: str) -> List[s return [row[1] for row in cursor.fetchall()] -def make_temp_view_query(view_name, query) -> str: - return "CREATE TEMPORARY VIEW IF NOT EXISTS `{}` AS {}".format(view_name, query) - - -def export_view( - connection: RocpdImportData, view_name, output_format, output_path, filename="" +def export_query( + connection: RocpdImportData, + query_name, + query, + output_format, + output_path, + filename="", ) -> None: - """Write the contents of a SQL view to an output format.""" + """Write the contents of a SQL query to an output format.""" - query = "SELECT * FROM `{}`".format(view_name) - query_one = "SELECT * FROM `{}` LIMIT 1".format(view_name) + query_not_empty = f""" + SELECT EXISTS ( + {query} + ) + """ - # just return if view is empty - if not connection.execute(query_one).fetchone(): + # just return if the result is empty + if not connection.execute(query_not_empty).fetchone()[0]: return # prepare the output filename if not filename: - output_filename = view_name + output_filename = query_name else: - output_filename = f"{filename}_{view_name}" + output_filename = f"{filename}_{query_name}" if output_format == "console": - print(f"\n{view_name.upper()}:") + print(f"\n{query_name.upper()}:") # call query module to export. query will append the extension export_path = os.path.join(output_path, output_filename) @@ -106,10 +110,11 @@ def export_view( def generate_summary_query( view_name: str, + view_query="", name_column="name", by_rank=False, ) -> Tuple[str, str]: - """Generate the SQL statement to create a summary view.""" + """Generate the SQL statement to create a summary query.""" if by_rank: view_suffix = "_summary_by_rank" @@ -139,8 +144,19 @@ def generate_summary_query( full_view_name = f"{view_name}{view_suffix}" + view_select = ( + f""" + {view_name} AS ( + {view_query} + ), + """ + if view_query + else "" + ) + summary_query = f""" WITH + {view_select} avg_data AS ( SELECT {group_by_columns.replace(name_column, f"{name_column} AS name")}, @@ -183,7 +199,7 @@ def generate_summary_query( aggregated_data AD {total_duration_join} ORDER BY - {"AD.pid," if by_rank else ""} AD.total_duration DESC; + {"AD.pid," if by_rank else ""} AD.total_duration DESC """ return (full_view_name, summary_query) @@ -214,7 +230,7 @@ def generate_domain_query(connection: RocpdImportData, by_rank=False) -> Tuple[s ] if len(summary_views) < 1: - return view_name + return () union_selects = [ f" SELECT '{s.replace(view_suffix, '').upper()}' as domain, * FROM {s} " @@ -259,14 +275,16 @@ def generate_domain_query(connection: RocpdImportData, by_rank=False) -> Tuple[s FROM grouped_domains GD {join_condition} - {order_by}; + {order_by} """ return (view_name, domain_select) -def create_summary_views(connection: RocpdImportData, by_rank=False) -> None: - """Create summary views for eligible temporary views in the database.""" +def create_summary_queries(connection: RocpdImportData, by_rank=False) -> dict[str, str]: + """Create summary queries for eligible temporary views in the database.""" + + queries = {} NAME_COLUMN_MAP = { "memory_allocations": "type", @@ -286,30 +304,33 @@ def create_summary_views(connection: RocpdImportData, by_rank=False) -> None: if not required_columns.issubset(columns): continue - # Create regular summary view - summary_view_name, summary_query = generate_summary_query( - view_name, name_column=NAME_COLUMN_MAP.get(view_name, "name") + # Create regular summary query + summary_query_name, summary_query = generate_summary_query( + view_name, "", name_column=NAME_COLUMN_MAP.get(view_name, "name") ) - connection.execute(make_temp_view_query(summary_view_name, summary_query)) + queries[summary_query_name] = summary_query - # Create per-rank summary + # Create per-rank summary query if by_rank: - per_rank_view_name, summary_by_rank_query = generate_summary_query( + per_rank_query_name, summary_by_rank_query = generate_summary_query( view_name, + "", name_column=NAME_COLUMN_MAP.get(view_name, "name"), by_rank=True, ) - connection.execute( - make_temp_view_query(per_rank_view_name, summary_by_rank_query) - ) + queries[per_rank_query_name] = summary_by_rank_query + + return queries -def create_summary_region_views( +def create_summary_region_queries( connection: RocpdImportData, by_rank=False, region_categories=None -) -> None: - """Create summary and region views""" +) -> dict[str, str]: + """Create summary and region queries""" - query = "SELECT DISTINCT(category) FROM regions_and_samples;" + queries = {} + + query = "SELECT DISTINCT(category) FROM regions_and_samples" categories = execute_statement(connection, query).fetchall() if region_categories is None: @@ -325,66 +346,66 @@ def create_summary_region_views( for k, v in category_map.items(): if len(v) > 0: conditions = [f"category LIKE '{c}'" for c in v] - temp_region_view = f""" - CREATE TEMPORARY VIEW IF NOT EXISTS `{k}` AS + region_query = f""" SELECT * FROM regions_and_samples - WHERE {" OR ".join(conditions)}; + WHERE {" OR ".join(conditions)} """ - connection.execute(temp_region_view) + queries[k] = region_query - # Create regular summary view - summary_view_name, summary_query = generate_summary_query(k) - connection.execute(make_temp_view_query(summary_view_name, summary_query)) + # Create regular summary query + summary_query_name, summary_query = generate_summary_query(k, region_query) + queries[summary_query_name] = summary_query # Create per-rank summary view if by_rank: - per_rank_view_name, summary_by_rank_query = generate_summary_query( - k, by_rank=True - ) - connection.execute( - make_temp_view_query(per_rank_view_name, summary_by_rank_query) + per_rank_query_name, summary_by_rank_query = generate_summary_query( + k, region_query, by_rank=True ) + queries[per_rank_query_name] = summary_by_rank_query # Markers if "MARKER" not in region_categories: - return + return queries - view_name = "markers" - markers_create = f""" - CREATE TEMPORARY VIEW IF NOT EXISTS `{view_name}` AS + markers_query_name = "markers" + markers_query = f""" SELECT JSON_EXTRACT(extdata, '$.message') AS marker_name, * FROM regions_and_samples WHERE category LIKE 'MARKER_%' """ - connection.execute(markers_create) - # Create regular summary view - summary_view_name, summary_query = generate_summary_query( - view_name, name_column="marker_name" + # Create regular summary query + summary_query_name, summary_query = generate_summary_query( + markers_query_name, markers_query, name_column="marker_name" ) - connection.execute(make_temp_view_query(summary_view_name, summary_query)) + queries[summary_query_name] = summary_query - # Create per-rank summary view + # Create per-rank summary query if by_rank: - per_rank_view_name, summary_by_rank_query = generate_summary_query( - view_name, name_column="marker_name", by_rank=True - ) - connection.execute( - make_temp_view_query(per_rank_view_name, summary_by_rank_query) + per_rank_query_name, summary_by_rank_query = generate_summary_query( + markers_query_name, markers_query, name_column="marker_name", by_rank=True ) + queries[per_rank_query_name] = summary_by_rank_query + + return queries + +def create_domain_query(connection: RocpdImportData, by_rank=False) -> dict[str, str]: + """Create a domain summary query by aggregating all summary views.""" -def create_domain_view(connection: RocpdImportData, by_rank=False) -> str: - """Create a domain summary view by aggregating all summary views.""" + result = generate_domain_query(connection, by_rank=by_rank) + if not result: + return {} - view_name, domain_query = generate_domain_query(connection, by_rank=by_rank) + query_name, query = result + queries = {} # Create the domain summary view - connection.execute(make_temp_view_query(view_name, domain_query)) + queries[query_name] = query - return view_name + return queries def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: @@ -408,34 +429,24 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: ), ) - # create the temporary summary views - create_summary_views(connection, by_rank) - create_summary_region_views(connection, by_rank, region_categories=region_categories) + queries = {} + + # create the temporary summary queries + queries |= create_summary_queries(connection, by_rank) + queries |= create_summary_region_queries( + connection, by_rank, region_categories=region_categories + ) if domain_summary: - create_domain_view(connection) + queries |= create_domain_query(connection) # Create domain summary per rank only if both domain_summary and summary_by_rank are enabled if by_rank: - create_domain_view(connection, by_rank=True) + queries |= create_domain_query(connection, by_rank=True) - # Write regular summary views + # Write summary print("\nSummary files:") - summary_views = [ - itr for itr in get_temp_view_names(connection) if itr.endswith("_summary") - ] - for v in summary_views: - export_view(connection, v, output_format, output_path, filename) - - # Write per-rank summary views if flag is set - if by_rank: - print("\nSummary files by rank:") - summary_by_rank_views = [ - itr - for itr in get_temp_view_names(connection) - if itr.endswith("_summary_by_rank") - ] - for v in summary_by_rank_views: - export_view(connection, v, output_format, output_path, filename) + for query_name, query in queries.items(): + export_query(connection, query_name, query, output_format, output_path, filename) # From 6bb24cabcc3896d1f664b1d2d02e47ba4c1821bc Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Mon, 18 Aug 2025 14:01:18 +0000 Subject: [PATCH 2/7] Export queries when created --- .../source/lib/python/rocpd/summary.py | 111 +++++++++--------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 324e27000f7..51a2533bc8b 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -24,6 +24,7 @@ ############################################################################### import argparse +from dataclasses import dataclass import os import math @@ -33,6 +34,15 @@ from . import output_config +@dataclass +class ExportConfig: + """Configuration for exporting summary queries.""" + + output_format: str = "console" + output_path: str = "./rocpd-output-data" + filename: str = "" + + def check_function_availability(connection, function_name): """ Checks if a given function exists in the SQLite database. @@ -73,12 +83,7 @@ def get_temp_view_columns(connection: RocpdImportData, view_name: str) -> List[s def export_query( - connection: RocpdImportData, - query_name, - query, - output_format, - output_path, - filename="", + connection: RocpdImportData, config: ExportConfig, query_name, query ) -> None: """Write the contents of a SQL query to an output format.""" @@ -93,18 +98,18 @@ def export_query( return # prepare the output filename - if not filename: + if not config.filename: output_filename = query_name else: - output_filename = f"{filename}_{query_name}" + output_filename = f"{config.filename}_{query_name}" - if output_format == "console": + if config.output_format == "console": print(f"\n{query_name.upper()}:") # call query module to export. query will append the extension - export_path = os.path.join(output_path, output_filename) + export_path = os.path.join(config.output_path, output_filename) export_sqlite_query( - connection, query, export_format=output_format, export_path=export_path + connection, query, export_format=config.output_format, export_path=export_path ) @@ -281,10 +286,10 @@ def generate_domain_query(connection: RocpdImportData, by_rank=False) -> Tuple[s return (view_name, domain_select) -def create_summary_queries(connection: RocpdImportData, by_rank=False) -> dict[str, str]: - """Create summary queries for eligible temporary views in the database.""" - - queries = {} +def export_summary_queries( + connection: RocpdImportData, config: ExportConfig, by_rank=False +): + """Create and export summary queries for eligible temporary views in the database.""" NAME_COLUMN_MAP = { "memory_allocations": "type", @@ -308,7 +313,7 @@ def create_summary_queries(connection: RocpdImportData, by_rank=False) -> dict[s summary_query_name, summary_query = generate_summary_query( view_name, "", name_column=NAME_COLUMN_MAP.get(view_name, "name") ) - queries[summary_query_name] = summary_query + export_query(connection, config, summary_query_name, summary_query) # Create per-rank summary query if by_rank: @@ -318,17 +323,16 @@ def create_summary_queries(connection: RocpdImportData, by_rank=False) -> dict[s name_column=NAME_COLUMN_MAP.get(view_name, "name"), by_rank=True, ) - queries[per_rank_query_name] = summary_by_rank_query - - return queries + export_query(connection, config, per_rank_query_name, summary_by_rank_query) -def create_summary_region_queries( - connection: RocpdImportData, by_rank=False, region_categories=None -) -> dict[str, str]: - """Create summary and region queries""" - - queries = {} +def export_summary_region_queries( + connection: RocpdImportData, + config: ExportConfig, + by_rank=False, + region_categories=None, +): + """Create and export summary and region queries""" query = "SELECT DISTINCT(category) FROM regions_and_samples" categories = execute_statement(connection, query).fetchall() @@ -352,22 +356,25 @@ def create_summary_region_queries( WHERE {" OR ".join(conditions)} """ - queries[k] = region_query + # Export the region summary query + export_query(connection, config, k, region_query) # Create regular summary query summary_query_name, summary_query = generate_summary_query(k, region_query) - queries[summary_query_name] = summary_query + export_query(connection, config, summary_query_name, summary_query) - # Create per-rank summary view + # Create per-rank summary query if by_rank: per_rank_query_name, summary_by_rank_query = generate_summary_query( k, region_query, by_rank=True ) - queries[per_rank_query_name] = summary_by_rank_query + export_query( + connection, config, per_rank_query_name, summary_by_rank_query + ) # Markers if "MARKER" not in region_categories: - return queries + return markers_query_name = "markers" markers_query = f""" @@ -380,32 +387,27 @@ def create_summary_region_queries( summary_query_name, summary_query = generate_summary_query( markers_query_name, markers_query, name_column="marker_name" ) - queries[summary_query_name] = summary_query + export_query(connection, config, summary_query_name, summary_query) # Create per-rank summary query if by_rank: per_rank_query_name, summary_by_rank_query = generate_summary_query( markers_query_name, markers_query, name_column="marker_name", by_rank=True ) - queries[per_rank_query_name] = summary_by_rank_query - - return queries + export_query(connection, config, per_rank_query_name, summary_by_rank_query) -def create_domain_query(connection: RocpdImportData, by_rank=False) -> dict[str, str]: - """Create a domain summary query by aggregating all summary views.""" +def export_domain_query(connection: RocpdImportData, config: ExportConfig, by_rank=False): + """Create and export a domain summary query by aggregating all summary views.""" result = generate_domain_query(connection, by_rank=by_rank) if not result: - return {} + return query_name, query = result - queries = {} - - # Create the domain summary view - queries[query_name] = query - return queries + # Export the domain summary query + export_query(connection, config, query_name, query) def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: @@ -429,24 +431,23 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: ), ) - queries = {} + config = ExportConfig( + output_format=output_format, + output_path=output_path, + filename=filename, + ) - # create the temporary summary queries - queries |= create_summary_queries(connection, by_rank) - queries |= create_summary_region_queries( - connection, by_rank, region_categories=region_categories + # Create and export the summary queries + export_summary_queries(connection, config, by_rank) + export_summary_region_queries( + connection, config, by_rank, region_categories=region_categories ) if domain_summary: - queries |= create_domain_query(connection) - # Create domain summary per rank only if both domain_summary and summary_by_rank are enabled + export_domain_query(connection, config) + # Create and export domain summary per rank only if both domain_summary and summary_by_rank are enabled if by_rank: - queries |= create_domain_query(connection, by_rank=True) - - # Write summary - print("\nSummary files:") - for query_name, query in queries.items(): - export_query(connection, query_name, query, output_format, output_path, filename) + export_domain_query(connection, config, by_rank=True) # From 822a7462ce5fa1970274bfaa3cd2710b5071726a Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Wed, 20 Aug 2025 11:06:02 +0000 Subject: [PATCH 3/7] Remove HIP and HSA from output --- projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 51a2533bc8b..776f5f5ebe0 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -356,9 +356,6 @@ def export_summary_region_queries( WHERE {" OR ".join(conditions)} """ - # Export the region summary query - export_query(connection, config, k, region_query) - # Create regular summary query summary_query_name, summary_query = generate_summary_query(k, region_query) export_query(connection, config, summary_query_name, summary_query) From bd5c0b179dc626cc3f0edc9ae0dd53acc1b9ddcb Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Wed, 20 Aug 2025 21:05:01 +0000 Subject: [PATCH 4/7] Fix domain query --- .../source/lib/python/rocpd/summary.py | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 776f5f5ebe0..8d0d2286e71 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -210,7 +210,9 @@ def generate_summary_query( return (full_view_name, summary_query) -def generate_domain_query(connection: RocpdImportData, by_rank=False) -> Tuple[str, str]: +def generate_domain_query( + connection: RocpdImportData, summary_queries, by_rank=False +) -> Tuple[str, str]: """Generate the SQL statement for domain summary by doing union over all summary views.""" if by_rank: @@ -230,20 +232,27 @@ def generate_domain_query(connection: RocpdImportData, by_rank=False) -> Tuple[s join_condition = "CROSS JOIN total_duration TD" order_by = 'ORDER BY GD."DURATION (nsec)" DESC' - summary_views = [ - itr for itr in get_temp_view_names(connection) if itr.endswith(view_suffix) - ] + summary_views = { + query_name: query + for query_name, query in summary_queries.items() + if query_name.endswith(view_suffix) + } if len(summary_views) < 1: return () + summary_selects = [ + f"{query_name} AS ({query})," for query_name, query in summary_views.items() + ] + union_selects = [ - f" SELECT '{s.replace(view_suffix, '').upper()}' as domain, * FROM {s} " - for s in summary_views + f" SELECT '{query_name.replace(view_suffix, '').upper()}' as domain, * FROM {query_name} " + for query_name, query in summary_views.items() ] domain_select = f""" WITH + {f"".join(summary_selects)} all_domains AS ( {f" UNION ALL ".join(union_selects)} ), @@ -301,6 +310,8 @@ def export_summary_queries( views = get_temp_view_names(connection) + queries = {} + for view_name in views: if any(pattern in view_name for pattern in avoid_view_pattern): continue @@ -314,6 +325,7 @@ def export_summary_queries( view_name, "", name_column=NAME_COLUMN_MAP.get(view_name, "name") ) export_query(connection, config, summary_query_name, summary_query) + queries[summary_query_name] = summary_query # Create per-rank summary query if by_rank: @@ -324,6 +336,9 @@ def export_summary_queries( by_rank=True, ) export_query(connection, config, per_rank_query_name, summary_by_rank_query) + queries[per_rank_query_name] = summary_by_rank_query + + return queries def export_summary_region_queries( @@ -347,6 +362,8 @@ def export_summary_region_queries( if "MARKER" not in cat.upper() } + queries = {} + for k, v in category_map.items(): if len(v) > 0: conditions = [f"category LIKE '{c}'" for c in v] @@ -359,6 +376,7 @@ def export_summary_region_queries( # Create regular summary query summary_query_name, summary_query = generate_summary_query(k, region_query) export_query(connection, config, summary_query_name, summary_query) + queries[summary_query_name] = summary_query # Create per-rank summary query if by_rank: @@ -368,10 +386,11 @@ def export_summary_region_queries( export_query( connection, config, per_rank_query_name, summary_by_rank_query ) + queries[per_rank_query_name] = summary_by_rank_query # Markers if "MARKER" not in region_categories: - return + return queries markers_query_name = "markers" markers_query = f""" @@ -385,6 +404,7 @@ def export_summary_region_queries( markers_query_name, markers_query, name_column="marker_name" ) export_query(connection, config, summary_query_name, summary_query) + queries[summary_query_name] = summary_query # Create per-rank summary query if by_rank: @@ -392,12 +412,17 @@ def export_summary_region_queries( markers_query_name, markers_query, name_column="marker_name", by_rank=True ) export_query(connection, config, per_rank_query_name, summary_by_rank_query) + queries[per_rank_query_name] = summary_by_rank_query + return queries -def export_domain_query(connection: RocpdImportData, config: ExportConfig, by_rank=False): + +def export_domain_query( + connection: RocpdImportData, config: ExportConfig, summary_queries, by_rank=False +): """Create and export a domain summary query by aggregating all summary views.""" - result = generate_domain_query(connection, by_rank=by_rank) + result = generate_domain_query(connection, summary_queries, by_rank=by_rank) if not result: return @@ -434,17 +459,19 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: filename=filename, ) + summary_queries = {} + # Create and export the summary queries - export_summary_queries(connection, config, by_rank) - export_summary_region_queries( + summary_queries |= export_summary_queries(connection, config, by_rank) + summary_queries |= export_summary_region_queries( connection, config, by_rank, region_categories=region_categories ) if domain_summary: - export_domain_query(connection, config) + export_domain_query(connection, config, summary_queries) # Create and export domain summary per rank only if both domain_summary and summary_by_rank are enabled if by_rank: - export_domain_query(connection, config, by_rank=True) + export_domain_query(connection, config, summary_queries, by_rank=True) # From a72ae9398f69ae27d54307edafeb9e370f34cd2a Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Thu, 21 Aug 2025 14:16:59 +0000 Subject: [PATCH 5/7] Export summary queries in the main function --- .../source/lib/python/rocpd/summary.py | 87 ++++++++----------- 1 file changed, 35 insertions(+), 52 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 8d0d2286e71..ba5786eaa57 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -24,7 +24,6 @@ ############################################################################### import argparse -from dataclasses import dataclass import os import math @@ -34,15 +33,6 @@ from . import output_config -@dataclass -class ExportConfig: - """Configuration for exporting summary queries.""" - - output_format: str = "console" - output_path: str = "./rocpd-output-data" - filename: str = "" - - def check_function_availability(connection, function_name): """ Checks if a given function exists in the SQLite database. @@ -83,7 +73,12 @@ def get_temp_view_columns(connection: RocpdImportData, view_name: str) -> List[s def export_query( - connection: RocpdImportData, config: ExportConfig, query_name, query + connection: RocpdImportData, + output_path, + output_file, + output_format, + query_name, + query, ) -> None: """Write the contents of a SQL query to an output format.""" @@ -98,18 +93,18 @@ def export_query( return # prepare the output filename - if not config.filename: + if not output_file: output_filename = query_name else: - output_filename = f"{config.filename}_{query_name}" + output_filename = f"{output_file}_{query_name}" - if config.output_format == "console": + if output_format == "console": print(f"\n{query_name.upper()}:") # call query module to export. query will append the extension - export_path = os.path.join(config.output_path, output_filename) + export_path = os.path.join(output_path, output_filename) export_sqlite_query( - connection, query, export_format=config.output_format, export_path=export_path + connection, query, export_format=output_format, export_path=export_path ) @@ -295,10 +290,8 @@ def generate_domain_query( return (view_name, domain_select) -def export_summary_queries( - connection: RocpdImportData, config: ExportConfig, by_rank=False -): - """Create and export summary queries for eligible temporary views in the database.""" +def create_summary_queries(connection: RocpdImportData, by_rank=False): + """Create summary queries for eligible temporary views in the database.""" NAME_COLUMN_MAP = { "memory_allocations": "type", @@ -324,7 +317,6 @@ def export_summary_queries( summary_query_name, summary_query = generate_summary_query( view_name, "", name_column=NAME_COLUMN_MAP.get(view_name, "name") ) - export_query(connection, config, summary_query_name, summary_query) queries[summary_query_name] = summary_query # Create per-rank summary query @@ -335,19 +327,17 @@ def export_summary_queries( name_column=NAME_COLUMN_MAP.get(view_name, "name"), by_rank=True, ) - export_query(connection, config, per_rank_query_name, summary_by_rank_query) queries[per_rank_query_name] = summary_by_rank_query return queries -def export_summary_region_queries( +def create_summary_region_queries( connection: RocpdImportData, - config: ExportConfig, by_rank=False, region_categories=None, ): - """Create and export summary and region queries""" + """Create summary and region queries""" query = "SELECT DISTINCT(category) FROM regions_and_samples" categories = execute_statement(connection, query).fetchall() @@ -375,7 +365,6 @@ def export_summary_region_queries( # Create regular summary query summary_query_name, summary_query = generate_summary_query(k, region_query) - export_query(connection, config, summary_query_name, summary_query) queries[summary_query_name] = summary_query # Create per-rank summary query @@ -383,9 +372,6 @@ def export_summary_region_queries( per_rank_query_name, summary_by_rank_query = generate_summary_query( k, region_query, by_rank=True ) - export_query( - connection, config, per_rank_query_name, summary_by_rank_query - ) queries[per_rank_query_name] = summary_by_rank_query # Markers @@ -403,7 +389,6 @@ def export_summary_region_queries( summary_query_name, summary_query = generate_summary_query( markers_query_name, markers_query, name_column="marker_name" ) - export_query(connection, config, summary_query_name, summary_query) queries[summary_query_name] = summary_query # Create per-rank summary query @@ -411,25 +396,21 @@ def export_summary_region_queries( per_rank_query_name, summary_by_rank_query = generate_summary_query( markers_query_name, markers_query, name_column="marker_name", by_rank=True ) - export_query(connection, config, per_rank_query_name, summary_by_rank_query) queries[per_rank_query_name] = summary_by_rank_query return queries -def export_domain_query( - connection: RocpdImportData, config: ExportConfig, summary_queries, by_rank=False -): - """Create and export a domain summary query by aggregating all summary views.""" +def create_domain_query(connection: RocpdImportData, summary_queries, by_rank=False): + """Create a domain summary query by aggregating all summary views.""" result = generate_domain_query(connection, summary_queries, by_rank=by_rank) if not result: - return + return {} query_name, query = result - # Export the domain summary query - export_query(connection, config, query_name, query) + return {query_name: query} def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: @@ -437,7 +418,7 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: domain_summary = kwargs.get("domain_summary", False) by_rank = kwargs.get("summary_by_rank", False) - filename = kwargs.get("output_file", "") + output_file = kwargs.get("output_file", "") output_path = kwargs.get("output_path", "./rocpd-output-data") region_categories = kwargs.get("region_categories", None) output_format = kwargs.get("format", "console") @@ -453,25 +434,27 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: ), ) - config = ExportConfig( - output_format=output_format, - output_path=output_path, - filename=filename, - ) - summary_queries = {} - # Create and export the summary queries - summary_queries |= export_summary_queries(connection, config, by_rank) - summary_queries |= export_summary_region_queries( - connection, config, by_rank, region_categories=region_categories + # Create the summary queries + summary_queries |= create_summary_queries(connection, by_rank) + summary_queries |= create_summary_region_queries( + connection, by_rank, region_categories=region_categories ) if domain_summary: - export_domain_query(connection, config, summary_queries) - # Create and export domain summary per rank only if both domain_summary and summary_by_rank are enabled + summary_queries |= create_domain_query(connection, summary_queries) + # Create and domain summary per rank only if both domain_summary and summary_by_rank are enabled if by_rank: - export_domain_query(connection, config, summary_queries, by_rank=True) + summary_queries |= create_domain_query( + connection, summary_queries, by_rank=True + ) + + # Export all summary queries + for query_name, query in summary_queries.items(): + export_query( + connection, output_path, output_file, output_format, query_name, query + ) # From e5c8191e2551d7c48f4c385cd3cd4e58df0d684a Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Mon, 25 Aug 2025 12:49:16 +0000 Subject: [PATCH 6/7] Fix comments and variable names --- .../source/lib/python/rocpd/summary.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index ba5786eaa57..80776d909fc 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -208,7 +208,7 @@ def generate_summary_query( def generate_domain_query( connection: RocpdImportData, summary_queries, by_rank=False ) -> Tuple[str, str]: - """Generate the SQL statement for domain summary by doing union over all summary views.""" + """Generate the SQL statement for domain summary by doing union over all summary queries.""" if by_rank: view_suffix = "_summary_by_rank" @@ -227,22 +227,22 @@ def generate_domain_query( join_condition = "CROSS JOIN total_duration TD" order_by = 'ORDER BY GD."DURATION (nsec)" DESC' - summary_views = { + summary_dictionary = { query_name: query for query_name, query in summary_queries.items() if query_name.endswith(view_suffix) } - if len(summary_views) < 1: + if len(summary_dictionary) < 1: return () summary_selects = [ - f"{query_name} AS ({query})," for query_name, query in summary_views.items() + f"{query_name} AS ({query}) ," for query_name, query in summary_dictionary.items() ] union_selects = [ f" SELECT '{query_name.replace(view_suffix, '').upper()}' as domain, * FROM {query_name} " - for query_name, query in summary_views.items() + for query_name, query in summary_dictionary.items() ] domain_select = f""" @@ -379,7 +379,7 @@ def create_summary_region_queries( return queries markers_query_name = "markers" - markers_query = f""" + markers_query = """ SELECT JSON_EXTRACT(extdata, '$.message') AS marker_name, * FROM regions_and_samples WHERE category LIKE 'MARKER_%' @@ -402,7 +402,7 @@ def create_summary_region_queries( def create_domain_query(connection: RocpdImportData, summary_queries, by_rank=False): - """Create a domain summary query by aggregating all summary views.""" + """Create a domain summary query by aggregating all summary queries.""" result = generate_domain_query(connection, summary_queries, by_rank=by_rank) if not result: @@ -414,7 +414,7 @@ def create_domain_query(connection: RocpdImportData, summary_queries, by_rank=Fa def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: - """Generate all summary views and write them to CSV files.""" + """Generate all summaries and export them to selected format.""" domain_summary = kwargs.get("domain_summary", False) by_rank = kwargs.get("summary_by_rank", False) @@ -444,7 +444,7 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: if domain_summary: summary_queries |= create_domain_query(connection, summary_queries) - # Create and domain summary per rank only if both domain_summary and summary_by_rank are enabled + # Create domain summary per rank only if both domain_summary and summary_by_rank are enabled if by_rank: summary_queries |= create_domain_query( connection, summary_queries, by_rank=True From 6de2e416fd86b7dfaf1008506161f30836bcb19b Mon Sep 17 00:00:00 2001 From: Aleksei Tumakaev Date: Tue, 26 Aug 2025 12:17:32 +0000 Subject: [PATCH 7/7] Change syntax for old python versions --- .../source/lib/python/rocpd/summary.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py index 80776d909fc..da36b2a058a 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/summary.py @@ -437,17 +437,19 @@ def generate_all_summaries(connection: RocpdImportData, **kwargs: Any) -> None: summary_queries = {} # Create the summary queries - summary_queries |= create_summary_queries(connection, by_rank) - summary_queries |= create_summary_region_queries( - connection, by_rank, region_categories=region_categories + summary_queries.update(create_summary_queries(connection, by_rank)) + summary_queries.update( + create_summary_region_queries( + connection, by_rank, region_categories=region_categories + ) ) if domain_summary: - summary_queries |= create_domain_query(connection, summary_queries) + summary_queries.update(create_domain_query(connection, summary_queries)) # Create domain summary per rank only if both domain_summary and summary_by_rank are enabled if by_rank: - summary_queries |= create_domain_query( - connection, summary_queries, by_rank=True + summary_queries.update( + create_domain_query(connection, summary_queries, by_rank=True) ) # Export all summary queries