Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## Version v0.6.
## Version v0.7.

## Description
Brief description of changes.
Expand Down
144 changes: 144 additions & 0 deletions deriva/adapters/graph/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,3 +643,147 @@ def clear_graph(self) -> None:
except Exception as e:
logger.error(f"Failed to clear graph: {e}")
raise

def clear_graph_for_repo(self, repo_name: str) -> int:
    """Remove every node (and its edges) that belongs to one repository.

    Only nodes that carry the namespace label and whose ``repository_name``
    property equals ``repo_name`` are matched. The matching nodes are
    counted first and the delete is skipped entirely when nothing matches.

    Args:
        repo_name: Name of the repository whose nodes should be removed.

    Returns:
        Number of nodes that matched the filter (0 when none were found).

    Raises:
        RuntimeError: If no database connection is open.
        Exception: Any database error is logged and then re-raised.
    """
    if self.db is None:
        raise RuntimeError("Not connected to grafeo. Call connect() first.")

    try:
        label = self.namespace

        # The count has to come from a separate read query:
        # grafeo can't RETURN after DETACH DELETE.
        rows = self.db.execute_read(
            f"""
            MATCH (n:`{label}`)
            WHERE n.repository_name = $repo_name
            RETURN count(n) as cnt
            """,
            {"repo_name": repo_name},
        )
        removed = 0
        if rows:
            removed = rows[0]["cnt"]

        if removed > 0:
            self.db.execute_write(
                f"""
                MATCH (n:`{label}`)
                WHERE n.repository_name = $repo_name
                DETACH DELETE n
                """,
                {"repo_name": repo_name},
            )

        logger.info(
            "Cleared %d nodes for repo '%s' from namespace '%s'",
            removed,
            repo_name,
            self.namespace,
        )
        return removed

    except Exception as e:
        logger.error("Failed to clear graph for repo '%s': %s", repo_name, e)
        raise

def has_extraction(self, repo_name: str) -> bool:
    """Report whether a Repository node exists for ``repo_name``.

    Counts nodes labelled ``Repository`` whose ``repository_name`` property
    equals ``repo_name``. This is a best-effort check: any error raised
    while querying or reading the result is logged and reported as
    "no extraction" instead of being propagated.

    Args:
        repo_name: Repository name to look up.

    Returns:
        True when at least one matching Repository node is found; False
        when none is found or the lookup failed.

    Raises:
        RuntimeError: If no database connection is open.
    """
    if self.db is None:
        raise RuntimeError("Not connected to grafeo. Call connect() first.")

    cypher = """
        MATCH (n:Repository)
        WHERE n.repository_name = $repo_name
        RETURN count(n) as cnt
    """
    try:
        rows = self.db.execute_read(cypher, {"repo_name": repo_name})
        if not rows:
            return False
        return rows[0]["cnt"] > 0

    except Exception as e:
        logger.error("Failed to check extraction for '%s': %s", repo_name, e)
        return False

def get_extraction_fingerprint(self, repo_name: str) -> str | None:
"""Get the stored extraction fingerprint for a repository.

The fingerprint is a hash of extraction config versions and repo commit,
stored on the Repository node after extraction completes.

Args:
repo_name: Repository name

Returns:
Fingerprint hash string, or None if not set
"""
if self.db is None:
raise RuntimeError("Not connected to grafeo. Call connect() first.")

try:
query = """
MATCH (n:Repository)
WHERE n.repository_name = $repo_name
RETURN n.extraction_fingerprint as fingerprint
"""
result = self.db.execute_read(query, {"repo_name": repo_name})
if result and result[0].get("fingerprint"):
return result[0]["fingerprint"]
return None

except Exception as e:
logger.error("Failed to get fingerprint for '%s': %s", repo_name, e)
return None

def set_extraction_fingerprint(self, repo_name: str, fingerprint: str) -> bool:
    """Write an extraction fingerprint onto the matching Repository node.

    Sets the ``extraction_fingerprint`` property on nodes labelled
    ``Repository`` whose ``repository_name`` equals ``repo_name``.

    Args:
        repo_name: Repository name identifying the node to update.
        fingerprint: Hash string to store.

    Returns:
        True when the write query returned at least one row, False when it
        returned nothing (no matching Repository node).

    Raises:
        RuntimeError: If no database connection is open.
        Exception: Any database error is logged and then re-raised.
    """
    if self.db is None:
        raise RuntimeError("Not connected to grafeo. Call connect() first.")

    cypher = """
        MATCH (n:Repository)
        WHERE n.repository_name = $repo_name
        SET n.extraction_fingerprint = $fingerprint
        RETURN n.id as id
    """
    try:
        rows = self.db.execute_write(
            cypher, {"repo_name": repo_name, "fingerprint": fingerprint}
        )
        if not rows:
            return False

        # Only a short prefix of the hash is logged.
        logger.info(
            "Set extraction fingerprint for '%s': %s...",
            repo_name,
            fingerprint[:12],
        )
        return True

    except Exception as e:
        logger.error("Failed to set fingerprint for '%s': %s", repo_name, e)
        raise
2 changes: 1 addition & 1 deletion deriva/adapters/repository/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class FileNode:

def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary recursively."""
result = {
result: dict[str, Any] = {
"name": self.name,
"path": self.path,
"type": self.type,
Expand Down
187 changes: 187 additions & 0 deletions deriva/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,96 @@ def _(clear_graph_btn, get_graph_refresh, mo, session, set_graph_refresh):
return


@app.cell(hide_code=True)
def _(mo):
    # Markdown-only cell: renders the "Extraction Graph Visualization"
    # section heading. It defines nothing for other cells to use.
    mo.md(r"""
    ## Extraction Graph Visualization
    """)
    return


@app.cell
def _(get_graph_refresh, mo, session):
    from anywidget_graph import Graph

    # Read the refresh state so this cell depends on it; presumably this makes
    # the cell re-run whenever the graph is rebuilt or cleared (confirm against
    # the cells that call the matching setter).
    _ = get_graph_refresh()

    # Node types to display, in display order, each with its colour.
    _palette = {
        "Repository": "#e74c3c",
        "Directory": "#f39c12",
        "File": "#3498db",
        "BusinessConcept": "#9b59b6",
        "Technology": "#1abc9c",
        "TypeDefinition": "#2ecc71",
        "Method": "#e67e22",
        "Test": "#95a5a6",
        "ExternalDependency": "#34495e",
    }

    # Collect one entry per distinct, non-empty node id across all types.
    _nodes = []
    _node_ids = set()
    for _kind, _colour in _palette.items():
        for _row in session.get_graph_nodes(_kind):
            _nid = _row["id"]
            if not _nid or _nid in _node_ids:
                continue
            _node_ids.add(_nid)
            _nodes.append(
                {
                    "id": _nid,
                    # Fall back to the last "::" segment of the id, then the type.
                    "label": _row["label"] or _nid.split("::")[-1] or _kind,
                    "group": _kind,
                    "color": _colour,
                }
            )

    # Get edges via Cypher (deduplicated), keeping only edges whose two
    # endpoints are both among the collected nodes.
    _edges = []
    _seen_edges = set()
    if _node_ids:
        for _rec in session.query_graph("MATCH (src)-[r]->(dst) RETURN src.id as source, type(r) as label, dst.id as target"):
            _src = _rec["source"]
            _tgt = _rec["target"]
            if _src is None or _tgt is None:
                continue
            if not (_src in _node_ids and _tgt in _node_ids):
                continue
            _rel = _rec.get("label", "")
            _key = (_src, _tgt, _rel)
            if _key in _seen_edges:
                continue
            _seen_edges.add(_key)
            _edges.append({"source": _src, "target": _tgt, "label": _rel})

    if not _nodes:
        # Nothing extracted yet: show a hint instead of an empty widget.
        extraction_graph = None
        _output = mo.md("_Run extraction to see the graph_")
    else:
        extraction_graph = Graph.from_dict(
            {"nodes": _nodes, "edges": _edges},
            width=800,
            height=500,
            show_labels=True,
            show_edge_labels=False,
            dark_mode=True,
            layout="force",
        )
        _summary = mo.md(f"**Nodes:** {len(_nodes)} | **Edges:** {len(_edges)}")
        _output = mo.vstack([_summary, extraction_graph])

    _output
    return (extraction_graph,)


@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Expand Down Expand Up @@ -590,6 +680,103 @@ def _(clear_model_btn, get_model_refresh, mo, session, set_model_refresh):
return


@app.cell(hide_code=True)
def _(mo):
    # Markdown-only cell: renders the "Model Graph Visualization" section
    # heading. It defines nothing for other cells to use.
    mo.md(r"""
    ## Model Graph Visualization
    """)
    return


@app.cell
def _(get_model_refresh, mo, session):
    from anywidget_graph import Graph as ModelGraph

    # Read the refresh state so this cell depends on it; presumably this makes
    # the cell re-run whenever the model is rebuilt or cleared (confirm against
    # the cells that call the matching setter).
    _ = get_model_refresh()

    # Colour per element type; types not listed get a neutral grey below.
    _layer_colors = {
        "ApplicationComponent": "#3498db",
        "ApplicationInterface": "#2980b9",
        "ApplicationService": "#1f6dad",
        "DataObject": "#5dade2",
        "BusinessActor": "#e74c3c",
        "BusinessProcess": "#c0392b",
        "BusinessFunction": "#e67e22",
        "BusinessEvent": "#f39c12",
        "BusinessObject": "#d35400",
        "Node": "#1abc9c",
        "Device": "#16a085",
        "SystemSoftware": "#2ecc71",
        "TechnologyService": "#27ae60",
    }

    _elements = session.get_archimate_elements()
    _relationships = session.get_archimate_relationships()

    # One node per distinct, non-empty element identifier.
    _nodes = []
    _node_ids = set()
    for _el in _elements:
        # `or ""` covers keys that are present but None as well as missing
        # keys, so the startswith() calls below can never hit None.
        _etype = _el.get("element_type") or ""
        _eid = _el.get("identifier") or ""
        if not _eid or _eid in _node_ids:
            continue
        _node_ids.add(_eid)

        # Determine layer for grouping; anything that is neither Business nor
        # Application (nor DataObject) is grouped as Technology.
        if _etype.startswith("Business"):
            _layer = "Business"
        elif _etype.startswith("Application") or _etype == "DataObject":
            _layer = "Application"
        else:
            _layer = "Technology"

        _nodes.append(
            {
                "id": _eid,
                # Fall back to the identifier when the name is missing, None
                # or empty, so every node always has a visible label.
                "label": _el.get("name") or _eid,
                "group": _layer,
                "color": _layer_colors.get(_etype, "#95a5a6"),
            }
        )

    # Deduplicated relationships whose endpoints are both displayed elements.
    _edges = []
    _seen_edges = set()
    for _rel in _relationships:
        _src = _rel.get("source", "")
        _tgt = _rel.get("target", "")
        _rtype = _rel.get("relationship_type", "")
        if _src not in _node_ids or _tgt not in _node_ids:
            continue
        _key = (_src, _tgt, _rtype)
        if _key in _seen_edges:
            continue
        _seen_edges.add(_key)
        _edges.append(
            {
                "source": _src,
                "target": _tgt,
                "label": _rtype,
            }
        )

    if _nodes:
        model_graph = ModelGraph.from_dict(
            {"nodes": _nodes, "edges": _edges},
            width=800,
            height=500,
            show_labels=True,
            show_edge_labels=True,
            dark_mode=True,
            layout="force",
        )
        _output = mo.vstack(
            [
                mo.md(f"**Elements:** {len(_nodes)} | **Relationships:** {len(_edges)}"),
                model_graph,
            ]
        )
    else:
        # No derived elements yet: show a hint instead of an empty widget.
        model_graph = None
        _output = mo.md("_Run derivation to see the model graph_")

    _output
    return (model_graph,)


@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Expand Down
6 changes: 5 additions & 1 deletion deriva/cli/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ def run_stage(
else:
derivation_configs = session.get_derivation_configs()
for cfg in derivation_configs:
name = cfg.get("step_name", cfg.get("name", ""))
name = (
cfg.get("element_type", "")
or cfg.get("step_name", "")
or cfg.get("name", "")
)
if name == only_step:
session.enable_step("derivation", name)
else:
Expand Down
Loading
Loading