Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/trinity_coordinator/runtime.ex
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@ defmodule TrinityCoordinator.Runtime do
@doc """
Returns a compact backend label for a tensor.
"""
def tensor_backend(%Nx.Tensor{} = tensor) do
def tensor_backend(%Nx.Tensor{data: %backend_struct{}} = tensor) do
inspected = inspect(tensor)

cond do
String.contains?(inspected, "EXLA.Backend<cuda") -> "EXLA.Backend<cuda:"
String.contains?(inspected, "EXLA.Backend<host") -> "EXLA.Backend<host:"
String.contains?(inspected, "EXLA.Backend<") -> "EXLA.Backend"
backend_struct == Emily.Backend -> "Emily.Backend"
backend_struct == Nx.BinaryBackend -> "Nx.BinaryBackend"
String.contains?(inspected, "Nx.BinaryBackend") -> "Nx.BinaryBackend"
true -> "unknown"
end
Comment on lines +45 to 56

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nshkrdotcom what's the intention behind this selection?
I think it's a bit brittle in that any backend that you want to support ends up having to be explicitly supported.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@polvalente You're right that the cond was brittle, and that brittleness goes deeper than the one site you flagged. The same shape was duplicated in three near-identical private backend_from_label/1 clauses in Sakana.{Artifact, Head, PythonImporter}, and each silently fell back to Nx.BinaryBackend for any label the cond didn't enumerate. Once tensor_backend/1 started producing generic labels for backends like "EMLX.Backend" or "Emily.Backend" (which it now does in 21c3088), the silent coercion to Nx.BinaryBackend would have been a correctness hazard at the alignment / transfer call sites, not just a cosmetic issue.

Landed on main:

  • lib/trinity_coordinator/runtime.ex: tensor_backend/1 now binds %Nx.Tensor{data: %backend_struct{}} and the default returns backend_struct |> Module.split() |> Enum.join("."). EXLA's <cuda:N> / <host:N> device-info prefixes are preserved on the inspect-based path (they encode device identity into the inspect string, so the inspect form is still the right tool there). Added in 21c3088, hardened in the same commit.
  • New lib/trinity_coordinator/runtime/backend_label.ex with from_label/1 ({:ok, backend_spec} | {:error, {:unknown_backend_label, label}}) and from_label!/1 (logs Logger.warning and falls back to Nx.BinaryBackend for unknown labels — preserves the prior behaviour but makes it audible instead of silent). The three Sakana modules now call the helper; their private cond chains are gone.
  • Phase 2 test coverage in test/trinity_coordinator/runtime_backend_label_test.exs pins the generic-default contract using synthesised fixture backend modules so it runs without a CUDA host. Phase 3 coverage in test/trinity_coordinator/runtime/backend_label_test.exs pins the EMLX label round-trip (the lane that would have been silently broken under the old fallback) plus the Logger.warning on unknown labels.

The same generalization is what unblocked landing a first-class :emily profile (93cbcae): the four-line def resolve(:emily) clause mirrors :emlx plus ships an empirical margin override, and accepts_backend_label?/2 accepts "Emily.Backend" for free via the generic prefix path you suggested. No per-backend code edits needed.

Thanks for the catch.

Expand Down
25 changes: 24 additions & 1 deletion lib/trinity_coordinator/runtime_profile.ex
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,29 @@ defmodule TrinityCoordinator.RuntimeProfile do
}
end

# Emily 0.4.0 profile, intended for validation passes only. It follows the
# intent of the :emlx lane (Apple Silicon, no CUDA, full Qwen runtime,
# exporter enabled) but selects Emily.Backend rather than EMLX.Backend.
# When this profile is chosen, Coordinator.load/1 rewrites
# model_info.model with Emily.Bumblebee.FastKernels; on any other backend
# the rewritten layers use their composed-defn fallbacks.
def resolve(:emily) do
  notes = [
    "Apple Silicon profile via Emily 0.4.0. Validation-only; the canonical ",
    "Apple lane is :emlx. See guides/runtime_profiles.md."
  ]

  %__MODULE__{
    # Identity and backend selection.
    name: :emily,
    nx_backend: Emily.Backend,
    default_slm_profile: :qwen_coordinator,
    # Capabilities that are switched off for this lane.
    require_cuda?: false,
    large_svd?: false,
    # Capabilities that are switched on for this lane.
    qwen_runtime?: true,
    export_svd?: true,
    artifact_runtime?: true,
    notes: notes
  }
end

def resolve({:custom, backend, opts}) when is_atom(backend) and is_list(opts) do
%__MODULE__{
name: :custom,
Expand All @@ -155,7 +178,7 @@ defmodule TrinityCoordinator.RuntimeProfile do
Returns the list of built-in profile names.
"""
@spec builtin_names() :: [atom()]
def builtin_names, do: [:cuda_exla, :host_exla, :binary, :mock_tiny, :emlx]
def builtin_names, do: [:cuda_exla, :host_exla, :binary, :mock_tiny, :emlx, :emily]

@doc """
Sets the current process default Nx backend to the profile's backend.
Expand Down
20 changes: 20 additions & 0 deletions lib/trinity_coordinator/sakana/coordinator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ defmodule TrinityCoordinator.Sakana.Coordinator do
|> Map.update!(:load_options, fn lo -> Keyword.put(lo, :backend, backend) end)

with {:ok, {model_info, tokenizer}} <- SLMProfile.load_profile(slm_profile),
{:ok, model_info} <- maybe_apply_fast_kernels(model_info, profile),
{:ok, manifest} <- Artifact.load_manifest(opts[:artifact_dir]),
head_weights <- Artifact.load_router_head!(opts[:artifact_dir], manifest: manifest),
{:ok, head_state} <-
Expand Down Expand Up @@ -114,6 +115,25 @@ defmodule TrinityCoordinator.Sakana.Coordinator do
{:error, {:coordinator_load_error, Exception.message(e)}}
end

# When the selected runtime profile is :emily, pass the Bumblebee model
# through Emily.Bumblebee.FastKernels. That rewrite replaces the RMSNorm /
# LayerNorm / RoPE / SDPA Axon layers with `Emily.Fast.*` calls, which
# dispatch to fused `mx::fast::*` kernels under Emily.Backend and fall
# through to composed-defn equivalents on any other backend.
#
# Returns {:ok, model_info} with the rewritten model, or
# {:error, {:emily_fast_kernels_unavailable, message}} when the
# FastKernels module cannot be loaded. Every profile other than :emily
# gets model_info back untouched.
defp maybe_apply_fast_kernels(model_info, %RuntimeProfile{name: :emily}) do
  if Code.ensure_loaded?(Emily.Bumblebee.FastKernels) do
    rewritten = update_in(model_info.model, &Emily.Bumblebee.FastKernels.apply/1)
    {:ok, rewritten}
  else
    message =
      "Emily.Bumblebee.FastKernels not loaded; ensure :emily, :axon, and :bumblebee are all in the dep tree."

    {:error, {:emily_fast_kernels_unavailable, message}}
  end
end

defp maybe_apply_fast_kernels(model_info, _profile) do
  {:ok, model_info}
end

@doc """
Extracts a vector with the adapted model and routes it through the artifact head.
"""
Expand Down
5 changes: 5 additions & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ defmodule TrinityCoordinator.MixProject do
# application's deps; the :emlx runtime profile then resolves to
# the EMLX.Backend at runtime via Code.ensure_loaded?/1. See
# guides/runtime_profiles.md.
# VALIDATION ONLY: Emily 0.4.0 pulled in via the feature branch so
# this BEAM can run the export + prompt eval on Apple Silicon.
# Same optional/load-only pattern as EMLX above; do NOT carry this
# line back to main.
{:emily, "~> 0.4", only: [:dev, :test]},
DependencySources.dep(:inference, __DIR__),
DependencySources.dep(:agent_session_manager, __DIR__),
DependencySources.dep(:gemini_cli_sdk, __DIR__),
Expand Down
10 changes: 10 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
%{
"agent_session_manager": {:git, "https://github.com/nshkrdotcom/agent_session_manager.git", "f25dcb540b7c56872d8f5bd07bbf4458c0282e0d", [branch: "main"]},
"axon": {:hex, :axon, "0.8.1", "c4a975e62a14ab6c374997b77367ec3a4c2740952ac474d3b0f202c91b7f75c4", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.10", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1 or ~> 4.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "682a3517489300507ac9345f28341e7fa95bc5b4960d645816074ce551795d37"},
"boundary": {:hex, :boundary, "0.10.4", "5fec5d2736c12f9bfe1720c3a2bd8c48c3547c24d6002ebf8e087570afd5bd2f", [:mix], [], "hexpm", "8baf6f23987afdb1483033ed0bde75c9c703613c22ed58d5f23bf948f203247c"},
"bumblebee": {:git, "https://github.com/elixir-nx/bumblebee.git", "d0774e8ab8c4d5ac60ade95ec8dc9e1f0efd7306", [ref: "d0774e8ab8c4d5ac60ade95ec8dc9e1f0efd7306"]},
"bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
"castore": {:hex, :castore, "1.0.19", "6903cabdfd9d1af46454126e7c8385186659dd33ecfb74a885cae52221ad6109", [:mix], [], "hexpm", "3669e6cab13f54c2df26b3e6833745d647f35b6e30d8ddd5975df0d5c842ca98"},
"cli_subprocess_core": {:git, "https://github.com/nshkrdotcom/cli_subprocess_core.git", "4fd7c330353ba86cddc82c2316f7f53a79879e4f", [branch: "main"]},
"complex": {:hex, :complex, "0.7.0", "695632ef9487517aa5d57edd1697801079d622414cb2e1a7cf538b1f9a50f205", [:mix], [], "hexpm", "0ee39c0803129f546e7f3f640da8f021c9e659402bf59da6f7f2c4848f068f8d"},
"credo": {:hex, :credo, "1.7.18", "5c5596bf7aedf9c8c227f13272ac499fe8eae6237bd326f2f07dfc173786f042", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "a189d164685fd945809e862fe76a7420c4398fa288d76257662aecb909d6b3e5"},
"decimal": {:hex, :decimal, "2.4.1", "6c0fbede12fb122ba685e9ab41c6a40c129e322b3aa192f9e072e61f3a6ffaf2", [:mix], [], "hexpm", "7e618897933a8455f19a727d7c5e50a2c071a544b700e5e724298ecb4340187f"},
"dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"},
"earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"},
"elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
"emily": {:hex, :emily, "0.4.0", "675169697f68e46c8effdd8840e3d57b8bebb9012924e4228d8709caa0cca880", [:mix], [{:axon, "~> 0.8", [hex: :axon, repo: "hexpm", optional: true]}, {:bumblebee, "~> 0.7", [hex: :bumblebee, repo: "hexpm", optional: true]}, {:elixir_make, "~> 0.9", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1", [hex: :fine, repo: "hexpm", optional: false]}, {:nx, "~> 0.12", [hex: :nx, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.5", [hex: :tokenizers, repo: "hexpm", optional: true]}], "hexpm", "b0a645c979e3a79a9b5b1d17c4327bb2e80fef15e63b5c74625d92bb0918132c"},
"erlex": {:hex, :erlex, "0.2.9", "7debbbaa9f4f368b8cd648983e0f1d7963028508e9c59e9d4ed504e94ef52a55", [:mix], [], "hexpm", "8cfffc0ec7159e6d73de2ab28a588064de80f88b2798d5cbe4482cbbc200178b"},
"erlexec": {:hex, :erlexec, "2.3.0", "90dff0f8974c71e85325c277d81855a7ac2882ce43ce4110d227d1e91c4f1b37", [:rebar3], [], "hexpm", "2cf7dcb67c91dce78c31efdf08de8bd103dd7c8a297d20db1b76dfc00b5a5a24"},
"ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"},
"execution_plane": {:git, "https://github.com/nshkrdotcom/execution_plane.git", "c2d747eef776d7d0647175742dd86147293c8b0e", [branch: "main", subdir: "core/execution_plane"]},
"execution_plane_jsonrpc": {:git, "https://github.com/nshkrdotcom/execution_plane.git", "c2d747eef776d7d0647175742dd86147293c8b0e", [branch: "main", subdir: "protocols/execution_plane_jsonrpc"]},
"execution_plane_process": {:git, "https://github.com/nshkrdotcom/execution_plane.git", "c2d747eef776d7d0647175742dd86147293c8b0e", [branch: "main", subdir: "runtimes/execution_plane_process"]},
"exla": {:git, "https://github.com/elixir-nx/nx.git", "6424c8902380380cd7a8c282b0557d653aead018", [sparse: "exla", ref: "6424c8902380380cd7a8c282b0557d653aead018"]},
"file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"},
"finch": {:hex, :finch, "0.22.0", "5c48fa6f9706a78eb9036cacb67b8b996b4e66d111c543f4c29bb0f879a6806b", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.8", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b94e83c47780fc6813f746a1f1a34ee65cda42da4c5ea26a68f0acc4498e23dc"},
"fine": {:hex, :fine, "0.1.6", "4bf7151493443c454aac9f2fa2f34f5fefd0346a83fb5586a016c4a135c63247", [:mix], [], "hexpm", "5638eb4495488e885ebec167fa57973e5c35e1a50c344eb7666c90ec1c4e3b12"},
"gemini_cli_sdk": {:git, "https://github.com/nshkrdotcom/gemini_cli_sdk.git", "1a919803479296673155aa8c006ac259a6032f95", [branch: "main"]},
"ground_plane_contracts": {:git, "https://github.com/nshkrdotcom/ground_plane.git", "aab153c1b48b06281d5f13487cdcb3dac39e5cb8", [branch: "main", subdir: "core/ground_plane_contracts"]},
"ground_plane_persistence_policy": {:git, "https://github.com/nshkrdotcom/ground_plane.git", "aab153c1b48b06281d5f13487cdcb3dac39e5cb8", [branch: "main", subdir: "core/persistence_policy"]},
"hf_hub": {:hex, :hf_hub, "0.2.0", "12ccd182cd28f46382f3e2254d71ca770e5c3f77f26e2f85ff31393918eb2977", [:mix], [{:explorer, "~> 0.10", [hex: :explorer, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.9", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "b815c22f70f2ee31ba7da3c8a13c7ed43d058662a650c2eed56b0603a34fb7d8"},
"hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"},
"inference": {:git, "https://github.com/nshkrdotcom/inference.git", "6e992f43780501bf3df08dbce0192f427c4f0559", [branch: "main", subdir: "apps/inference"]},
"jason": {:hex, :jason, "1.4.5", "2e3a008590b0b8d7388c20293e9dcc9cf3e5d642fd2a114e4cbbb52e595d940a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b0c823996102bcd0239b3c2444eb00409b72f6a140c1950bc8b457d836b30684"},
"makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
"makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
Expand Down