diff --git a/lib/trinity_coordinator/runtime.ex b/lib/trinity_coordinator/runtime.ex index 72b2911..a780ecf 100644 --- a/lib/trinity_coordinator/runtime.ex +++ b/lib/trinity_coordinator/runtime.ex @@ -42,13 +42,15 @@ defmodule TrinityCoordinator.Runtime do @doc """ Returns a compact backend label for a tensor. """ - def tensor_backend(%Nx.Tensor{} = tensor) do + def tensor_backend(%Nx.Tensor{data: %backend_struct{}} = tensor) do inspected = inspect(tensor) cond do String.contains?(inspected, "EXLA.Backend "EXLA.Backend "EXLA.Backend "EXLA.Backend" + backend_struct == Emily.Backend -> "Emily.Backend" + backend_struct == Nx.BinaryBackend -> "Nx.BinaryBackend" String.contains?(inspected, "Nx.BinaryBackend") -> "Nx.BinaryBackend" true -> "unknown" end diff --git a/lib/trinity_coordinator/runtime_profile.ex b/lib/trinity_coordinator/runtime_profile.ex index 65e1676..8c001d9 100644 --- a/lib/trinity_coordinator/runtime_profile.ex +++ b/lib/trinity_coordinator/runtime_profile.ex @@ -136,6 +136,29 @@ defmodule TrinityCoordinator.RuntimeProfile do } end + # Validation-pass-only profile (Emily 0.4.0). Mirrors the :emlx lane's + # intent (Apple Silicon, no CUDA, full Qwen runtime, exporter on) but + # routes through Emily.Backend instead of EMLX.Backend. + # Coordinator.load/1 applies Emily.Bumblebee.FastKernels rewrites to + # model_info.model when this profile is selected; on any other backend + # the rewritten layers fall through their composed-defn fallbacks. + def resolve(:emily) do + %__MODULE__{ + name: :emily, + nx_backend: Emily.Backend, + require_cuda?: false, + qwen_runtime?: true, + export_svd?: true, + large_svd?: false, + artifact_runtime?: true, + default_slm_profile: :qwen_coordinator, + notes: [ + "Apple Silicon profile via Emily 0.4.0. Validation-only; the canonical ", + "Apple lane is :emlx. See guides/runtime_profiles.md." 
+ ] + } + end + def resolve({:custom, backend, opts}) when is_atom(backend) and is_list(opts) do %__MODULE__{ name: :custom, @@ -155,7 +178,7 @@ defmodule TrinityCoordinator.RuntimeProfile do Returns the list of built-in profile names. """ @spec builtin_names() :: [atom()] - def builtin_names, do: [:cuda_exla, :host_exla, :binary, :mock_tiny, :emlx] + def builtin_names, do: [:cuda_exla, :host_exla, :binary, :mock_tiny, :emlx, :emily] @doc """ Sets the current process default Nx backend to the profile's backend. diff --git a/lib/trinity_coordinator/sakana/coordinator.ex b/lib/trinity_coordinator/sakana/coordinator.ex index 2906dd0..0aedd49 100644 --- a/lib/trinity_coordinator/sakana/coordinator.ex +++ b/lib/trinity_coordinator/sakana/coordinator.ex @@ -83,6 +83,7 @@ defmodule TrinityCoordinator.Sakana.Coordinator do |> Map.update!(:load_options, fn lo -> Keyword.put(lo, :backend, backend) end) with {:ok, {model_info, tokenizer}} <- SLMProfile.load_profile(slm_profile), + {:ok, model_info} <- maybe_apply_fast_kernels(model_info, profile), {:ok, manifest} <- Artifact.load_manifest(opts[:artifact_dir]), head_weights <- Artifact.load_router_head!(opts[:artifact_dir], manifest: manifest), {:ok, head_state} <- @@ -114,6 +115,25 @@ defmodule TrinityCoordinator.Sakana.Coordinator do {:error, {:coordinator_load_error, Exception.message(e)}} end + # Applies Emily.Bumblebee.FastKernels rewrites to the Bumblebee model + # when the :emily profile is selected. The rewrite swaps RMSNorm / + # LayerNorm / RoPE / SDPA Axon layers for `Emily.Fast.*` calls that + # dispatch to fused `mx::fast::*` kernels under Emily.Backend (and + # fall through to composed-defn equivalents on any other backend). 
+  defp maybe_apply_fast_kernels(model_info, %RuntimeProfile{name: :emily}) do
+    # Rewrite the model only when the FastKernels module can actually be loaded.
+    if Code.ensure_loaded?(Emily.Bumblebee.FastKernels) do
+      {:ok, update_in(model_info.model, &Emily.Bumblebee.FastKernels.apply/1)}
+    else
+      reason =
+        "Emily.Bumblebee.FastKernels not loaded; ensure :emily, :axon, and :bumblebee are all in the dep tree."
+
+      {:error, {:emily_fast_kernels_unavailable, reason}}
+    end
+  end
+
+  defp maybe_apply_fast_kernels(model_info, _profile), do: {:ok, model_info}
+
   @doc """
   Extracts a vector with the adapted model and routes it through the artifact head.
   """
diff --git a/mix.exs b/mix.exs
index 6398c58..cede3dd 100644
--- a/mix.exs
+++ b/mix.exs
@@ -114,6 +114,11 @@ defmodule TrinityCoordinator.MixProject do
       # application's deps; the :emlx runtime profile then resolves to
       # the EMLX.Backend at runtime via Code.ensure_loaded?/1. See
       # guides/runtime_profiles.md.
+      # VALIDATION ONLY: Emily 0.4.0 pulled in via the feature branch so
+      # this BEAM can run the export + prompt eval on Apple Silicon.
+      # Same optional/load-only pattern as EMLX above; do NOT carry this
+      # line back to main.
+ {:emily, "~> 0.4", only: [:dev, :test]}, DependencySources.dep(:inference, __DIR__), DependencySources.dep(:agent_session_manager, __DIR__), DependencySources.dep(:gemini_cli_sdk, __DIR__), diff --git a/mix.lock b/mix.lock index ca9b46e..0c7cbaf 100644 --- a/mix.lock +++ b/mix.lock @@ -1,24 +1,34 @@ %{ + "agent_session_manager": {:git, "https://github.com/nshkrdotcom/agent_session_manager.git", "f25dcb540b7c56872d8f5bd07bbf4458c0282e0d", [branch: "main"]}, "axon": {:hex, :axon, "0.8.1", "c4a975e62a14ab6c374997b77367ec3a4c2740952ac474d3b0f202c91b7f75c4", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.10", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1 or ~> 4.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "682a3517489300507ac9345f28341e7fa95bc5b4960d645816074ce551795d37"}, "boundary": {:hex, :boundary, "0.10.4", "5fec5d2736c12f9bfe1720c3a2bd8c48c3547c24d6002ebf8e087570afd5bd2f", [:mix], [], "hexpm", "8baf6f23987afdb1483033ed0bde75c9c703613c22ed58d5f23bf948f203247c"}, "bumblebee": {:git, "https://github.com/elixir-nx/bumblebee.git", "d0774e8ab8c4d5ac60ade95ec8dc9e1f0efd7306", [ref: "d0774e8ab8c4d5ac60ade95ec8dc9e1f0efd7306"]}, "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castore": {:hex, :castore, "1.0.19", "6903cabdfd9d1af46454126e7c8385186659dd33ecfb74a885cae52221ad6109", [:mix], [], "hexpm", "3669e6cab13f54c2df26b3e6833745d647f35b6e30d8ddd5975df0d5c842ca98"}, + "cli_subprocess_core": {:git, "https://github.com/nshkrdotcom/cli_subprocess_core.git", "4fd7c330353ba86cddc82c2316f7f53a79879e4f", [branch: "main"]}, "complex": {:hex, :complex, "0.7.0", 
"695632ef9487517aa5d57edd1697801079d622414cb2e1a7cf538b1f9a50f205", [:mix], [], "hexpm", "0ee39c0803129f546e7f3f640da8f021c9e659402bf59da6f7f2c4848f068f8d"}, "credo": {:hex, :credo, "1.7.18", "5c5596bf7aedf9c8c227f13272ac499fe8eae6237bd326f2f07dfc173786f042", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "a189d164685fd945809e862fe76a7420c4398fa288d76257662aecb909d6b3e5"}, "decimal": {:hex, :decimal, "2.4.1", "6c0fbede12fb122ba685e9ab41c6a40c129e322b3aa192f9e072e61f3a6ffaf2", [:mix], [], "hexpm", "7e618897933a8455f19a727d7c5e50a2c071a544b700e5e724298ecb4340187f"}, "dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"}, "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"}, + "emily": {:hex, :emily, "0.4.0", "675169697f68e46c8effdd8840e3d57b8bebb9012924e4228d8709caa0cca880", [:mix], [{:axon, "~> 0.8", [hex: :axon, repo: "hexpm", optional: true]}, {:bumblebee, "~> 0.7", [hex: :bumblebee, repo: "hexpm", optional: true]}, {:elixir_make, "~> 0.9", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1", [hex: :fine, repo: "hexpm", optional: false]}, {:nx, "~> 0.12", [hex: :nx, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.5", [hex: :tokenizers, repo: "hexpm", optional: true]}], "hexpm", 
"b0a645c979e3a79a9b5b1d17c4327bb2e80fef15e63b5c74625d92bb0918132c"}, "erlex": {:hex, :erlex, "0.2.9", "7debbbaa9f4f368b8cd648983e0f1d7963028508e9c59e9d4ed504e94ef52a55", [:mix], [], "hexpm", "8cfffc0ec7159e6d73de2ab28a588064de80f88b2798d5cbe4482cbbc200178b"}, "erlexec": {:hex, :erlexec, "2.3.0", "90dff0f8974c71e85325c277d81855a7ac2882ce43ce4110d227d1e91c4f1b37", [:rebar3], [], "hexpm", "2cf7dcb67c91dce78c31efdf08de8bd103dd7c8a297d20db1b76dfc00b5a5a24"}, "ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"}, + "execution_plane": {:git, "https://github.com/nshkrdotcom/execution_plane.git", "c2d747eef776d7d0647175742dd86147293c8b0e", [branch: "main", subdir: "core/execution_plane"]}, + "execution_plane_jsonrpc": {:git, "https://github.com/nshkrdotcom/execution_plane.git", "c2d747eef776d7d0647175742dd86147293c8b0e", [branch: "main", subdir: "protocols/execution_plane_jsonrpc"]}, + "execution_plane_process": {:git, "https://github.com/nshkrdotcom/execution_plane.git", "c2d747eef776d7d0647175742dd86147293c8b0e", [branch: "main", subdir: "runtimes/execution_plane_process"]}, "exla": {:git, "https://github.com/elixir-nx/nx.git", "6424c8902380380cd7a8c282b0557d653aead018", [sparse: "exla", ref: "6424c8902380380cd7a8c282b0557d653aead018"]}, "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", 
"7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, "finch": {:hex, :finch, "0.22.0", "5c48fa6f9706a78eb9036cacb67b8b996b4e66d111c543f4c29bb0f879a6806b", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.8", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b94e83c47780fc6813f746a1f1a34ee65cda42da4c5ea26a68f0acc4498e23dc"}, "fine": {:hex, :fine, "0.1.6", "4bf7151493443c454aac9f2fa2f34f5fefd0346a83fb5586a016c4a135c63247", [:mix], [], "hexpm", "5638eb4495488e885ebec167fa57973e5c35e1a50c344eb7666c90ec1c4e3b12"}, + "gemini_cli_sdk": {:git, "https://github.com/nshkrdotcom/gemini_cli_sdk.git", "1a919803479296673155aa8c006ac259a6032f95", [branch: "main"]}, + "ground_plane_contracts": {:git, "https://github.com/nshkrdotcom/ground_plane.git", "aab153c1b48b06281d5f13487cdcb3dac39e5cb8", [branch: "main", subdir: "core/ground_plane_contracts"]}, + "ground_plane_persistence_policy": {:git, "https://github.com/nshkrdotcom/ground_plane.git", "aab153c1b48b06281d5f13487cdcb3dac39e5cb8", [branch: "main", subdir: "core/persistence_policy"]}, "hf_hub": {:hex, :hf_hub, "0.2.0", "12ccd182cd28f46382f3e2254d71ca770e5c3f77f26e2f85ff31393918eb2977", [:mix], [{:explorer, "~> 0.10", [hex: :explorer, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.9", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "b815c22f70f2ee31ba7da3c8a13c7ed43d058662a650c2eed56b0603a34fb7d8"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", 
"8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, + "inference": {:git, "https://github.com/nshkrdotcom/inference.git", "6e992f43780501bf3df08dbce0192f427c4f0559", [branch: "main", subdir: "apps/inference"]}, "jason": {:hex, :jason, "1.4.5", "2e3a008590b0b8d7388c20293e9dcc9cf3e5d642fd2a114e4cbbb52e595d940a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b0c823996102bcd0239b3c2444eb00409b72f6a140c1950bc8b457d836b30684"}, "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},