From 9c118f289820c790bef03a156b7d902dc3df502e Mon Sep 17 00:00:00 2001 From: Ali Caglayan Date: Tue, 7 Apr 2026 21:51:09 +0200 Subject: [PATCH 1/3] feat(pkg): use the relocatable compiler Add ocaml-dune/opam-repository-relocatable as a default repository for package management. This is a mirror of dra27/opam-repository#relocatable which provides overlay packages for building a relocatable OCaml compiler. When this repository is included, the solver will pick up the relocatable-compiler meta-package, allowing toolchain builds to be cached as regular packages instead of using the toolchain cache. Signed-off-by: Ali Caglayan --- doc/changes/added/14357.md | 2 ++ src/dune_pkg/workspace.ml | 10 ++++++++++ src/dune_pkg/workspace.mli | 1 + src/source/workspace.ml | 4 +++- 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 doc/changes/added/14357.md diff --git a/doc/changes/added/14357.md b/doc/changes/added/14357.md new file mode 100644 index 00000000000..fc23228c1fc --- /dev/null +++ b/doc/changes/added/14357.md @@ -0,0 +1,2 @@ +- Enable the relocatable compiler by default for package management (#14357, + @Alizter) diff --git a/src/dune_pkg/workspace.ml b/src/dune_pkg/workspace.ml index 64cf00beeb9..00dccef964e 100644 --- a/src/dune_pkg/workspace.ml +++ b/src/dune_pkg/workspace.ml @@ -51,6 +51,16 @@ module Repository = struct } ;; + let relocatable = + { name = "relocatable" + ; url = + ( Loc.none + , OpamUrl.of_string + "git+https://github.com/ocaml-dune/opam-repository-relocatable.git#relocatable" + ) + } + ;; + let binary_packages = { name = "binary-packages" ; url = diff --git a/src/dune_pkg/workspace.mli b/src/dune_pkg/workspace.mli index 9187e3d88e7..9760e35ac4b 100644 --- a/src/dune_pkg/workspace.mli +++ b/src/dune_pkg/workspace.mli @@ -9,6 +9,7 @@ module Repository : sig val equal : t -> t -> bool val upstream : t val overlay : t + val relocatable : t val binary_packages : t val decode : t Decoder.t diff --git a/src/source/workspace.ml 
b/src/source/workspace.ml index 6a815374695..1537c35effd 100644 --- a/src/source/workspace.ml +++ b/src/source/workspace.ml @@ -13,7 +13,9 @@ module Pin_stanza = Dune_lang.Pin_stanza module Repository = Dune_pkg.Pkg_workspace.Repository module Solver_env = Dune_pkg.Solver_env -let default_repositories = [ Repository.overlay; Repository.upstream ] +let default_repositories = + [ Repository.overlay; Repository.relocatable; Repository.upstream ] +;; module Lock_dir = struct type t = From ac212b06a0f87f34e845759df41455fe20aa297e Mon Sep 17 00:00:00 2001 From: Ali Caglayan Date: Tue, 28 Apr 2026 12:32:19 +0200 Subject: [PATCH 2/3] fix(pkg): add relocatable-compiler to compiler_package_names The relocatable-compiler package installs libraries (including threads, str, unix) under target/lib/ocaml/, matching the layout of other compiler packages. Without being listed in compiler_package_names, install_roots did not append the /ocaml subdirectory to its lib_root, causing dev-tools like utop to fail with "Library threads not found". Signed-off-by: Ali Caglayan --- doc/explanation/package-management.md | 11 ++++++++--- src/dune_pkg/dev_tool.ml | 7 ++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/doc/explanation/package-management.md b/doc/explanation/package-management.md index 977e9624176..1d79cdd9632 100644 --- a/doc/explanation/package-management.md +++ b/doc/explanation/package-management.md @@ -137,16 +137,21 @@ However, it is also possible to declare specific revisions of the repositories, to get a reproducible solution. Due to using Git, any previous revision of the repository can be used by specifying a commit hash. -Dune uses two repositories by default: +Dune uses three repositories by default, in order of priority: -* `upstream` refers to the default branch of `opam-repository`, which contains - all the publicly released packages. 
* `overlay` refers to [opam-overlays](https://github.com/ocaml-dune/opam-overlays), which defines packages patched to work with package management. The long-term goal is to have as few packages as possible in this repository as more and more packages work within Dune Package Management upstream. Check the [compatibility](#compatibility) section for details. +* `relocatable` refers to the `relocatable` branch of + [ocaml-dune/opam-repository-relocatable](https://github.com/ocaml-dune/opam-repository-relocatable/tree/relocatable), + a mirror of `dra27/opam-repository#relocatable` that provides a relocatable + version of the OCaml compiler. This allows the compiler to be built and + cached independently of the project's build directory. +* `upstream` refers to the default branch of `opam-repository`, which contains + all the publicly released packages. #### Solving diff --git a/src/dune_pkg/dev_tool.ml b/src/dune_pkg/dev_tool.ml index 4f9bc6bb904..93860f6b4be 100644 --- a/src/dune_pkg/dev_tool.ml +++ b/src/dune_pkg/dev_tool.ml @@ -128,7 +128,12 @@ let needs_to_build_with_same_compiler_as_project = function let compiler_package_names = List.map ~f:Package_name.of_string - [ "ocaml"; "ocaml-base-compiler"; "ocaml-variants"; "ocaml-compiler" ] + [ "ocaml" + ; "ocaml-base-compiler" + ; "ocaml-variants" + ; "ocaml-compiler" + ; "relocatable-compiler" + ] ;; let is_compiler_package name = From 01402235858421ca329f8c87dd40472168f88a9c Mon Sep 17 00:00:00 2001 From: Ali Caglayan Date: Mon, 4 May 2026 17:53:37 +0200 Subject: [PATCH 3/3] fix(digest): throttle file_async to avoid EMFILE file_async opens an fd eagerly and for large files holds it open across a fiber yield while queued for the background thread pool. When called via unbounded parallel_map over many targets (e.g. the relocatable compiler with thousands of files), fds accumulate faster than they are closed, exhausting the process fd limit. Add a global Fiber.Throttle in file_async to bound the number of concurrent digest operations. 
Signed-off-by: Ali Caglayan --- doc/changes/added/14357.md | 2 +- src/dune_digest/digest.ml | 46 ++++++++++++++++++++++---------------- src/dune_digest/digest.mli | 4 +++- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/doc/changes/added/14357.md b/doc/changes/added/14357.md index fc23228c1fc..871ba104a77 100644 --- a/doc/changes/added/14357.md +++ b/doc/changes/added/14357.md @@ -1,2 +1,2 @@ - Enable the relocatable compiler by default for package management (#14357, - @Alizter) + fixes #14012, @Alizter) diff --git a/src/dune_digest/digest.ml b/src/dune_digest/digest.ml index ec2ee3ac728..8ddc319aca4 100644 --- a/src/dune_digest/digest.ml +++ b/src/dune_digest/digest.ml @@ -104,28 +104,36 @@ let file file = digest_and_close_fd fd ;; +(* Throttle concurrent [file_async] calls so an unbounded [parallel_map] over + many targets (e.g. the relocatable compiler) does not exhaust the process + fd limit. 100 sits comfortably above the background thread pool's worker + count (so digesting never starves it) while staying well under the typical + 1024 fd soft limit; raising that limit is hazardous because some code + still falls back to [select()], which has a hard FD_SETSIZE (1024) cap. 
*) +let digest_throttle = lazy (Fiber.Throttle.create 100) let async_digest_minimum = 1_000 let file_async file = - let open Fiber.O in - let* () = Fiber.return () in - let fd = open_for_digest file in - Counter.incr Metrics.Digest.File.count; - let size = - match Unix.fstat (Fd.unsafe_to_unix_file_descr fd) with - | exception exn -> - Fd.close fd; - raise exn - | stat -> stat.st_size - in - Counter.add Metrics.Digest.File.bytes size; - if size = 0 - then - let+ () = Fiber.return @@ Fd.close fd in - Lazy.force zero - else if size < async_digest_minimum - then Fiber.return (digest_and_close_fd fd) - else Dune_scheduler.Scheduler.async_exn (fun () -> digest_and_close_fd fd) + Fiber.Throttle.run (Lazy.force digest_throttle) ~f:(fun () -> + let open Fiber.O in + let* () = Fiber.return () in + let fd = open_for_digest file in + Counter.incr Metrics.Digest.File.count; + let size = + match Unix.fstat (Fd.unsafe_to_unix_file_descr fd) with + | exception exn -> + Fd.close fd; + raise exn + | stat -> stat.st_size + in + Counter.add Metrics.Digest.File.bytes size; + if size = 0 + then + let+ () = Fiber.return @@ Fd.close fd in + Lazy.force zero + else if size < async_digest_minimum + then Fiber.return (digest_and_close_fd fd) + else Dune_scheduler.Scheduler.async_exn (fun () -> digest_and_close_fd fd)) ;; let equal = Blake3_mini.Digest.equal diff --git a/src/dune_digest/digest.mli b/src/dune_digest/digest.mli index f81a74ec478..685172967b5 100644 --- a/src/dune_digest/digest.mli +++ b/src/dune_digest/digest.mli @@ -108,5 +108,7 @@ val path_with_stats_async -> (t, Path_digest_error.t) result Fiber.t (** Digest a file taking the [executable] bit into account. Should not be called - on a directory. *) + on a directory. Large files are digested in the background thread pool, with the + number of concurrent calls capped by a global throttle so that we do not + exceed the process's open file descriptor limit. *) val file_with_executable_bit : executable:bool -> Path.t -> t Fiber.t