Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
working version
---------------
* add Speedscope export format: set `format=speedscope` in `OCAML_LANDMARKS`
to write a sampled flame-graph profile that can be opened at
https://www.speedscope.app (combine with `time` to get weights in seconds;
otherwise weights are in CPU cycles)

version 1.6, 12 may 2026
------------------------
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,11 @@ This variable is parsed as a comma-separated list of items of the form

* When loading an instrumented program (at runtime):

* `format` with possible arguments: `textual` (default) or `json`. It controls
the output format of the profiling which is either a console friendly
representation or json encoding of the callgraph.
* `format` with possible arguments: `textual` (default), `json`, or `speedscope`.
It controls the output format of the profiling: a console-friendly
representation, a JSON encoding of the callgraph, or a
[Speedscope](https://www.speedscope.app) sampled profile (combine with
`time` to get weights in seconds; otherwise weights are in CPU cycles).

* `threshold` with a number between 0.0 and 100.0 as argument (default: 1.0). If the threshold is not zero the textual output will hide nodes in the callgraph below this threshold (in percent of time of their parent). This option is meaningless for other formats.

Expand Down
1 change: 1 addition & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
(depends
(ocaml (>= 4.08))
(yojson (>= 1.6))
(js_of_ocaml (and :with-test (> 5)))
)
(conflicts ocaml-option-bytecode-only)
Expand Down
1 change: 1 addition & 0 deletions landmarks.opam
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ bug-reports: "https://github.com/LexiFi/landmarks/issues"
depends: [
"dune" {>= "3.16"}
"ocaml" {>= "4.08"}
"yojson" {>= "1.6"}
"js_of_ocaml" {with-test & > "5"}
"odoc" {with-doc}
]
Expand Down
1 change: 1 addition & 0 deletions src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
(language c)
(names utils))
(js_of_ocaml (javascript_files utils.js))
(libraries yojson)
(instrumentation.backend
(ppx landmarks-ppx)))
8 changes: 7 additions & 1 deletion src/landmark.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ let allocated_bytes_major () = Int64.to_int (allocated_bytes_major ())
exception LandmarkFailure of string

module Graph = Graph
module Speedscope = Speedscope

module SparseArray = struct
type 'a t = {
Expand Down Expand Up @@ -256,6 +257,7 @@ type textual_option = {threshold : float}
(* Output format of the profile written by [exit_hook]:
   - [JSON] is written with [Graph.output_json];
   - [Textual] is written with [Graph.output], passing its [threshold];
   - [Speedscope] is written with [Speedscope.export_to_channel]. *)
type profile_format =
| JSON
| Textual of textual_option
| Speedscope

let profiling_ref = ref false
let profile_with_debug = ref false
Expand Down Expand Up @@ -783,6 +785,8 @@ let exit_hook () =
Graph.output ~threshold out cg
| Channel out, JSON ->
Graph.output_json out cg
| Channel out, Speedscope ->
Speedscope.export_to_channel out cg
| Temporary temp_dir, format ->
let tmp_file, oc =
Filename.open_temp_file ?temp_dir "profile_at_exit" ".tmp"
Expand All @@ -792,7 +796,8 @@ let exit_hook () =
flush stdout;
(match format with
| Textual {threshold} -> Graph.output ~threshold oc cg
| JSON -> Graph.output_json oc cg);
| JSON -> Graph.output_json oc cg
| Speedscope -> Speedscope.export_to_channel oc cg);
close_out oc
end

Expand Down Expand Up @@ -843,6 +848,7 @@ let parse_env_options s =
| _ -> format := Textual {threshold = 1.0};
end
| [ "format"; "json" ] -> format := JSON;
| [ "format"; "speedscope" ] -> format := Speedscope
| [ "format"; unknown ] -> invalid_for "format" unknown
| [ "output"; "stderr" ] -> output := Channel stderr
| [ "output"; "stdout" ] -> output := Channel stdout
Expand Down
6 changes: 6 additions & 0 deletions src/landmark.mli
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
(* Copyright (C) 2000-2025 LexiFi *)

module Graph = Graph
module Speedscope = Speedscope

(** The main module *)

Expand Down Expand Up @@ -92,6 +93,11 @@ type textual_option = {threshold : float}
type profile_format =
| JSON (** Easily parsable export format. *)
| Textual of textual_option (** Console friendly output; nodes below the threshold (0.0 <= threshold <= 100.0) are not displayed in the callgraph. *)
| Speedscope
(** Sampled profile in the Speedscope file format, openable at
{{: https://www.speedscope.app } speedscope.app}.
Enable [sys_time] in {!profiling_options} to get weights expressed in
seconds; otherwise weights are in raw CPU cycles. *)

(** The profiling options control the behavior of the landmark infrastructure. *)
type profiling_options = {
Expand Down
99 changes: 99 additions & 0 deletions src/speedscope.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
(*
Export to the Speedscope format
*)

(* Value passed as the [schema] field of the exported file (serialized under
   the "$schema" JSON key). *)
let schema_url = "https://www.speedscope.app/file-format-schema.json"
(* Value passed as the [exporter] field of the exported file; it names the
   program that produced the profile. *)
let exporter_name = "landmarks"

(* [parse_location loc] splits a location of the form ["file:line"] into
   [(file, Some line)].  The split happens at the last [':'] and only when
   everything after it is a non-empty run of decimal digits; in every other
   case [loc] is returned unchanged together with [None].

   The suffix is checked to be all digits before conversion because
   [int_of_string_opt] on its own also accepts signs, [0x]/[0o]/[0b]
   prefixes and [_] separators, which would turn a location such as
   ["foo:-3"] or ["foo:0x10"] into a bogus line number. *)
let parse_location loc =
  let is_decimal s =
    let len = String.length s in
    let rec all_digits i =
      i >= len
      || (match s.[i] with
          | '0' .. '9' -> all_digits (i + 1)
          | _ -> false)
    in
    len > 0 && all_digits 0
  in
  match String.rindex_opt loc ':' with
  | None -> loc, None
  | Some i ->
    let suffix = String.sub loc (i + 1) (String.length loc - i - 1) in
    (* [int_of_string_opt] still guards against overflow on long digit runs. *)
    let line = if is_decimal suffix then int_of_string_opt suffix else None in
    (match line with
     | Some n -> String.sub loc 0 i, Some n
     | None -> loc, None)

(* Build the Speedscope frame list for [graph].

   Every non-Root node is considered in array order; the first node seen for
   a given [landmark_id] creates a frame and is assigned the next free index.
   Returns the frames in index order together with the table mapping each
   [landmark_id] to its frame index. *)
let make_frames (graph : Graph.graph) =
  let index_of_landmark = Hashtbl.create 16 in
  let register rev_frames (node : Graph.node) =
    if node.kind = Graph.Root || Hashtbl.mem index_of_landmark node.landmark_id
    then rev_frames
    else begin
      let file, line = parse_location node.location in
      let frame = Speedscope_fmt.create_frame ~name:node.name ~file ?line () in
      (* Indices are dense, so the next one is the current table size. *)
      Hashtbl.add index_of_landmark node.landmark_id
        (Hashtbl.length index_of_landmark);
      frame :: rev_frames
    end
  in
  let rev_frames = Array.fold_left register [] graph.nodes in
  List.rev rev_frames, index_of_landmark

(* Walk the call graph depth-first from its root and emit one sample for
   every Normal node whose self time (own time minus the summed time of its
   children) is strictly positive.

   A sample is the list of frame indices from the outermost caller down to
   the node itself, which is the order Speedscope expects.  The Root node
   contributes no frame, Counter and Sampler nodes are ignored, and each node
   id is visited at most once.

   [use_sys_time] selects the [sys_time] field instead of [time] as the
   measure.  Returns the samples and their weights as two parallel lists, in
   visiting order. *)
let collect_samples ~use_sys_time (graph : Graph.graph) frame_idx =
  let rev_samples = ref [] and rev_weights = ref [] in
  let seen = Hashtbl.create 16 in
  let time_of (n : Graph.node) =
    match use_sys_time with
    | true -> n.sys_time
    | false -> n.time
  in
  let rec visit callers (node : Graph.node) =
    if Hashtbl.mem seen node.id then ()
    else begin
      Hashtbl.add seen node.id ();
      match node.kind with
      | Graph.Counter | Graph.Sampler -> ()
      | Graph.Root -> List.iter (visit callers) (Graph.children graph node)
      | Graph.Normal ->
        (* [callers] is kept innermost-first and reversed only when emitted. *)
        let callers = Hashtbl.find frame_idx node.landmark_id :: callers in
        let callees = Graph.children graph node in
        let callee_total =
          List.fold_left (fun sum callee -> sum +. time_of callee) 0.0 callees
        in
        let own = time_of node -. callee_total in
        if own > 0.0 then begin
          rev_samples := List.rev callers :: !rev_samples;
          rev_weights := own :: !rev_weights
        end;
        List.iter (visit callers) callees
    end
  in
  visit [] (Graph.root graph);
  List.rev !rev_samples, List.rev !rev_weights

(* Write [graph] to [oc] as a Speedscope file containing a single sampled
   profile, pretty-printed as JSON and followed by a newline.

   If any node has a positive [sys_time], weights come from [sys_time] and
   the unit is seconds; otherwise weights come from [time] and the unit is
   "none".  The profile's end value is the sum of all weights.  The group
   name is omitted when the graph label is empty. *)
let export_to_channel oc (graph : Graph.graph) =
  let frames, frame_idx = make_frames graph in
  let has_sys_time (n : Graph.node) = n.sys_time > 0.0 in
  let use_sys_time = Array.exists has_sys_time graph.nodes in
  let samples, weights = collect_samples ~use_sys_time graph frame_idx in
  let profile =
    Speedscope_fmt.create_sampled_profile
      ~type_:"sampled"
      ~name:graph.label
      ~unit:(if use_sys_time then Speedscope_fmt.Seconds else Speedscope_fmt.None_)
      ~start_value:0.0
      ~end_value:(List.fold_left ( +. ) 0.0 weights)
      ~samples
      ~weights
      ()
  in
  let group_name =
    match graph.label with
    | "" -> None
    | label -> Some label
  in
  let json =
    Speedscope_fmt.create_file_format
      ~schema:schema_url
      ?name:group_name
      ~exporter:exporter_name
      ~profiles:[profile]
      ~shared:(Speedscope_fmt.create_profile_shared ~frames ())
      ()
    |> Speedscope_fmt.yojson_of_file_format
  in
  Yojson.Safe.pretty_to_channel ~std:true oc json;
  output_char oc '\n'
15 changes: 15 additions & 0 deletions src/speedscope.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
(** Export to the Speedscope format

See https://www.speedscope.app for using the visualization app
and https://github.com/jlfwong/speedscope/blob/main/src/lib/file-format-spec.ts
for the annotated format specification.
*)

val export_to_channel : out_channel -> Graph.graph -> unit
(** [export_to_channel oc graph] writes [graph] to [oc] as a single
    Speedscope sampled profile, pretty-printed as JSON and followed by a
    newline.

    One sample is emitted per [Normal] call-graph node whose self time (its
    own time minus the time of its children) is strictly positive; the
    sample's stack lists the landmarks from the outermost caller down to that
    node.  [Counter] and [Sampler] nodes produce no samples.

    If at least one node has a positive [sys_time], weights are taken from
    [sys_time] and expressed in seconds; otherwise raw CPU-cycle counts are
    used with unit "none".

    The resulting JSON can be opened at
    {{: https://www.speedscope.app } speedscope.app}. *)
118 changes: 118 additions & 0 deletions src/speedscope_fmt.atd
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
<doc text="
Speedscope file-format types.

Schema: https://www.speedscope.app/file-format-schema.json
Spec (TS): https://github.com/jlfwong/speedscope/blob/main/src/lib/file-format-spec.ts
Import docs: https://github.com/jlfwong/speedscope/wiki/Importing-from-custom-sources

To regenerate speedscope_fmt.ml and speedscope_fmt.mli from this file:
{{{
atdml speedscope_fmt.atd
}}}
">

(* Each constructor is serialized as the lowercase JSON string given in its
   json name annotation.  None_ is written as "none"; the trailing underscore
   is presumably there to avoid clashing with the OCaml None constructor. *)
type value_unit
<doc text="Unit in which all profile values are expressed."> = [
| Bytes <json name="bytes">
| Microseconds <json name="microseconds">
| Milliseconds <json name="milliseconds">
| Nanoseconds <json name="nanoseconds">
| None_ <json name="none">
| Seconds <json name="seconds">
]

(* One entry of the shared frames list; sample stacks refer to frames by
   their index in that list.  Only the name is required: file, line and col
   are optional fields. *)
type frame = {
name : string;
?file : string option;
?line : int option;
?col : int option;
}

(* We only export sampled profiles; the Speedscope format also supports
evented profiles. The 'type' field is the discriminator used by
Speedscope for the profile union and must always be "sampled".
Because type_ is declared as a plain string, nothing in this type enforces
that value: the code building the record has to pass "sampled" itself. *)
type sampled_profile = {
type_
<json name="type">
<doc text="Type of profile.
Used as a discriminator in the profile union to future-proof
the file format. For sampled profiles, always 'sampled'.">
: string;

name
<doc text="Name of the profile.
Typically a filename for the source of the profile.">
: string;

unit
<json name="unit">
<doc text="Unit in which all values in this profile are expressed.">
: value_unit;

start_value
<json name="startValue">
<doc text="The starting value of the profile.
Typically a timestamp. All event values are displayed
relative to startValue.">
: float;

end_value
<json name="endValue">
<doc text="The final value of the profile.
Must be >= startValue. Useful when the recorded profile
extends past the last event.">
: float;

samples
<doc text="List of stacks.
Each stack is a list of indices into the shared frames array.">
: int list list;

weights
<doc text="Weight of the sample at the corresponding index.
Must have the same length as samples.">
: float list;
}

(* The "shared" section of a Speedscope file.
"shared" is a reserved word in ATD, hence the name profile_shared here;
the JSON key is "shared" via the annotation on the file_format field below.
As declared here it holds a single field, the list of frames. *)
type profile_shared
<doc text="Data shared between profiles.">
= {
frames : frame list;
}

(* Top-level record of a Speedscope file.
"$schema" uses a JSON name annotation because "$" is not a valid
OCaml identifier character.
The profiles field is restricted to sampled_profile, so evented profiles
cannot be represented with this type. *)
type file_format = {
schema
<json name="$schema">
: string;

?name
<doc text="The name of the contained profile group.
If omitted, the viewer uses the filename.">
: string option;

?exporter
<doc text="The name of the program that exported this profile.
Not consumed by speedscope, but useful for debugging.
Recommended format: {{name@version}}.">
: string option;

?active_profile_index
<json name="activeProfileIndex">
<doc text="Index into the profiles array to display on load.
Defaults to the first profile if omitted.">
: int option;

profiles
<doc text="List of profile definitions.">
: sampled_profile list;

shared
<json name="shared">
<doc text="Data shared between profiles.">
: profile_shared;
}
Loading