diff --git a/.gitignore b/.gitignore index 16ea30f..72700c5 100755 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ _build .merlin *.install admin/website + +# Speedscope demo +/profile.speedscope diff --git a/CHANGES.md b/CHANGES.md index 76b3702..fe4afb6 100755 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,11 @@ working version --------------- +* add Speedscope export format: set `format=speedscope` in `OCAML_LANDMARKS` + to write a sampled flame-graph profile openable at https://www.speedscope.app + (combine with `time` for second-precision weights). It is available + via a new landmarks-exports library. +* add custom export format: set `format=custom` in `OCAML_LANDMARKS` + and register your exporter using `Landmark.register_custom_exporter`. version 1.6, 12 may 2026 ------------------------ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7390c2f --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ +.PHONY: build +build: + dune build + +.PHONY: test +test: + dune test + +# Speedscope demo and sanity check +.PHONY: speedscope-demo +speedscope-demo: + dune build tests/speedscope/example.exe + OCAML_LANDMARKS="format=speedscope,output=profile.speedscope,time" \ + ./_build/default/tests/speedscope/example.exe + @echo "๐Ÿ‘‰ Upload profile.speedscope at https://www.speedscope.app/" diff --git a/README.md b/README.md index cc03ff4..7bb4a85 100755 --- a/README.md +++ b/README.md @@ -251,9 +251,13 @@ This variable is parsed as a comma-separated list of items of the form * When loading an instrumented program (at runtime): - * `format` with possible arguments: `textual` (default) or `json`. It controls - the output format of the profiling which is either a console friendly - representation or json encoding of the callgraph. + * `format` with possible arguments: `textual` (default), `json`, + or `speedscope`. `speedscope` requires the extra package + `landmarks-exports`. + It controls the output format of the profiling: a console-friendly + representation, a JSON encoding of the callgraph, or a + [Speedscope](https://www.speedscope.app) sampled profile (combine with + `time` for second-precision weights, otherwise weights are in CPU cycles). * `threshold` with a number between 0.0 and 100.0 as argument (default: 1.0). If the threshold is not zero the textual output will hide nodes in the callgraph below this threshold (in percent of time of their parent). This option is meaningless for other formats. diff --git a/dune b/dune new file mode 100644 index 0000000..c7ea468 --- /dev/null +++ b/dune @@ -0,0 +1,4 @@ +(env + ; '_' targets any profile + (_ + (flags (:standard -w +A-30-42-41-40-4-70 -safe-string -strict-sequence)))) diff --git a/dune-project b/dune-project index 31727ff..497bdef 100755 --- a/dune-project +++ b/dune-project @@ -15,10 +15,11 @@ (name landmarks) (synopsis "A simple profiling library") (description - "\| Landmarks is a simple profiling library for OCaml. It provides - "\| primitives to measure time spent in portion of instrumented code. The - "\| instrumentation of the code may either done by hand, automatically or - "\| semi-automatically using the ppx pepreprocessor (see landmarks-ppx package). + "Landmarks is a simple profiling library for OCaml. It provides \ + primitives to measure time spent in portion of instrumented code. \ + The instrumentation of the code may either done by hand, automatically \ + or semi-automatically using the ppx preprocessor (see landmarks-ppx \ + package)." ) (depends (ocaml (>= 4.08)) @@ -38,3 +39,15 @@ landmarks library.") (landmarks (= 1.6)) ) ) + +(package + (name landmarks-speedscope) + (synopsis "Additional export formats for the Landmarks profiling library") + (description + "This provides export to the Speedscope format and possibly more formats \ + in the future.") + (depends + (landmarks (= :version)) + (yojson (>= 3.0.0)) + ) +) diff --git a/landmarks-speedscope.opam b/landmarks-speedscope.opam new file mode 100644 index 0000000..c2c586d --- /dev/null +++ b/landmarks-speedscope.opam @@ -0,0 +1,32 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.6" +synopsis: "Additional export formats for the Landmarks profiling library" +description: + "This provides export to the Speedscope format and possibly more formats in the future." +maintainer: ["Marc Lasson "] +authors: ["Marc Lasson "] +license: "MIT" +homepage: "https://github.com/LexiFi/landmarks" +bug-reports: "https://github.com/LexiFi/landmarks/issues" +depends: [ + "dune" {>= "3.16"} + "landmarks" {= version} + "yojson" {>= "3.0.0"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/LexiFi/landmarks.git" diff --git a/landmarks.opam b/landmarks.opam index 1c66374..523e93b 100644 --- a/landmarks.opam +++ b/landmarks.opam @@ -2,12 +2,8 @@ opam-version: "2.0" version: "1.6" synopsis: "A simple profiling library" -description: """ -Landmarks is a simple profiling library for OCaml. It provides -primitives to measure time spent in portion of instrumented code. The -instrumentation of the code may either done by hand, automatically or -semi-automatically using the ppx pepreprocessor (see landmarks-ppx package). -""" +description: + "Landmarks is a simple profiling library for OCaml. It provides primitives to measure time spent in portion of instrumented code. The instrumentation of the code may either done by hand, automatically or semi-automatically using the ppx preprocessor (see landmarks-ppx package)." maintainer: ["Marc Lasson "] authors: ["Marc Lasson "] license: "MIT" diff --git a/speedscope/dune b/speedscope/dune new file mode 100755 index 0000000..63ceac3 --- /dev/null +++ b/speedscope/dune @@ -0,0 +1,11 @@ +; A library for exporting profiling data to various formats +(library + (name landmarks_speedscope) + (public_name landmarks-speedscope) + (libraries + landmarks + yojson + ) + ; Force linking and evaluation of this library's modules + (library_flags -linkall) +) diff --git a/speedscope/landmarks_speedscope.ml b/speedscope/landmarks_speedscope.ml new file mode 100644 index 0000000..f400407 --- /dev/null +++ b/speedscope/landmarks_speedscope.ml @@ -0,0 +1,109 @@ +(* + Export to the Speedscope format +*) + +open Landmark + +let schema_url = "https://www.speedscope.app/file-format-schema.json" +let exporter_name = "landmarks" + +let parse_location loc = + match String.rindex_opt loc ':' with + | None -> loc, None + | Some i -> + let file = String.sub loc 0 i in + let rest = String.sub loc (i + 1) (String.length loc - i - 1) in + (match int_of_string_opt rest with + | Some n -> file, Some n + | None -> loc, None) + +(* One Speedscope frame per unique landmark (by landmark_id), skipping Root. *) +let make_frames (graph : Graph.graph) = + let tbl = Hashtbl.create 16 in + let frames = ref [] in + let next_idx = ref 0 in + Array.iter (fun (node : Graph.node) -> + if node.kind <> Graph.Root && not (Hashtbl.mem tbl node.landmark_id) then begin + let file, line = parse_location node.location in + let frame = Speedscope_fmt.create_frame ~name:node.name ~file ?line () in + Hashtbl.add tbl node.landmark_id !next_idx; + frames := frame :: !frames; + incr next_idx + end + ) graph.nodes; + List.rev !frames, tbl + +(* DFS producing one sample per call-graph node with positive self-time. + Each sample is a stack of frame indices from outermost to innermost + caller (Speedscope's "bottom to top" convention). + Counter and Sampler nodes are skipped. *) +let collect_samples ~use_sys_time (graph : Graph.graph) frame_idx = + let samples = ref [] in + let weights = ref [] in + let node_time (n : Graph.node) = if use_sys_time then n.sys_time else n.time in + Graph.dfs + (fun ancestors (node : Graph.node) -> + match node.kind with + | Root -> true + | Counter | Sampler -> false + | Normal -> + let fidx = Hashtbl.find frame_idx node.landmark_id in + let child_list = Graph.children graph node in + let child_time = + List.fold_left (fun acc c -> acc +. node_time c) 0.0 child_list + in + let self_time = node_time node -. child_time in + if self_time > 0.0 then begin + let stack = + fidx :: + List.filter_map + (fun (a : Graph.node) -> + match a.kind with + | Normal -> Some (Hashtbl.find frame_idx a.landmark_id) + | Root | Counter | Sampler -> None) + ancestors + in + samples := List.rev stack :: !samples; + weights := self_time :: !weights + end; + true) + (fun _ _ -> ()) + graph; + List.rev !samples, List.rev !weights + +let exporter oc (graph : Graph.graph) = + let frames, frame_idx = make_frames graph in + let use_sys_time = + Array.exists (fun (n : Graph.node) -> n.sys_time > 0.0) graph.nodes + in + let samples, weights = collect_samples ~use_sys_time graph frame_idx in + let end_value = List.fold_left ( +. ) 0.0 weights in + let weight_unit = + if use_sys_time then Speedscope_fmt.Seconds else Speedscope_fmt.None_ + in + let profile = Speedscope_fmt.create_sampled_profile + ~type_:"sampled" + ~name:graph.label + ~unit:weight_unit + ~start_value:0.0 + ~end_value + ~samples + ~weights + () + in + let shared = Speedscope_fmt.create_profile_shared ~frames () in + let file = Speedscope_fmt.create_file_format + ~schema:schema_url + ?name:(if graph.label = "" then None else Some graph.label) + ~exporter:exporter_name + ~profiles:[profile] + ~shared + () + in + Yojson.Safe.pretty_to_channel ~std:true oc + (Speedscope_fmt.yojson_of_file_format file); + output_char oc '\n' + +(* This relies on the [-linkall] flag passed with [-a] when building + the library to ensure the registration takes place. *) +let () = Landmark.register_exporter "speedscope" exporter diff --git a/speedscope/landmarks_speedscope.mli b/speedscope/landmarks_speedscope.mli new file mode 100644 index 0000000..507ef7b --- /dev/null +++ b/speedscope/landmarks_speedscope.mli @@ -0,0 +1,19 @@ +(** Export to the Speedscope format + + See https://www.speedscope.app for using the visualization app + and https://github.com/jlfwong/speedscope/blob/main/src/lib/file-format-spec.ts + for the annotated format specification. +*) + +val exporter : out_channel -> Landmark.Graph.graph -> unit +(** Write a Speedscope sampled profile to [out_channel]. + + If [sys_time] was collected during profiling, weights are in seconds; + otherwise raw CPU-cycle counts are used with unit "none". + + The resulting JSON can be opened at + {{: https://www.speedscope.app } speedscope.app}. + + This exporter is automatically registered with the Landmarks library + to provide support for [format=speedscope]. +*) diff --git a/speedscope/speedscope_fmt.atd b/speedscope/speedscope_fmt.atd new file mode 100644 index 0000000..0d7db68 --- /dev/null +++ b/speedscope/speedscope_fmt.atd @@ -0,0 +1,118 @@ + + +type value_unit + = [ + | Bytes + | Microseconds + | Milliseconds + | Nanoseconds + | None_ + | Seconds +] + +type frame = { + name : string; + ?file : string option; + ?line : int option; + ?col : int option; +} + +(* We only export sampled profiles; the Speedscope format also supports + evented profiles. The 'type' field is the discriminator used by + Speedscope for the profile union and must always be "sampled". *) +type sampled_profile = { + type_ + + + : string; + + name + + : string; + + unit + + + : value_unit; + + start_value + + + : float; + + end_value + + + : float; + + samples + + : int list list; + + weights + + : float list; +} + +(* The "shared" section of a Speedscope file. + "shared" is a reserved word in ATD, hence the name profile_shared here; + the JSON key is "shared" via the annotation on the file_format field below. *) +type profile_shared + += { + frames : frame list; +} + +(* "$schema" uses a JSON name annotation because "$" is not a valid + OCaml identifier character. *) +type file_format = { + schema + + : string; + + ?name + + : string option; + + ?exporter + + : string option; + + ?active_profile_index + + + : int option; + + profiles + + : sampled_profile list; + + shared + + + : profile_shared; +} diff --git a/speedscope/speedscope_fmt.ml b/speedscope/speedscope_fmt.ml new file mode 100644 index 0000000..f28da15 --- /dev/null +++ b/speedscope/speedscope_fmt.ml @@ -0,0 +1,473 @@ +(* Auto-generated from "speedscope_fmt.atd" by atdml. *) +[@@@ocaml.warning "-27-32-33-35-39"] + +(** + Speedscope file-format types. + + Schema: https://www.speedscope.app/file-format-schema.json Spec (TS): + https://github.com/jlfwong/speedscope/blob/main/src/lib/file-format-spec.ts + Import docs: + https://github.com/jlfwong/speedscope/wiki/Importing-from-custom-sources + + To regenerate speedscope_fmt.ml and speedscope_fmt.mli from this file: + + {v + atdml speedscope_fmt.atd + + v} +*) + +(* Inlined runtime โ€” no external dependency needed. *) +module Atdml_runtime = struct + (* Returns true iff the list has strictly more than [n] elements, + without traversing past element n+1. *) + let rec list_length_gt n = function + | _ :: rest -> if n = 0 then true else list_length_gt (n - 1) rest + | [] -> false + + module Yojson = struct + let bad_type expected_type x = + Printf.ksprintf failwith "expected %s, got: %s" + expected_type (Yojson.Safe.to_string x) + + let bad_sum type_name x = + Printf.ksprintf failwith "invalid variant for type '%s': %s" + type_name (Yojson.Safe.to_string x) + + let missing_field type_name field_name = + Printf.ksprintf failwith "missing field '%s' in object of type '%s'" + field_name type_name + + let bool_of_yojson = function + | `Bool b -> b + | x -> bad_type "bool" x + + let yojson_of_bool b = `Bool b + + let int_of_yojson = function + | `Int n -> n + | x -> bad_type "int" x + + let yojson_of_int n = `Int n + + let float_of_yojson = function + | `Float f -> f + | `Int n -> Float.of_int n + | x -> bad_type "float" x + + let yojson_of_float f = `Float f + + let string_of_yojson = function + | `String s -> s + | x -> bad_type "string" x + + let yojson_of_string s = `String s + + let unit_of_yojson = function + | `Null -> () + | x -> bad_type "null" x + + let yojson_of_unit () = `Null + + let list_of_yojson f = function + | `List xs -> List.map f xs + | x -> bad_type "array" x + + let yojson_of_list f xs = `List (List.map f xs) + + let option_of_yojson f = function + | `String "None" -> None + | `List [`String "Some"; x] -> Some (f x) + | x -> bad_type "option" x + + let yojson_of_option f = function + | None -> `String "None" + | Some x -> `List [`String "Some"; f x] + + let nullable_of_yojson f = function + | `Null -> None + | x -> Some (f x) + + let yojson_of_nullable f = function + | None -> `Null + | Some x -> f x + + let assoc_of_yojson f = function + | `Assoc pairs -> List.map (fun (k, v) -> (k, f v)) pairs + | x -> bad_type "object" x + + let yojson_of_assoc f xs = + `Assoc (List.map (fun (k, v) -> (k, f v)) xs) + end +end + +(** Unit in which all profile values are expressed. *) +type value_unit = + | Bytes + | Microseconds + | Milliseconds + | Nanoseconds + | None_ + | Seconds + +let value_unit_of_yojson (x : Yojson.Safe.t) : value_unit = + match x with + | `String "bytes" -> Bytes + | `String "microseconds" -> Microseconds + | `String "milliseconds" -> Milliseconds + | `String "nanoseconds" -> Nanoseconds + | `String "none" -> None_ + | `String "seconds" -> Seconds + | _ -> Atdml_runtime.Yojson.bad_sum "value_unit" x + +let yojson_of_value_unit (x : value_unit) : Yojson.Safe.t = + match x with + | Bytes -> `String "bytes" + | Microseconds -> `String "microseconds" + | Milliseconds -> `String "milliseconds" + | Nanoseconds -> `String "nanoseconds" + | None_ -> `String "none" + | Seconds -> `String "seconds" + +let value_unit_of_json s = + value_unit_of_yojson (Yojson.Safe.from_string s) + +let json_of_value_unit x = + Yojson.Safe.to_string (yojson_of_value_unit x) + +module Value_unit = struct + type nonrec t = value_unit + let of_yojson = value_unit_of_yojson + let to_yojson = yojson_of_value_unit + let of_json = value_unit_of_json + let to_json = json_of_value_unit +end + +type sampled_profile = { + type_: string; + (** + Type of profile. Used as a discriminator in the profile union to + future-proof the file format. For sampled profiles, always 'sampled'. + *) + name: string; + (** + Name of the profile. Typically a filename for the source of the + profile. + *) + unit: value_unit; (** Unit in which all values in this profile are expressed. *) + start_value: float; + (** + The starting value of the profile. Typically a timestamp. All event + values are displayed relative to startValue. + *) + end_value: float; + (** + The final value of the profile. Must be >= startValue. Useful when the + recorded profile extends past the last event. + *) + samples: int list list; + (** + List of stacks. Each stack is a list of indices into the shared frames + array. + *) + weights: float list; + (** + Weight of the sample at the corresponding index. Must have the same + length as samples. + *) +} + +let create_sampled_profile ~type_ ~name ~unit ~start_value ~end_value ~samples ~weights () : sampled_profile = + { type_; name; unit; start_value; end_value; samples; weights } + +let sampled_profile_of_yojson (x : Yojson.Safe.t) : sampled_profile = + match x with + | `Assoc fields -> + (* Duplicate JSON keys: behavior is unspecified (RFC 8259 ยง4 says keys SHOULD + be unique). Below the threshold, List.assoc_opt returns the first binding; + above it, the hashtable returns the last. *) + let assoc_ = + if Atdml_runtime.list_length_gt 5 fields then + let tbl = Hashtbl.create 16 in + List.iter (fun (k, v) -> Hashtbl.add tbl k v) fields; + (fun key -> Hashtbl.find_opt tbl key) + else (fun key -> List.assoc_opt key fields) + in + let type_ = + match assoc_ "type" with + | Some v -> Atdml_runtime.Yojson.string_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "type" + in + let name = + match assoc_ "name" with + | Some v -> Atdml_runtime.Yojson.string_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "name" + in + let unit = + match assoc_ "unit" with + | Some v -> value_unit_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "unit" + in + let start_value = + match assoc_ "startValue" with + | Some v -> Atdml_runtime.Yojson.float_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "startValue" + in + let end_value = + match assoc_ "endValue" with + | Some v -> Atdml_runtime.Yojson.float_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "endValue" + in + let samples = + match assoc_ "samples" with + | Some v -> (Atdml_runtime.Yojson.list_of_yojson (Atdml_runtime.Yojson.list_of_yojson Atdml_runtime.Yojson.int_of_yojson)) v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "samples" + in + let weights = + match assoc_ "weights" with + | Some v -> (Atdml_runtime.Yojson.list_of_yojson Atdml_runtime.Yojson.float_of_yojson) v + | None -> Atdml_runtime.Yojson.missing_field "sampled_profile" "weights" + in + { type_; name; unit; start_value; end_value; samples; weights } + | _ -> Atdml_runtime.Yojson.bad_type "sampled_profile" x + +let yojson_of_sampled_profile (x : sampled_profile) : Yojson.Safe.t = + `Assoc (List.concat [ + [("type", Atdml_runtime.Yojson.yojson_of_string x.type_)]; + [("name", Atdml_runtime.Yojson.yojson_of_string x.name)]; + [("unit", yojson_of_value_unit x.unit)]; + [("startValue", Atdml_runtime.Yojson.yojson_of_float x.start_value)]; + [("endValue", Atdml_runtime.Yojson.yojson_of_float x.end_value)]; + [("samples", (Atdml_runtime.Yojson.yojson_of_list (Atdml_runtime.Yojson.yojson_of_list Atdml_runtime.Yojson.yojson_of_int)) x.samples)]; + [("weights", (Atdml_runtime.Yojson.yojson_of_list Atdml_runtime.Yojson.yojson_of_float) x.weights)]; + ]) + +let sampled_profile_of_json s = + sampled_profile_of_yojson (Yojson.Safe.from_string s) + +let json_of_sampled_profile x = + Yojson.Safe.to_string (yojson_of_sampled_profile x) + +module Sampled_profile = struct + type nonrec t = sampled_profile + let create = create_sampled_profile + let of_yojson = sampled_profile_of_yojson + let to_yojson = yojson_of_sampled_profile + let of_json = sampled_profile_of_json + let to_json = json_of_sampled_profile +end + +type frame = { + name: string; + file: string option; + line: int option; + col: int option; +} + +let create_frame ~name ?file ?line ?col () : frame = + { name; file; line; col } + +let frame_of_yojson (x : Yojson.Safe.t) : frame = + match x with + | `Assoc fields -> + (* Duplicate JSON keys: behavior is unspecified (RFC 8259 ยง4 says keys SHOULD + be unique). Below the threshold, List.assoc_opt returns the first binding; + above it, the hashtable returns the last. *) + let assoc_ = + if Atdml_runtime.list_length_gt 5 fields then + let tbl = Hashtbl.create 16 in + List.iter (fun (k, v) -> Hashtbl.add tbl k v) fields; + (fun key -> Hashtbl.find_opt tbl key) + else (fun key -> List.assoc_opt key fields) + in + let name = + match assoc_ "name" with + | Some v -> Atdml_runtime.Yojson.string_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "frame" "name" + in + let file = + match assoc_ "file" with + | None | Some `Null -> None + | Some v -> Some (Atdml_runtime.Yojson.string_of_yojson v) + in + let line = + match assoc_ "line" with + | None | Some `Null -> None + | Some v -> Some (Atdml_runtime.Yojson.int_of_yojson v) + in + let col = + match assoc_ "col" with + | None | Some `Null -> None + | Some v -> Some (Atdml_runtime.Yojson.int_of_yojson v) + in + { name; file; line; col } + | _ -> Atdml_runtime.Yojson.bad_type "frame" x + +let yojson_of_frame (x : frame) : Yojson.Safe.t = + `Assoc (List.concat [ + [("name", Atdml_runtime.Yojson.yojson_of_string x.name)]; + (match x.file with None -> [] | Some v -> [("file", Atdml_runtime.Yojson.yojson_of_string v)]); + (match x.line with None -> [] | Some v -> [("line", Atdml_runtime.Yojson.yojson_of_int v)]); + (match x.col with None -> [] | Some v -> [("col", Atdml_runtime.Yojson.yojson_of_int v)]); + ]) + +let frame_of_json s = + frame_of_yojson (Yojson.Safe.from_string s) + +let json_of_frame x = + Yojson.Safe.to_string (yojson_of_frame x) + +module Frame = struct + type nonrec t = frame + let create = create_frame + let of_yojson = frame_of_yojson + let to_yojson = yojson_of_frame + let of_json = frame_of_json + let to_json = json_of_frame +end + +(** Data shared between profiles. *) +type profile_shared = { + frames: frame list; +} + +let create_profile_shared ~frames () : profile_shared = + { frames } + +let profile_shared_of_yojson (x : Yojson.Safe.t) : profile_shared = + match x with + | `Assoc fields -> + (* Duplicate JSON keys: behavior is unspecified (RFC 8259 ยง4 says keys SHOULD + be unique). Below the threshold, List.assoc_opt returns the first binding; + above it, the hashtable returns the last. *) + let assoc_ = + if Atdml_runtime.list_length_gt 5 fields then + let tbl = Hashtbl.create 16 in + List.iter (fun (k, v) -> Hashtbl.add tbl k v) fields; + (fun key -> Hashtbl.find_opt tbl key) + else (fun key -> List.assoc_opt key fields) + in + let frames = + match assoc_ "frames" with + | Some v -> (Atdml_runtime.Yojson.list_of_yojson frame_of_yojson) v + | None -> Atdml_runtime.Yojson.missing_field "profile_shared" "frames" + in + { frames } + | _ -> Atdml_runtime.Yojson.bad_type "profile_shared" x + +let yojson_of_profile_shared (x : profile_shared) : Yojson.Safe.t = + `Assoc (List.concat [ + [("frames", (Atdml_runtime.Yojson.yojson_of_list yojson_of_frame) x.frames)]; + ]) + +let profile_shared_of_json s = + profile_shared_of_yojson (Yojson.Safe.from_string s) + +let json_of_profile_shared x = + Yojson.Safe.to_string (yojson_of_profile_shared x) + +module Profile_shared = struct + type nonrec t = profile_shared + let create = create_profile_shared + let of_yojson = profile_shared_of_yojson + let to_yojson = yojson_of_profile_shared + let of_json = profile_shared_of_json + let to_json = json_of_profile_shared +end + +type file_format = { + schema: string; + name: string option; + (** + The name of the contained profile group. If omitted, the viewer uses + the filename. + *) + exporter: string option; + (** + The name of the program that exported this profile. Not consumed by + speedscope, but useful for debugging. Recommended format: + [name\@version]. + *) + active_profile_index: int option; + (** + Index into the profiles array to display on load. Defaults to the + first profile if omitted. + *) + profiles: sampled_profile list; (** List of profile definitions. *) + shared: profile_shared; (** Data shared between profiles. *) +} + +let create_file_format ~schema ?name ?exporter ?active_profile_index ~profiles ~shared () : file_format = + { schema; name; exporter; active_profile_index; profiles; shared } + +let file_format_of_yojson (x : Yojson.Safe.t) : file_format = + match x with + | `Assoc fields -> + (* Duplicate JSON keys: behavior is unspecified (RFC 8259 ยง4 says keys SHOULD + be unique). Below the threshold, List.assoc_opt returns the first binding; + above it, the hashtable returns the last. *) + let assoc_ = + if Atdml_runtime.list_length_gt 5 fields then + let tbl = Hashtbl.create 16 in + List.iter (fun (k, v) -> Hashtbl.add tbl k v) fields; + (fun key -> Hashtbl.find_opt tbl key) + else (fun key -> List.assoc_opt key fields) + in + let schema = + match assoc_ "$schema" with + | Some v -> Atdml_runtime.Yojson.string_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "file_format" "$schema" + in + let name = + match assoc_ "name" with + | None | Some `Null -> None + | Some v -> Some (Atdml_runtime.Yojson.string_of_yojson v) + in + let exporter = + match assoc_ "exporter" with + | None | Some `Null -> None + | Some v -> Some (Atdml_runtime.Yojson.string_of_yojson v) + in + let active_profile_index = + match assoc_ "activeProfileIndex" with + | None | Some `Null -> None + | Some v -> Some (Atdml_runtime.Yojson.int_of_yojson v) + in + let profiles = + match assoc_ "profiles" with + | Some v -> (Atdml_runtime.Yojson.list_of_yojson sampled_profile_of_yojson) v + | None -> Atdml_runtime.Yojson.missing_field "file_format" "profiles" + in + let shared = + match assoc_ "shared" with + | Some v -> profile_shared_of_yojson v + | None -> Atdml_runtime.Yojson.missing_field "file_format" "shared" + in + { schema; name; exporter; active_profile_index; profiles; shared } + | _ -> Atdml_runtime.Yojson.bad_type "file_format" x + +let yojson_of_file_format (x : file_format) : Yojson.Safe.t = + `Assoc (List.concat [ + [("$schema", Atdml_runtime.Yojson.yojson_of_string x.schema)]; + (match x.name with None -> [] | Some v -> [("name", Atdml_runtime.Yojson.yojson_of_string v)]); + (match x.exporter with None -> [] | Some v -> [("exporter", Atdml_runtime.Yojson.yojson_of_string v)]); + (match x.active_profile_index with None -> [] | Some v -> [("activeProfileIndex", Atdml_runtime.Yojson.yojson_of_int v)]); + [("profiles", (Atdml_runtime.Yojson.yojson_of_list yojson_of_sampled_profile) x.profiles)]; + [("shared", yojson_of_profile_shared x.shared)]; + ]) + +let file_format_of_json s = + file_format_of_yojson (Yojson.Safe.from_string s) + +let json_of_file_format x = + Yojson.Safe.to_string (yojson_of_file_format x) + +module File_format = struct + type nonrec t = file_format + let create = create_file_format + let of_yojson = file_format_of_yojson + let to_yojson = yojson_of_file_format + let of_json = file_format_of_json + let to_json = json_of_file_format +end + diff --git a/speedscope/speedscope_fmt.mli b/speedscope/speedscope_fmt.mli new file mode 100644 index 0000000..afbf0d8 --- /dev/null +++ b/speedscope/speedscope_fmt.mli @@ -0,0 +1,168 @@ +(* Auto-generated from "speedscope_fmt.atd" by atdml. *) + +(** + Speedscope file-format types. + + Schema: https://www.speedscope.app/file-format-schema.json Spec (TS): + https://github.com/jlfwong/speedscope/blob/main/src/lib/file-format-spec.ts + Import docs: + https://github.com/jlfwong/speedscope/wiki/Importing-from-custom-sources + + To regenerate speedscope_fmt.ml and speedscope_fmt.mli from this file: + + {v + atdml speedscope_fmt.atd + + v} +*) + +(** Unit in which all profile values are expressed. *) +type value_unit = + | Bytes + | Microseconds + | Milliseconds + | Nanoseconds + | None_ + | Seconds + +val value_unit_of_yojson : Yojson.Safe.t -> value_unit +val yojson_of_value_unit : value_unit -> Yojson.Safe.t +val value_unit_of_json : string -> value_unit +val json_of_value_unit : value_unit -> string + +module Value_unit : sig + type nonrec t = value_unit + val of_yojson : Yojson.Safe.t -> t + val to_yojson : t -> Yojson.Safe.t + val of_json : string -> t + val to_json : t -> string +end + +type sampled_profile = { + type_: string; + (** + Type of profile. Used as a discriminator in the profile union to + future-proof the file format. For sampled profiles, always 'sampled'. + *) + name: string; + (** + Name of the profile. Typically a filename for the source of the + profile. + *) + unit: value_unit; (** Unit in which all values in this profile are expressed. *) + start_value: float; + (** + The starting value of the profile. Typically a timestamp. All event + values are displayed relative to startValue. + *) + end_value: float; + (** + The final value of the profile. Must be >= startValue. Useful when the + recorded profile extends past the last event. + *) + samples: int list list; + (** + List of stacks. Each stack is a list of indices into the shared frames + array. + *) + weights: float list; + (** + Weight of the sample at the corresponding index. Must have the same + length as samples. + *) +} + +val create_sampled_profile : type_:string -> name:string -> unit:value_unit -> start_value:float -> end_value:float -> samples:int list list -> weights:float list -> unit -> sampled_profile +val sampled_profile_of_yojson : Yojson.Safe.t -> sampled_profile +val yojson_of_sampled_profile : sampled_profile -> Yojson.Safe.t +val sampled_profile_of_json : string -> sampled_profile +val json_of_sampled_profile : sampled_profile -> string + +module Sampled_profile : sig + type nonrec t = sampled_profile + val create : type_:string -> name:string -> unit:value_unit -> start_value:float -> end_value:float -> samples:int list list -> weights:float list -> unit -> t + val of_yojson : Yojson.Safe.t -> t + val to_yojson : t -> Yojson.Safe.t + val of_json : string -> t + val to_json : t -> string +end + +type frame = { + name: string; + file: string option; + line: int option; + col: int option; +} + +val create_frame : name:string -> ?file:string -> ?line:int -> ?col:int -> unit -> frame +val frame_of_yojson : Yojson.Safe.t -> frame +val yojson_of_frame : frame -> Yojson.Safe.t +val frame_of_json : string -> frame +val json_of_frame : frame -> string + +module Frame : sig + type nonrec t = frame + val create : name:string -> ?file:string -> ?line:int -> ?col:int -> unit -> t + val of_yojson : Yojson.Safe.t -> t + val to_yojson : t -> Yojson.Safe.t + val of_json : string -> t + val to_json : t -> string +end + +(** Data shared between profiles. *) +type profile_shared = { + frames: frame list; +} + +val create_profile_shared : frames:frame list -> unit -> profile_shared +val profile_shared_of_yojson : Yojson.Safe.t -> profile_shared +val yojson_of_profile_shared : profile_shared -> Yojson.Safe.t +val profile_shared_of_json : string -> profile_shared +val json_of_profile_shared : profile_shared -> string + +module Profile_shared : sig + type nonrec t = profile_shared + val create : frames:frame list -> unit -> t + val of_yojson : Yojson.Safe.t -> t + val to_yojson : t -> Yojson.Safe.t + val of_json : string -> t + val to_json : t -> string +end + +type file_format = { + schema: string; + name: string option; + (** + The name of the contained profile group. If omitted, the viewer uses + the filename. + *) + exporter: string option; + (** + The name of the program that exported this profile. Not consumed by + speedscope, but useful for debugging. Recommended format: + [name\@version]. + *) + active_profile_index: int option; + (** + Index into the profiles array to display on load. Defaults to the + first profile if omitted. + *) + profiles: sampled_profile list; (** List of profile definitions. *) + shared: profile_shared; (** Data shared between profiles. *) +} + +val create_file_format : schema:string -> ?name:string -> ?exporter:string -> ?active_profile_index:int -> profiles:sampled_profile list -> shared:profile_shared -> unit -> file_format +val file_format_of_yojson : Yojson.Safe.t -> file_format +val yojson_of_file_format : file_format -> Yojson.Safe.t +val file_format_of_json : string -> file_format +val json_of_file_format : file_format -> string + +module File_format : sig + type nonrec t = file_format + val create : schema:string -> ?name:string -> ?exporter:string -> ?active_profile_index:int -> profiles:sampled_profile list -> shared:profile_shared -> unit -> t + val of_yojson : Yojson.Safe.t -> t + val to_yojson : t -> Yojson.Safe.t + val of_json : string -> t + val to_json : t -> string +end + diff --git a/src/dune b/src/dune index d138a06..61221a3 100755 --- a/src/dune +++ b/src/dune @@ -2,8 +2,6 @@ (name landmark) (public_name landmarks) (no_dynlink) - (flags - (:standard -w +A-30-42-41-40-4-70 -safe-string -strict-sequence)) (foreign_stubs (language c) (names utils)) diff --git a/src/graph.mli b/src/graph.mli index 80521eb..2be35fe 100644 --- a/src/graph.mli +++ b/src/graph.mli @@ -73,7 +73,8 @@ val path_dfs: (bool -> node list -> node -> unit) -> val dfs: (node list -> node -> bool) -> (node list -> node -> unit) -> graph -> unit (** A specialization of [path_dfs] that does not need to read the visited flag. - The returned values of the first function tells whether or not the traversal should continue visiting the children of the current node. *) + The returned values of the first function tells whether or not the + traversal should continue visiting the children of the current node. *) (** {3 Utility functions } *) diff --git a/src/landmark.ml b/src/landmark.ml index 5996026..ddacd11 100644 --- a/src/landmark.ml +++ b/src/landmark.ml @@ -256,6 +256,7 @@ type textual_option = {threshold : float} type profile_format = | JSON | Textual of textual_option + | External of string let profiling_ref = ref false let profile_with_debug = ref false @@ -696,6 +697,38 @@ let stop_profiling () = (** EXPORTING / IMPORTING SLAVE PROFILINGS **) +let warning s = + Printf.eprintf "[LANDMARKS] %s.\n%!" s + +type exporter = out_channel -> Graph.graph -> unit + +let external_exporters : (string, exporter) Hashtbl.t = Hashtbl.create 10 + +let register_exporter format_name exporter = + if Hashtbl.mem external_exporters format_name then + warning (Printf.sprintf + "Multiple registration of an exporter named %S" format_name); + Hashtbl.replace external_exporters format_name exporter + +let get_format_names () = + Hashtbl.fold (fun name _f acc -> name :: acc) + external_exporters + ["json"; "textual"] + |> List.sort String.compare + +let invoke_external_exporter format_name oc graph = + match Hashtbl.find_opt external_exporters format_name with + | None -> + warning ( + Printf.sprintf + "Missing exporter for 'format=%s'. \ + Available formats are: %s" + format_name + (String.concat ", " (get_format_names ())) + ) + | Some exporter -> + exporter oc graph + let array_list_map f l = let size = List.length l in match l with @@ -783,6 +816,8 @@ let exit_hook () = Graph.output ~threshold out cg | Channel out, JSON -> Graph.output_json out cg + | Channel out, External name -> + invoke_external_exporter name out cg | Temporary temp_dir, format -> let tmp_file, oc = Filename.open_temp_file ?temp_dir "profile_at_exit" ".tmp" @@ -792,7 +827,8 @@ let exit_hook () = flush stdout; (match format with | Textual {threshold} -> Graph.output ~threshold oc cg - | JSON -> Graph.output_json oc cg); + | JSON -> Graph.output_json oc cg + | External name -> invoke_external_exporter name oc cg); close_out oc end @@ -810,9 +846,6 @@ let parse_env_options s = let split_trim c s = List.map String.trim (Misc.split c s) in - let warning s = - eprintf "[LANDMARKS] %s.\n%!" s - in let parse_option s = let invalid_for opt given = warning (sprintf @@ -843,7 +876,9 @@ let parse_env_options s = | _ -> format := Textual {threshold = 1.0}; end | [ "format"; "json" ] -> format := JSON; - | [ "format"; unknown ] -> invalid_for "format" unknown + | [ "format"; other ] -> + (* a exporter will have to be registered *) + format := External other | [ "output"; "stderr" ] -> output := Channel stderr | [ "output"; "stdout" ] -> output := Channel stdout | [ "output"; temporary ] when Misc.starts_with ~prefix:"temporary" temporary -> @@ -884,10 +919,13 @@ let parse_env_options s = {debug = !debug; allocated_bytes = !allocated_bytes; sys_time = !sys_time; output = !output; format = !format; recursive = !recursive} -let () = match Sys.getenv "OCAML_LANDMARKS" with +let init () = + match Sys.getenv "OCAML_LANDMARKS" with | exception Not_found -> () | str -> try start_profiling ~profiling_options:(parse_env_options str) () with Exit -> () +let () = init () + external raise : exn -> 'a = "%raise" diff --git a/src/landmark.mli b/src/landmark.mli index 7588fde..9fc1227 100644 --- a/src/landmark.mli +++ b/src/landmark.mli @@ -92,6 +92,9 @@ type textual_option = {threshold : float} type profile_format = | JSON (** Easily parsable export format. *) | Textual of textual_option (** Console friendly output; nodes below the threshold (0.0 <= threshold <= 100.0) are not displayed in the callgraph. *) + | External of string (** Other exporter that must be registered with + {!register_external_exporter} by the time + it's invoked. *) (** The profiling options control the behavior of the landmark infrastructure. *) type profiling_options = { @@ -155,3 +158,14 @@ val pop_profiling_state: unit -> unit external raise : exn -> 'a = "%raise" (** This a redefinition of [Stdlib.raise] to allow generated code to work with -no-stdlib.*) + +(** {3 Runtime initialization} *) + +type exporter = out_channel -> Graph.graph -> unit + +(** Register an extra exporter. + + The exporter to use is specified by the [format] value in the [OCAML_LANDMARKS] + environment variable. +*) +val register_exporter : string -> exporter -> unit diff --git a/tests/basic/test.ml b/tests/basic/test.ml index a818bb7..be1a4ed 100644 --- a/tests/basic/test.ml +++ b/tests/basic/test.ml @@ -6,20 +6,23 @@ let rec fib n = (fun n -> if n <= 1 then 1 else fib (n - 1) + fib (n - 2)) n let () = - let open Landmark in - start_profiling - ~profiling_options:{default_options with format = JSON; debug = true} (); - enter main; + Landmark.start_profiling + ~profiling_options:{ + Landmark.default_options + with format = JSON; + debug = true + } (); + Landmark.enter main; Printf.printf "%d\n%!" (fib 7); - exit main; - if profiling () then begin + Landmark.exit main; + if Landmark.profiling () then begin let open Landmark.Graph in - let cg = export () in + let cg = Landmark.export () in let agg = aggregate_landmarks cg in let all_nodes = nodes agg in print_endline "\nLandmark reached:"; all_nodes |> List.map (fun {name; _} -> name) - |> List.sort compare + |> List.sort String.compare |> List.iter print_endline end diff --git a/tests/js/test.ml b/tests/js/test.ml index 7c59b1e..91531df 100644 --- a/tests/js/test.ml +++ b/tests/js/test.ml @@ -6,15 +6,14 @@ let rec fib n = (fun n -> if n <= 1 then 1 else fib (n - 1) + fib (n - 2)) n let () = - let open Landmark in - start_profiling - ~profiling_options:{default_options with format = JSON} (); - enter main; + Landmark.start_profiling + ~profiling_options:{Landmark.default_options with format = JSON} (); + Landmark.enter main; Printf.printf "%d\n%!" (fib 7); - exit main; - if profiling () then begin + Landmark.exit main; + if Landmark.profiling () then begin let open Landmark.Graph in - let cg = export () in + let cg = Landmark.export () in let agg = aggregate_landmarks cg in let all_nodes = nodes agg in assert ((root cg).time > 0.); diff --git a/tests/speedscope/README.md b/tests/speedscope/README.md new file mode 100644 index 0000000..449c2ac --- /dev/null +++ b/tests/speedscope/README.md @@ -0,0 +1,47 @@ +# Speedscope export example + +This directory contains a minimal hand-instrumented OCaml program that +demonstrates exporting a landmarks profile to the +[Speedscope](https://www.speedscope.app) flame-graph viewer. + +## What the example does + +`example.ml` instruments four functions: + +``` +main +โ”œโ”€โ”€ sort (sort 500 000 integers) +โ””โ”€โ”€ compute + โ””โ”€โ”€ fib (compute fib(33) recursively) +``` + +## Build + +From the repository root: + +``` +dune build +``` + +## Run and export + +``` +OCAML_LANDMARKS="format=speedscope,output=profile.json,time" \ + ./_build/default/tests/speedscope/example.exe +``` + +| `OCAML_LANDMARKS` option | Effect | +|---|---| +| `format=speedscope` | Write Speedscope JSON instead of the default text report | +| `output=profile.json` | Write the profile to this file instead of stderr | +| `time` | Collect wall-clock (`Sys.time`) seconds; without this, weights are in raw CPU cycles | + +## Visualise + +Open [speedscope.app](https://www.speedscope.app) and drag-and-drop the +generated `profile.json`, or load the pre-generated +[profile.json](profile.json) from this directory. + +The "Time Order" view shows functions in the order they were called; the +"Left Heavy" view groups identical stacks, which is most useful for recursive +functions. diff --git a/tests/speedscope/dune b/tests/speedscope/dune new file mode 100644 index 0000000..6a0948b --- /dev/null +++ b/tests/speedscope/dune @@ -0,0 +1,35 @@ +(executable + (name example) + (libraries + landmark + landmarks-speedscope + ) + (preprocess + (pps ppx_landmarks))) + +(executable + (name test) + (libraries + landmark + landmarks-speedscope + ) +) + +(rule + (with-stdout-to test.out + (run ./test.exe))) + +(rule + (alias runtest) + (package landmarks) + (action + (diff test.out.expected test.out))) + +; Check out _build/default/tests/speedscope/profile.speedscope +; and upload it to https://www.speedscope.app/ to view the graph +(rule + (alias runtest) + (package landmarks-speedscope) + (action + (setenv "OCAML_LANDMARKS" "format=speedscope,output=profile.speedscope,time" + (run ./example.exe)))) diff --git a/tests/speedscope/example.ml b/tests/speedscope/example.ml new file mode 100644 index 0000000..1ee4a83 --- /dev/null +++ b/tests/speedscope/example.ml @@ -0,0 +1,50 @@ +(* Example: profiling with Speedscope export using PPX instrumentation. + + Build: + dune build + + Run and write a Speedscope profile: + OCAML_LANDMARKS="format=speedscope,output=profile.json,time" \ + ./_build/default/tests/speedscope/example.exe + + Open profile.json at https://www.speedscope.app to visualise the + flame graph. A pre-generated example is in tests/speedscope/profile.json. + + The call tree below grows and shrinks to form a visible flame shape: + + main + โ”œโ”€โ”€ prepare (sort a list) + โ”‚ โ””โ”€โ”€ make_input (build the list) + โ”œโ”€โ”€ run + โ”‚ โ”œโ”€โ”€ phase_a (fib โ€” deep recursion) + โ”‚ โ””โ”€โ”€ phase_b (fold over a list) + โ””โ”€โ”€ summarise (cheap post-processing) +*) + +let[@landmark] rec fib n = + if n <= 1 then 1 else fib (n - 1) + fib (n - 2) + +let[@landmark] make_input n = + List.init n (fun i -> n - i) + +let[@landmark] prepare n = + List.sort compare (make_input n) + +let[@landmark] phase_a () = ignore (fib 33) + +let[@landmark] phase_b lst = + List.fold_left (fun acc x -> acc + x) 0 lst + +let[@landmark] run lst = + phase_a (); + ignore (phase_b lst) + +let[@landmark] summarise lst = + List.length lst + +let[@landmark] main () = + let lst = prepare 300_000 in + run lst; + ignore (summarise lst) + +let () = main () diff --git a/tests/speedscope/test.ml b/tests/speedscope/test.ml new file mode 100644 index 0000000..f49dcc8 --- /dev/null +++ b/tests/speedscope/test.ml @@ -0,0 +1,22 @@ +open Landmark.Graph + +(* Build a fixed graph with known values so the Speedscope output is + deterministic and can be compared against test.out.expected. *) + +let make_node id kind name location calls time children = + { id; kind; name; landmark_id = name; location; calls; time; + children; sys_time = 0.0; allocated_bytes = 0; + allocated_bytes_major = 0; distrib = Float.Array.create 0 } + +let () = + (* Graph: + root (Root) + โ”œโ”€โ”€ foo time=0.50 + โ””โ”€โ”€ bar time=0.25 + unit = "none" (no sys_time) + *) + let root = make_node 0 Root "ROOT" "" 1 0.75 [1; 2] in + let foo = make_node 1 Normal "foo" "test.ml:10" 5 0.50 [] in + let bar = make_node 2 Normal "bar" "test.ml:20" 3 0.25 [] in + let graph = graph_of_nodes ~label:"test" [root; foo; bar] in + Landmarks_speedscope.exporter stdout graph diff --git a/tests/speedscope/test.out.expected b/tests/speedscope/test.out.expected new file mode 100644 index 0000000..ad29681 --- /dev/null +++ b/tests/speedscope/test.out.expected @@ -0,0 +1,22 @@ +{ + "$schema": "https://www.speedscope.app/file-format-schema.json", + "name": "test", + "exporter": "landmarks", + "profiles": [ + { + "type": "sampled", + "name": "test", + "unit": "none", + "startValue": 0.0, + "endValue": 0.75, + "samples": [ [ 0 ], [ 1 ] ], + "weights": [ 0.5, 0.25 ] + } + ], + "shared": { + "frames": [ + { "name": "foo", "file": "test.ml", "line": 10 }, + { "name": "bar", "file": "test.ml", "line": 20 } + ] + } +}