From 939c6ab4ab71ff102cc0a8f6a94c41f9072cb774 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Thu, 11 Jun 2026 14:04:46 +0200 Subject: [PATCH 01/18] Add dace configuration to compilation fingerprint --- .../runners/dace/workflow/compilation.py | 19 +++++++++++- .../dace_tests/test_dace_compilation.py | 29 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index b8e18382d8..adf85182c1 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -19,6 +19,7 @@ from gt4py._core import definitions as core_defs, locking from gt4py.next import common, config +from gt4py.next.ffront import stages as ffront_stages from gt4py.next.otf import code_specs, definitions, stages, workflow from gt4py.next.otf.compilation import cache as gtx_cache from gt4py.next.program_processors.runners.dace.workflow import common as gtx_wfdcommon @@ -128,6 +129,13 @@ def __call__(self, **kwargs: Any) -> None: assert result is None +def _get_dace_config_nondefaults() -> dict[str, Any]: + # device type is not relevant for this function, we just need to enter the context + # to get the non-default config values + with gtx_wfdcommon.dace_context(device_type=core_defs.DeviceType.CPU): + return dace.Config._data.nondefaults() + + @dataclasses.dataclass(frozen=True) class DaCeCompiler( workflow.ChainableWorkflowMixin[ @@ -151,6 +159,10 @@ class DaCeCompiler( cmake_build_type: config.CMakeBuildType = dataclasses.field( default_factory=lambda: config.CMAKE_BUILD_TYPE ) + # we store the non-default values of `dace.Config` in order to include it in the stage fingerprint + dace_config_nondefaults: dict[str, Any] = dataclasses.field( + default_factory=_get_dace_config_nondefaults + ) def __call__( self, @@ -160,7 +172,12 @@ def __call__( device_type=self.device_type, cmake_build_type=self.cmake_build_type, ): - sdfg_build_folder = gtx_cache.get_cache_folder(inp, self.cache_lifetime) + # Append a subfolder named after the fingerprint of this compiler instance, so + # that builds with different compiler settings (e.g. non-default dace config) do + # not clash in the same build folder. + sdfg_build_folder = gtx_cache.get_cache_folder( + inp, self.cache_lifetime + ) / ffront_stages.fingerprint_stage(self) sdfg_build_folder.mkdir(parents=True, exist_ok=True) sdfg = dace.SDFG.from_json(inp.program_source.source_code) diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py index 1cbf9d3c2e..7716476cf8 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py @@ -147,3 +147,32 @@ def test_compiler_skips_tx_markers_for_non_gpu_device(tmp_path): spy.assert_not_called() assert compiled_sdfg.instrument == _NONE + + +# `CXXFLAGS`, `CUDAFLAGS` and `HIPFLAGS` feed `compiler.cpu.args`, `compiler.cuda.args` +# and `compiler.cuda.hip_args` respectively (see `set_dace_config`). +@pytest.mark.parametrize( + ("device_type", "compiler_flags_env"), + [ + (core_defs.DeviceType.CPU, "CXXFLAGS"), + (core_defs.DeviceType.CUDA, "CUDAFLAGS"), + (core_defs.DeviceType.ROCM, "HIPFLAGS"), + ], +) +def test_compiler_flags_change_build_folder(tmp_path, monkeypatch, device_type, compiler_flags_env): + """Different compiler flags must produce a different build folder. + + The flags are captured in `dace_config_nondefaults`, which is part of the compiler + fingerprint that names the per-compiler build sub-folder. Changing any of them must + therefore land the build in a different sub-folder of the (shared) cache folder. + """ + monkeypatch.delenv(compiler_flags_env, raising=False) + _, sdfg_default = _run_compiler(tmp_path, add_gpu_trace_markers=False, device_type=device_type) + + monkeypatch.setenv(compiler_flags_env, "-O0 -some-custom-flag") + _, sdfg_custom = _run_compiler(tmp_path, add_gpu_trace_markers=False, device_type=device_type) + + # Only the fingerprint sub-folder changes; the shared cache folder (`tmp_path`) does not. + assert sdfg_default.build_folder.parent == tmp_path + assert sdfg_custom.build_folder.parent == tmp_path + assert sdfg_default.build_folder != sdfg_custom.build_folder From 824b24376d7e5443aa34ffcdee9a993e36fcffb1 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Thu, 11 Jun 2026 14:45:34 +0200 Subject: [PATCH 02/18] remove mock --- .../runners_tests/dace_tests/test_dace_compilation.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py index 7716476cf8..0dd0abfd26 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py @@ -92,11 +92,6 @@ def _run_compiler( ) as spy, mock.patch.object(dace.SDFG, "compile", autospec=True) as compile_mock, mock.patch.object(dace_wf_compilation, "CompiledDaceProgram"), - mock.patch.object( - dace_wf_compilation.gtx_wfdcommon, - "dace_context", - lambda **kwargs: contextlib.nullcontext(), - ), mock.patch.object(dace_wf_compilation.gtx_cache, "get_cache_folder", return_value=tmp_path), mock.patch.object( dace_wf_compilation.locking, "lock", lambda *args, **kwargs: contextlib.nullcontext() From d8ea08c418de2d260fec5eaa268648b92d6275aa Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 15 Jun 2026 15:46:49 +0200 Subject: [PATCH 03/18] add context fingerprint to get_cache_folder --- .../otf/compilation/build_systems/cmake.py | 2 +- .../compilation/build_systems/compiledb.py | 2 +- src/gt4py/next/otf/compilation/cache.py | 37 +++----------- src/gt4py/next/otf/compilation/compiler.py | 2 +- .../runners/dace/workflow/compilation.py | 7 ++- .../dace_tests/test_dace_compilation.py | 49 +++++++++++++------ 6 files changed, 47 insertions(+), 52 deletions(-) diff --git a/src/gt4py/next/otf/compilation/build_systems/cmake.py b/src/gt4py/next/otf/compilation/build_systems/cmake.py index 1b79cad6e4..af6446ae1f 100644 --- a/src/gt4py/next/otf/compilation/build_systems/cmake.py +++ b/src/gt4py/next/otf/compilation/build_systems/cmake.py @@ -100,7 +100,7 @@ def __call__( ) return CMakeProject( - root_path=cache.get_cache_folder(source, cache_lifetime), + root_path=cache.get_cache_folder(source, cache_lifetime, self), source_files={ header_name: source.program_source.source_code, bindings_name: source.binding_source.source_code, diff --git a/src/gt4py/next/otf/compilation/build_systems/compiledb.py b/src/gt4py/next/otf/compilation/build_systems/compiledb.py index 347b0e25e9..44a0bc101b 100644 --- a/src/gt4py/next/otf/compilation/build_systems/compiledb.py +++ b/src/gt4py/next/otf/compilation/build_systems/compiledb.py @@ -72,7 +72,7 @@ def __call__( ) return CompiledbProject( - root_path=cache.get_cache_folder(source, cache_lifetime), + root_path=cache.get_cache_folder(source, cache_lifetime, self), program_name=name, source_files={ header_name: source.program_source.source_code, diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index ebc28f10a5..4a7fe7b25f 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -8,13 +8,12 @@ """Caching for compiled backend artifacts.""" -import hashlib import pathlib import tempfile +from typing import Any -from gt4py.next import config +from gt4py.next import config, utils from gt4py.next.otf import stages -from gt4py.next.otf.binding import interface _session_cache_dir = tempfile.TemporaryDirectory(prefix="gt4py_session_") @@ -22,30 +21,8 @@ _session_cache_dir_path = pathlib.Path(_session_cache_dir.name) -def _serialize_param(parameter: interface.Parameter) -> str: - return f"{parameter.name}: {parameter.type_!s}" - - -def _serialize_library_dependency(dependency: interface.LibraryDependency) -> str: - return f"{dependency.name}/{dependency.version}" - - -def _serialize_source(source: stages.ProgramSource) -> str: - parameters = [_serialize_param(param) for param in source.entry_point.parameters] - dependencies = [_serialize_library_dependency(dep) for dep in source.library_deps] - return f"""\ - language: {source.code_spec} - name: {source.entry_point.name} - params: {", ".join(parameters)} - deps: {", ".join(dependencies)} - src: {source.source_code} - """ - - -def _cache_folder_name(source: stages.ProgramSource) -> str: - serialized = _serialize_source(source) - fingerprint = hashlib.sha256(serialized.encode(encoding="utf-8")) - fingerprint_hex_str = fingerprint.hexdigest() +def _cache_folder_name(source: stages.ProgramSource, *ctx: Any) -> str: + fingerprint_hex_str = utils.stable_fingerprinter((source, *ctx)) return source.entry_point.name + "_" + fingerprint_hex_str @@ -61,7 +38,9 @@ def get_cache_base_path(lifetime: config.BuildCacheLifetime) -> pathlib.Path: def get_cache_folder( - compilable_source: stages.CompilableProject, lifetime: config.BuildCacheLifetime + compilable_source: stages.CompilableProject, + lifetime: config.BuildCacheLifetime, + *ctx: Any, ) -> pathlib.Path: """ Construct the path to where the build system project artifact of a compilable source should be cached. @@ -69,7 +48,7 @@ def get_cache_folder( The returned path points to an existing folder in all cases. """ # TODO(ricoh): make dependent on binding source too or add alternative that depends on bindings - folder_name = _cache_folder_name(compilable_source.program_source) + folder_name = _cache_folder_name(compilable_source.program_source, ctx) base_path = get_cache_base_path(lifetime) base_path.mkdir(exist_ok=True) diff --git a/src/gt4py/next/otf/compilation/compiler.py b/src/gt4py/next/otf/compilation/compiler.py index 3748d95192..652b8909a8 100644 --- a/src/gt4py/next/otf/compilation/compiler.py +++ b/src/gt4py/next/otf/compilation/compiler.py @@ -66,7 +66,7 @@ def __call__( self, inp: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], ) -> stages.ExecutableProgram: - src_dir = cache.get_cache_folder(inp, self.cache_lifetime) + src_dir = cache.get_cache_folder(inp, self.cache_lifetime, self) # If we are compiling the same program at the same time (e.g. multiple MPI ranks), # we need to make sure that only one of them accesses the same build directory for compilation. diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index adf85182c1..241c84aadb 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -19,7 +19,6 @@ from gt4py._core import definitions as core_defs, locking from gt4py.next import common, config -from gt4py.next.ffront import stages as ffront_stages from gt4py.next.otf import code_specs, definitions, stages, workflow from gt4py.next.otf.compilation import cache as gtx_cache from gt4py.next.program_processors.runners.dace.workflow import common as gtx_wfdcommon @@ -171,13 +170,13 @@ def __call__( with gtx_wfdcommon.dace_context( device_type=self.device_type, cmake_build_type=self.cmake_build_type, - ): + ) as dace_config: # Append a subfolder named after the fingerprint of this compiler instance, so # that builds with different compiler settings (e.g. non-default dace config) do # not clash in the same build folder. sdfg_build_folder = gtx_cache.get_cache_folder( - inp, self.cache_lifetime - ) / ffront_stages.fingerprint_stage(self) + inp, self.cache_lifetime, self, dace_config + ) sdfg_build_folder.mkdir(parents=True, exist_ok=True) sdfg = dace.SDFG.from_json(inp.program_source.source_code) diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py index 0dd0abfd26..2fe4bfdc8b 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py @@ -19,6 +19,8 @@ from gt4py._core import definitions as core_defs from gt4py.next import config +from gt4py.next.otf import code_specs, stages +from gt4py.next.otf.binding import interface from gt4py.next.program_processors.runners.dace.workflow import compilation as dace_wf_compilation @@ -66,16 +68,31 @@ def _make_sdfg_with_gpu_map() -> dace.SDFG: return sdfg +def _make_compilable_project() -> stages.CompilableProject: + """A real `CompilableProject` wrapping the GPU SDFG, as the dace translation step emits. + + Using a real project (rather than a `MagicMock`) lets the unmocked `get_cache_folder` + fingerprint the program source for the build-folder name. + """ + program_source = stages.ProgramSource( + entry_point=interface.Function("gpu_program", parameters=()), + source_code=_make_sdfg_with_gpu_map().to_json(), + library_deps=(), + code_spec=code_specs.SDFGCodeSpec(), + ) + binding_source = stages.BindingSource(source_code="", library_deps=()) + return stages.CompilableProject(program_source=program_source, binding_source=binding_source) + + def _run_compiler( - tmp_path, *, add_gpu_trace_markers: bool, device_type: core_defs.DeviceType + *, add_gpu_trace_markers: bool, device_type: core_defs.DeviceType ) -> tuple[mock.MagicMock, dace.SDFG]: """Run `DaCeCompiler` on a GPU SDFG with compilation stubbed out. Returns the spy wrapping `_add_tx_markers` and the SDFG that was handed to ``SDFG.compile`` (i.e. the SDFG after any marker processing). """ - inp = mock.MagicMock() - inp.program_source.source_code = _make_sdfg_with_gpu_map().to_json() + inp = _make_compilable_project() compiler = dace_wf_compilation.DaCeCompiler( bind_func_name="bind", @@ -92,7 +109,6 @@ def _run_compiler( ) as spy, mock.patch.object(dace.SDFG, "compile", autospec=True) as compile_mock, mock.patch.object(dace_wf_compilation, "CompiledDaceProgram"), - mock.patch.object(dace_wf_compilation.gtx_cache, "get_cache_folder", return_value=tmp_path), mock.patch.object( dace_wf_compilation.locking, "lock", lambda *args, **kwargs: contextlib.nullcontext() ), @@ -111,7 +127,7 @@ def _run_compiler( def test_compiler_applies_tx_markers_for_gpu(tmp_path): """On a CUDA target with the flag on, the compiler applies the markers to the SDFG.""" spy, compiled_sdfg = _run_compiler( - tmp_path, add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CUDA + add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CUDA ) spy.assert_called_once() @@ -127,7 +143,7 @@ def test_compiler_applies_tx_markers_for_gpu(tmp_path): def test_compiler_skips_tx_markers_when_flag_disabled(tmp_path): """With the flag off the compiler must not touch instrumentation, even on CUDA.""" spy, compiled_sdfg = _run_compiler( - tmp_path, add_gpu_trace_markers=False, device_type=core_defs.DeviceType.CUDA + add_gpu_trace_markers=False, device_type=core_defs.DeviceType.CUDA ) spy.assert_not_called() @@ -137,7 +153,7 @@ def test_compiler_skips_tx_markers_when_flag_disabled(tmp_path): def test_compiler_skips_tx_markers_for_non_gpu_device(tmp_path): """On a CPU target the markers must not be applied even with the flag on.""" spy, compiled_sdfg = _run_compiler( - tmp_path, add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CPU + add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CPU ) spy.assert_not_called() @@ -154,20 +170,21 @@ def test_compiler_skips_tx_markers_for_non_gpu_device(tmp_path): (core_defs.DeviceType.ROCM, "HIPFLAGS"), ], ) -def test_compiler_flags_change_build_folder(tmp_path, monkeypatch, device_type, compiler_flags_env): +def test_compiler_flags_change_build_folder(monkeypatch, device_type, compiler_flags_env): """Different compiler flags must produce a different build folder. - The flags are captured in `dace_config_nondefaults`, which is part of the compiler - fingerprint that names the per-compiler build sub-folder. Changing any of them must - therefore land the build in a different sub-folder of the (shared) cache folder. + The flags are captured in `dace_config_nondefaults`, which is part of the compiler's + fingerprint. The compiler instance is passed to `get_cache_folder` as the `ctx`, whose + fingerprint (together with the program source) is hashed into the build-folder name. + Changing any flag therefore changes that fingerprint and lands the build in a different + folder of the build cache. """ monkeypatch.delenv(compiler_flags_env, raising=False) - _, sdfg_default = _run_compiler(tmp_path, add_gpu_trace_markers=False, device_type=device_type) + _, sdfg_default = _run_compiler(add_gpu_trace_markers=False, device_type=device_type) monkeypatch.setenv(compiler_flags_env, "-O0 -some-custom-flag") - _, sdfg_custom = _run_compiler(tmp_path, add_gpu_trace_markers=False, device_type=device_type) + _, sdfg_custom = _run_compiler(add_gpu_trace_markers=False, device_type=device_type) - # Only the fingerprint sub-folder changes; the shared cache folder (`tmp_path`) does not. - assert sdfg_default.build_folder.parent == tmp_path - assert sdfg_custom.build_folder.parent == tmp_path + # The differing `dace_config_nondefaults` make the two compilers fingerprint differently, + # so `get_cache_folder` names two distinct build folders. assert sdfg_default.build_folder != sdfg_custom.build_folder From b7ab4d0840da6190806b989a1558f7bc10ebf63d Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 19 Jun 2026 12:58:26 +0200 Subject: [PATCH 04/18] keep ctx in cache folder onloy for dace backend --- src/gt4py/next/otf/compilation/build_systems/cmake.py | 2 +- .../next/otf/compilation/build_systems/compiledb.py | 2 +- src/gt4py/next/otf/compilation/cache.py | 11 +++++------ src/gt4py/next/otf/compilation/compiler.py | 2 +- .../runners/dace/workflow/compilation.py | 6 ++++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/gt4py/next/otf/compilation/build_systems/cmake.py b/src/gt4py/next/otf/compilation/build_systems/cmake.py index af6446ae1f..1b79cad6e4 100644 --- a/src/gt4py/next/otf/compilation/build_systems/cmake.py +++ b/src/gt4py/next/otf/compilation/build_systems/cmake.py @@ -100,7 +100,7 @@ def __call__( ) return CMakeProject( - root_path=cache.get_cache_folder(source, cache_lifetime, self), + root_path=cache.get_cache_folder(source, cache_lifetime), source_files={ header_name: source.program_source.source_code, bindings_name: source.binding_source.source_code, diff --git a/src/gt4py/next/otf/compilation/build_systems/compiledb.py b/src/gt4py/next/otf/compilation/build_systems/compiledb.py index 44a0bc101b..347b0e25e9 100644 --- a/src/gt4py/next/otf/compilation/build_systems/compiledb.py +++ b/src/gt4py/next/otf/compilation/build_systems/compiledb.py @@ -72,7 +72,7 @@ def __call__( ) return CompiledbProject( - root_path=cache.get_cache_folder(source, cache_lifetime, self), + root_path=cache.get_cache_folder(source, cache_lifetime), program_name=name, source_files={ header_name: source.program_source.source_code, diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index 4a7fe7b25f..887a0a9a35 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -10,9 +10,8 @@ import pathlib import tempfile -from typing import Any -from gt4py.next import config, utils +from gt4py.next import config, fingerprinting from gt4py.next.otf import stages @@ -21,8 +20,8 @@ _session_cache_dir_path = pathlib.Path(_session_cache_dir.name) -def _cache_folder_name(source: stages.ProgramSource, *ctx: Any) -> str: - fingerprint_hex_str = utils.stable_fingerprinter((source, *ctx)) +def _cache_folder_name(source: stages.ProgramSource, ctx_fingerprint: str | None) -> str: + fingerprint_hex_str = fingerprinting.strict_fingerprinter((source, ctx_fingerprint)) return source.entry_point.name + "_" + fingerprint_hex_str @@ -40,7 +39,7 @@ def get_cache_base_path(lifetime: config.BuildCacheLifetime) -> pathlib.Path: def get_cache_folder( compilable_source: stages.CompilableProject, lifetime: config.BuildCacheLifetime, - *ctx: Any, + ctx_fingerprint: str | None = None, ) -> pathlib.Path: """ Construct the path to where the build system project artifact of a compilable source should be cached. @@ -48,7 +47,7 @@ def get_cache_folder( The returned path points to an existing folder in all cases. """ # TODO(ricoh): make dependent on binding source too or add alternative that depends on bindings - folder_name = _cache_folder_name(compilable_source.program_source, ctx) + folder_name = _cache_folder_name(compilable_source.program_source, ctx_fingerprint) base_path = get_cache_base_path(lifetime) base_path.mkdir(exist_ok=True) diff --git a/src/gt4py/next/otf/compilation/compiler.py b/src/gt4py/next/otf/compilation/compiler.py index 652b8909a8..3748d95192 100644 --- a/src/gt4py/next/otf/compilation/compiler.py +++ b/src/gt4py/next/otf/compilation/compiler.py @@ -66,7 +66,7 @@ def __call__( self, inp: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], ) -> stages.ExecutableProgram: - src_dir = cache.get_cache_folder(inp, self.cache_lifetime, self) + src_dir = cache.get_cache_folder(inp, self.cache_lifetime) # If we are compiling the same program at the same time (e.g. multiple MPI ranks), # we need to make sure that only one of them accesses the same build directory for compilation. diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index 241c84aadb..3b595f772e 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -18,7 +18,7 @@ import factory from gt4py._core import definitions as core_defs, locking -from gt4py.next import common, config +from gt4py.next import common, config, fingerprinting from gt4py.next.otf import code_specs, definitions, stages, workflow from gt4py.next.otf.compilation import cache as gtx_cache from gt4py.next.program_processors.runners.dace.workflow import common as gtx_wfdcommon @@ -175,7 +175,9 @@ def __call__( # that builds with different compiler settings (e.g. non-default dace config) do # not clash in the same build folder. sdfg_build_folder = gtx_cache.get_cache_folder( - inp, self.cache_lifetime, self, dace_config + inp, + self.cache_lifetime, + fingerprinting.strict_fingerprinter(dace_config), ) sdfg_build_folder.mkdir(parents=True, exist_ok=True) From d98439b0bb87affcb0e0e4be6acb67357ec90418 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 19 Jun 2026 15:18:39 +0200 Subject: [PATCH 05/18] edit comment --- .../program_processors/runners/dace/workflow/compilation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index 3b595f772e..cce77851f1 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -171,9 +171,8 @@ def __call__( device_type=self.device_type, cmake_build_type=self.cmake_build_type, ) as dace_config: - # Append a subfolder named after the fingerprint of this compiler instance, so - # that builds with different compiler settings (e.g. non-default dace config) do - # not clash in the same build folder. + # We use the fingeprint of the dace configuration in the cache key to ensure + # that the SDFG will be rebuilt if the user changes the configuration. sdfg_build_folder = gtx_cache.get_cache_folder( inp, self.cache_lifetime, From 2d14950533c425e51adc6c850bce61c41abd4f1b Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 22 Jun 2026 10:07:30 +0200 Subject: [PATCH 06/18] edit --- .../runners/dace/workflow/compilation.py | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index cce77851f1..3d2eee2146 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -128,13 +128,6 @@ def __call__(self, **kwargs: Any) -> None: assert result is None -def _get_dace_config_nondefaults() -> dict[str, Any]: - # device type is not relevant for this function, we just need to enter the context - # to get the non-default config values - with gtx_wfdcommon.dace_context(device_type=core_defs.DeviceType.CPU): - return dace.Config._data.nondefaults() - - @dataclasses.dataclass(frozen=True) class DaCeCompiler( workflow.ChainableWorkflowMixin[ @@ -159,9 +152,14 @@ class DaCeCompiler( default_factory=lambda: config.CMAKE_BUILD_TYPE ) # we store the non-default values of `dace.Config` in order to include it in the stage fingerprint - dace_config_nondefaults: dict[str, Any] = dataclasses.field( - default_factory=_get_dace_config_nondefaults - ) + dace_config_nondefaults: dict[str, Any] = dataclasses.field(init=False) + + def __post_init__(self) -> None: + with gtx_wfdcommon.dace_context( + device_type=self.device_type, + cmake_build_type=self.cmake_build_type, + ): + object.__setattr__(self, "dace_config_nondefaults", dace.Config._data.nondefaults()) def __call__( self, @@ -170,13 +168,13 @@ def __call__( with gtx_wfdcommon.dace_context( device_type=self.device_type, cmake_build_type=self.cmake_build_type, - ) as dace_config: - # We use the fingeprint of the dace configuration in the cache key to ensure - # that the SDFG will be rebuilt if the user changes the configuration. + ): + # We use the fingeprint of the compilation stage to ensure that the SDFG + # will be rebuilt if the user changes the backend configuration. sdfg_build_folder = gtx_cache.get_cache_folder( inp, self.cache_lifetime, - fingerprinting.strict_fingerprinter(dace_config), + fingerprinting.strict_fingerprinter(self), ) sdfg_build_folder.mkdir(parents=True, exist_ok=True) From 67e2e7d66afd76cd97ccba46ca75b3492e49a2d0 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 22 Jun 2026 16:17:01 +0200 Subject: [PATCH 07/18] edits for metrics level --- .../program_processors/runners/dace/workflow/common.py | 8 ++++++++ .../runners/dace/workflow/decoration.py | 8 +++++--- .../runners/dace/workflow/translation.py | 4 +++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/common.py b/src/gt4py/next/program_processors/runners/dace/workflow/common.py index 6ef363d924..738caffc13 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/common.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/common.py @@ -20,10 +20,18 @@ """Name of SDFG argument to input the GT4Py metrics level.""" +SDFG_ARG_METRIC_LEVEL_DTYPE: Final[dace.dtypes.typeclass] = dace.int32 +"""DaCe datatype of `SDFG_ARG_METRIC_LEVEL` argument.""" + + SDFG_ARG_METRIC_COMPUTE_TIME: Final[str] = "gt_compute_time" """Name of SDFG argument to return the total compute time to GT4Py.""" +SDFG_ARG_METRIC_COMPUTE_TIME_DTYPE: Final[dace.dtypes.typeclass] = dace.float64 +"""DaCe datatype of `SDFG_ARG_METRIC_COMPUTE_TIME` argument.""" + + def set_dace_config( device_type: core_defs.DeviceType, cmake_build_type: Optional[gtx_config.CMakeBuildType] = None, diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py b/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py index 103e7af33b..e0bcfec6b6 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py @@ -14,7 +14,7 @@ import numpy as np from gt4py._core import definitions as core_defs -from gt4py.next import common as gtx_common, config, utils as gtx_utils +from gt4py.next import common as gtx_common, utils as gtx_utils from gt4py.next.instrumentation import metrics from gt4py.next.otf import stages from gt4py.next.program_processors.runners.dace import sdfg_callable @@ -30,7 +30,9 @@ def convert_args( ) -> stages.ExecutableProgram: # Retieve metrics level from GT4Py environment variable. collect_time = metrics.is_level_enabled(metrics.PERFORMANCE) - collect_time_arg = np.array([1], dtype=np.float64) + collect_time_arg = np.array( + [1], dtype=gtx_wfdcommon.SDFG_ARG_METRIC_COMPUTE_TIME_DTYPE.as_numpy_dtype() + ) # We use the callback function provided by the compiled program to update the SDFG arglist. update_sdfg_call_args = functools.partial( fun.update_sdfg_ctype_arglist, device, fun.sdfg_argtypes @@ -64,7 +66,7 @@ def decorated_program( filter_args=False, ) this_call_args |= { - gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL: config.COLLECT_METRICS_LEVEL, + gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL: metrics.get_current_level(), gtx_wfdcommon.SDFG_ARG_METRIC_COMPUTE_TIME: collect_time_arg, } fun.construct_arguments(**this_call_args) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py index 5c8e0cc260..8beecdd7e3 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py @@ -173,7 +173,9 @@ def add_instrumentation(sdfg: dace.SDFG, gpu: bool) -> None: """ output, _ = sdfg.add_array(gtx_wfdcommon.SDFG_ARG_METRIC_COMPUTE_TIME, [1], dace.float64) start_time, _ = sdfg.add_scalar("gt_start_time", dace.int64, transient=True) - metrics_level = sdfg.add_symbol(gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL, dace.int32) + metrics_level = sdfg.add_symbol( + gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL, gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL_DTYPE + ) #### 1. Synchronize the CUDA device, in order to wait for kernels completion. # Even when the target device is GPU, it can happen that dace emits code without From 73010875a477fff280096c1f992145db0daab9b1 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 22 Jun 2026 16:26:57 +0200 Subject: [PATCH 08/18] edit comments --- src/gt4py/next/otf/compilation/cache.py | 2 ++ .../program_processors/runners/dace/workflow/compilation.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index 887a0a9a35..e836e1365e 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -44,6 +44,8 @@ def get_cache_folder( """ Construct the path to where the build system project artifact of a compilable source should be cached. + An optional fingerprint can be provided to distinguish between different contexts + that may produce different artifacts for the same compilable source. The returned path points to an existing folder in all cases. """ # TODO(ricoh): make dependent on binding source too or add alternative that depends on bindings diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index 3d2eee2146..db559390ea 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -169,7 +169,7 @@ def __call__( device_type=self.device_type, cmake_build_type=self.cmake_build_type, ): - # We use the fingeprint of the compilation stage to ensure that the SDFG + # We use the fingerprint of the compilation stage to ensure that the SDFG # will be rebuilt if the user changes the backend configuration. sdfg_build_folder = gtx_cache.get_cache_folder( inp, From dedced8de5beceac8f2f92fae1aa8d235b5bfa55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Gonz=C3=A1lez=20Paredes?= Date: Fri, 26 Jun 2026 08:55:51 +0200 Subject: [PATCH 09/18] refactor[next]: adapt codebase to ExtensionSource rename and cache-folder API Propagate the CompilableProject -> ExtensionSource rename and the new get_cache_folder signature (build_context_id) through docstrings, comments, user docs and tests. Also fix compiledb's prototype-source mismatch: the locate and create paths must share one ExtensionSource so the new folder naming (binding source folded into slug + fingerprint) resolves to the same cache folder. --- docs/user/next/advanced/HackTheToolchain.md | 2 +- src/gt4py/next/otf/binding/nanobind.py | 4 ++-- .../otf/compilation/build_systems/cmake.py | 4 ++-- .../compilation/build_systems/compiledb.py | 22 ++++++++++--------- src/gt4py/next/otf/compilation/cache.py | 22 +++++++++---------- src/gt4py/next/otf/compilation/compiler.py | 8 +++---- src/gt4py/next/otf/definitions.py | 14 +++++------- src/gt4py/next/otf/recipes.py | 4 ++-- src/gt4py/next/otf/stages.py | 5 ++--- .../runners/dace/workflow/bindings.py | 4 ++-- .../runners/dace/workflow/compilation.py | 8 +++---- .../next/program_processors/runners/gtfn.py | 4 +--- .../build_systems_tests/conftest.py | 8 +++---- .../build_systems_tests/test_cmake.py | 4 ++-- .../build_systems_tests/test_compiledb.py | 8 +++---- .../dace_tests/test_dace_bindings.py | 6 ++--- .../dace_tests/test_dace_compilation.py | 19 ++++++++-------- 17 files changed, 70 insertions(+), 76 deletions(-) diff --git a/docs/user/next/advanced/HackTheToolchain.md b/docs/user/next/advanced/HackTheToolchain.md index 785cc0b24d..74cf66e1da 100644 --- a/docs/user/next/advanced/HackTheToolchain.md +++ b/docs/user/next/advanced/HackTheToolchain.md @@ -59,7 +59,7 @@ class PureCpp2WorkflowFactory(gtx.program_processors.runners.gtfn.GTFNCompileWor translation: workflow.Workflow[ gtx.otf.definitions.CompilableProgramDef, gtx.otf.stages.ProgramSource ] = MyCodeGen() - bindings: workflow.Workflow[gtx.otf.stages.ProgramSource, gtx.otf.stages.CompilableProject] = ( + bindings: workflow.Workflow[gtx.otf.stages.ProgramSource, gtx.otf.stages.ExtensionSource] = ( Cpp2BindingsGen() ) diff --git a/src/gt4py/next/otf/binding/nanobind.py b/src/gt4py/next/otf/binding/nanobind.py index b92355a85f..d4eda86e44 100644 --- a/src/gt4py/next/otf/binding/nanobind.py +++ b/src/gt4py/next/otf/binding/nanobind.py @@ -307,5 +307,5 @@ def create_bindings( def bind_source( inp: stages.ProgramSource[CodeSpecT], -) -> stages.CompilableProject[CodeSpecT, code_specs.PythonCodeSpec]: - return stages.CompilableProject(program_source=inp, binding_source=create_bindings(inp)) +) -> stages.ExtensionSource[CodeSpecT, code_specs.PythonCodeSpec]: + return stages.ExtensionSource(program_source=inp, binding_source=create_bindings(inp)) diff --git a/src/gt4py/next/otf/compilation/build_systems/cmake.py b/src/gt4py/next/otf/compilation/build_systems/cmake.py index 1b79cad6e4..9942de6fa8 100644 --- a/src/gt4py/next/otf/compilation/build_systems/cmake.py +++ b/src/gt4py/next/otf/compilation/build_systems/cmake.py @@ -66,7 +66,7 @@ def get_cmake_device_arch_option() -> str: class CMakeFactory( compiler.BuildSystemProjectGenerator[CPPLikeCodeSpecT, code_specs.PythonCodeSpec] ): - """Create a CMakeProject from a ``CompilableSource`` stage object with given CMake settings.""" + """Create a CMakeProject from an ``ExtensionSource`` stage object with given CMake settings.""" cmake_generator_name: str = "Ninja" cmake_build_type: config.CMakeBuildType = config.CMakeBuildType.DEBUG @@ -74,7 +74,7 @@ class CMakeFactory( def __call__( self, - source: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + source: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], cache_lifetime: config.BuildCacheLifetime, ) -> CMakeProject: if not source.binding_source: diff --git a/src/gt4py/next/otf/compilation/build_systems/compiledb.py b/src/gt4py/next/otf/compilation/build_systems/compiledb.py index 347b0e25e9..08c379af08 100644 --- a/src/gt4py/next/otf/compilation/build_systems/compiledb.py +++ b/src/gt4py/next/otf/compilation/build_systems/compiledb.py @@ -32,7 +32,7 @@ class CompiledbFactory( compiler.BuildSystemProjectGenerator[CPPLikeCodeSpecT, code_specs.PythonCodeSpec] ): """ - Create a CompiledbProject from a ``CompilableSource`` stage object with given CMake settings. + Create a CompiledbProject from an ``ExtensionSource`` stage object with given CMake settings. Use CMake to generate a compiledb with the required sequence of build commands. Generate a compiledb only if there isn't one for the given combination of cmake configuration @@ -45,7 +45,7 @@ class CompiledbFactory( def __call__( self, - source: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + source: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], cache_lifetime: config.BuildCacheLifetime, ) -> CompiledbProject: if not source.binding_source: @@ -264,16 +264,19 @@ def _cc_get_compiledb( cmake_flags: list[str], cache_lifetime: config.BuildCacheLifetime, ) -> pathlib.Path: - cache_path = cache.get_cache_folder( - stages.CompilableProject(prototype_program_source, None), cache_lifetime + # Use the same prototype source (with empty bindings) for both locating and creating the + # compiledb, so `get_cache_folder` names the same folder in either path. + prototype_source: stages.ExtensionSource = stages.ExtensionSource( + prototype_program_source, stages.BindingSource(source_code="", library_deps=()) ) + cache_path = cache.get_cache_folder(prototype_source, cache_lifetime) # In a multi-threaded environment, multiple threads may try to create the compiledb at the same time # leading to compilation errors. with locking.lock(cache_path): if renew_compiledb or not (compiled_db := _cc_find_compiledb(path=cache_path)): compiled_db = _cc_create_compiledb( - prototype_program_source=prototype_program_source, + prototype_source=prototype_source, build_type=build_type, cmake_flags=cmake_flags, cache_lifetime=cache_lifetime, @@ -292,7 +295,7 @@ def _cc_find_compiledb(path: pathlib.Path) -> Optional[pathlib.Path]: def _cc_create_compiledb( - prototype_program_source: stages.ProgramSource, + prototype_source: stages.ExtensionSource, build_type: config.CMakeBuildType, cmake_flags: list[str], cache_lifetime: config.BuildCacheLifetime, @@ -302,18 +305,17 @@ def _cc_create_compiledb( cmake_build_type=build_type, cmake_extra_flags=cmake_flags, )( - stages.CompilableProject( - prototype_program_source, stages.BindingSource(source_code="", library_deps=()) - ), + prototype_source, cache_lifetime, ) path = prototype_project.root_path name = prototype_project.program_name + file_extension = prototype_source.program_source.code_spec.file_extension binding_src_name = next( name for name in prototype_project.source_files.keys() - if name.endswith(f"_bindings.{prototype_program_source.code_spec.file_extension}") + if name.endswith(f"_bindings.{file_extension}") ) prototype_project.build() diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index e836e1365e..70a3aee5e5 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -20,11 +20,6 @@ _session_cache_dir_path = pathlib.Path(_session_cache_dir.name) -def _cache_folder_name(source: stages.ProgramSource, ctx_fingerprint: str | None) -> str: - fingerprint_hex_str = fingerprinting.strict_fingerprinter((source, ctx_fingerprint)) - return source.entry_point.name + "_" + fingerprint_hex_str - - def get_cache_base_path(lifetime: config.BuildCacheLifetime) -> pathlib.Path: """Return the base directory for cached artifacts with the given lifetime.""" match lifetime: @@ -37,19 +32,22 @@ def get_cache_base_path(lifetime: config.BuildCacheLifetime) -> pathlib.Path: def get_cache_folder( - compilable_source: stages.CompilableProject, + ext_source: stages.ExtensionSource, lifetime: config.BuildCacheLifetime, - ctx_fingerprint: str | None = None, + build_context_id: str = "", ) -> pathlib.Path: """ - Construct the path to where the build system project artifact of a compilable source should be cached. + Construct the path to where the build system project artifact of an extension source should be cached. - An optional fingerprint can be provided to distinguish between different contexts - that may produce different artifacts for the same compilable source. + An optional ``build_context_id`` can be provided to distinguish between different contexts + that may produce different artifacts for the same extension source. The returned path points to an existing folder in all cases. """ - # TODO(ricoh): make dependent on binding source too or add alternative that depends on bindings - folder_name = _cache_folder_name(compilable_source.program_source, ctx_fingerprint) + fingerprinter = fingerprinting.strict_fingerprinter + slug = ext_source.program_source.entry_point.name + if ext_source.binding_source: + slug = f"{slug}_bound" + folder_name = f"{slug}_{fingerprinter(ext_source)}_{build_context_id}" base_path = get_cache_base_path(lifetime) base_path.mkdir(exist_ok=True) diff --git a/src/gt4py/next/otf/compilation/compiler.py b/src/gt4py/next/otf/compilation/compiler.py index 3748d95192..17574355fd 100644 --- a/src/gt4py/next/otf/compilation/compiler.py +++ b/src/gt4py/next/otf/compilation/compiler.py @@ -39,7 +39,7 @@ def module_exists(data: build_data.BuildData, src_dir: pathlib.Path) -> bool: class BuildSystemProjectGenerator(Protocol[CodeSpecT, TargetCodeSpecT]): def __call__( self, - source: stages.CompilableProject[CodeSpecT, TargetCodeSpecT], + source: stages.ExtensionSource[CodeSpecT, TargetCodeSpecT], cache_lifetime: config.BuildCacheLifetime, ) -> stages.BuildSystemProject[CodeSpecT, TargetCodeSpecT]: ... @@ -47,11 +47,11 @@ def __call__( @dataclasses.dataclass(frozen=True) class Compiler( workflow.ChainableWorkflowMixin[ - stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], stages.ExecutableProgram, ], workflow.ReplaceEnabledWorkflowMixin[ - stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], stages.ExecutableProgram, ], definitions.CompilationStep[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], @@ -64,7 +64,7 @@ class Compiler( def __call__( self, - inp: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + inp: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], ) -> stages.ExecutableProgram: src_dir = cache.get_cache_folder(inp, self.cache_lifetime) diff --git a/src/gt4py/next/otf/definitions.py b/src/gt4py/next/otf/definitions.py index 11b42dc6ce..2366695242 100644 --- a/src/gt4py/next/otf/definitions.py +++ b/src/gt4py/next/otf/definitions.py @@ -44,25 +44,23 @@ class TranslationStep( class BindingStep(Protocol[CodeSpecT, TargetCodeSpecT]): """ - Generate Bindings for program source and package both together (ProgramSource -> CompilableSource). + Generate Bindings for program source and package both together (ProgramSource -> ExtensionSource). In the special cases where bindings are not required, such a step could also simply construct - a ``CompilableSource`` from the ``ProgramSource`` with bindings set to ``None``. + an ``ExtensionSource`` from the ``ProgramSource`` with bindings set to ``None``. """ def __call__( self, program_source: stages.ProgramSource[CodeSpecT] - ) -> stages.CompilableProject[CodeSpecT, TargetCodeSpecT]: ... + ) -> stages.ExtensionSource[CodeSpecT, TargetCodeSpecT]: ... class CompilationStep( - workflow.Workflow[ - stages.CompilableProject[CodeSpecT, TargetCodeSpecT], stages.ExecutableProgram - ], + workflow.Workflow[stages.ExtensionSource[CodeSpecT, TargetCodeSpecT], stages.ExecutableProgram], Protocol[CodeSpecT, TargetCodeSpecT], ): - """Compile program source code and bindings into a python callable (CompilableSource -> CompiledProgram).""" + """Compile program source code and bindings into a python callable (ExtensionSource -> CompiledProgram).""" def __call__( - self, source: stages.CompilableProject[CodeSpecT, TargetCodeSpecT] + self, source: stages.ExtensionSource[CodeSpecT, TargetCodeSpecT] ) -> stages.ExecutableProgram: ... diff --git a/src/gt4py/next/otf/recipes.py b/src/gt4py/next/otf/recipes.py index 79cd17162b..0057ef46ed 100644 --- a/src/gt4py/next/otf/recipes.py +++ b/src/gt4py/next/otf/recipes.py @@ -18,6 +18,6 @@ class OTFCompileWorkflow(workflow.NamedStepSequence): """The typical compiled backend steps composed into a workflow.""" translation: definitions.TranslationStep - bindings: workflow.Workflow[stages.ProgramSource, stages.CompilableProject] - compilation: workflow.Workflow[stages.CompilableProject, stages.ExecutableProgram] + bindings: workflow.Workflow[stages.ProgramSource, stages.ExtensionSource] + compilation: workflow.Workflow[stages.ExtensionSource, stages.ExecutableProgram] decoration: workflow.Workflow[stages.ExecutableProgram, stages.ExecutableProgram] diff --git a/src/gt4py/next/otf/stages.py b/src/gt4py/next/otf/stages.py index c0bdddee1c..f731690e83 100644 --- a/src/gt4py/next/otf/stages.py +++ b/src/gt4py/next/otf/stages.py @@ -90,9 +90,8 @@ class BindingSource(Generic[CodeSpecT, TargetCodeSpecT]): library_deps: tuple[interface.LibraryDependency, ...] -# TODO(ricoh): reconsider name in view of future backends producing standalone compilable ProgramSource code @dataclasses.dataclass(frozen=True) -class CompilableProject(Generic[CodeSpecT, TargetCodeSpecT]): +class ExtensionSource(Generic[CodeSpecT, TargetCodeSpecT]): """ Encapsulate all the source code required for OTF compilation. @@ -117,7 +116,7 @@ def library_deps(self) -> tuple[interface.LibraryDependency, ...]: class BuildSystemProject(Protocol[CodeSpecT_co, TargetCodeSpecT_co]): """ - Use source code extracted from a ``CompilableSource`` to configure and build a GT4Py program. + Use source code extracted from an ``ExtensionSource`` to configure and build a GT4Py program. Should only be considered an OTF stage if used as an endpoint, as this only runs commands on source files and is not responsible for importing the results into Python. diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py b/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py index 7a729804fc..172668af65 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py @@ -288,13 +288,13 @@ def _create_sdfg_bindings( def bind_sdfg( inp: stages.ProgramSource[code_specs.SDFGCodeSpec], bind_func_name: str, -) -> stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec]: +) -> stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec]: """ Method to be used as workflow stage for generation of SDFG bindings. Refer to `_create_sdfg_bindings` documentation. """ - return stages.CompilableProject( + return stages.ExtensionSource( program_source=inp, binding_source=_create_sdfg_bindings(inp, bind_func_name), ) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index db559390ea..44861582e9 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -131,11 +131,11 @@ def __call__(self, **kwargs: Any) -> None: @dataclasses.dataclass(frozen=True) class DaCeCompiler( workflow.ChainableWorkflowMixin[ - stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], CompiledDaceProgram, ], workflow.ReplaceEnabledWorkflowMixin[ - stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], CompiledDaceProgram, ], definitions.CompilationStep[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], @@ -163,7 +163,7 @@ def __post_init__(self) -> None: def __call__( self, - inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], ) -> CompiledDaceProgram: with gtx_wfdcommon.dace_context( device_type=self.device_type, @@ -174,7 +174,7 @@ def __call__( sdfg_build_folder = gtx_cache.get_cache_folder( inp, self.cache_lifetime, - fingerprinting.strict_fingerprinter(self), + build_context_id=fingerprinting.strict_fingerprinter(self.dace_config_nondefaults), ) sdfg_build_folder.mkdir(parents=True, exist_ok=True) diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py index c4b4d3d698..1afbac9fd6 100644 --- a/src/gt4py/next/program_processors/runners/gtfn.py +++ b/src/gt4py/next/program_processors/runners/gtfn.py @@ -134,9 +134,7 @@ class Params: ) translation = factory.LazyAttribute(lambda o: o.bare_translation) - bindings: workflow.Workflow[stages.ProgramSource, stages.CompilableProject] = ( - nanobind.bind_source - ) + bindings: workflow.Workflow[stages.ProgramSource, stages.ExtensionSource] = nanobind.bind_source compilation = factory.SubFactory( compiler.CompilerFactory, cache_lifetime=factory.LazyFunction(lambda: config.BUILD_CACHE_LIFETIME), diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py index d3bd748af0..4b058bf8c3 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py @@ -88,16 +88,16 @@ def program_source_example(): @pytest.fixture -def compilable_source_example(program_source_example): - return stages.CompilableProject( +def extension_source_example(program_source_example): + return stages.ExtensionSource( program_source=program_source_example, binding_source=nanobind.create_bindings(program_source_example), ) @pytest.fixture -def clean_example_session_cache(compilable_source_example): - cache_dir = cache.get_cache_folder(compilable_source_example, config.BuildCacheLifetime.SESSION) +def clean_example_session_cache(extension_source_example): + cache_dir = cache.get_cache_folder(extension_source_example, config.BuildCacheLifetime.SESSION) if cache_dir.exists(): shutil.rmtree(cache_dir) yield diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py index 1903468da9..e3bd760073 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py @@ -50,9 +50,9 @@ def test_get_cmake_device_arch_option_rocm(): assert cmake.get_cmake_device_arch_option() == "-DCMAKE_HIP_ARCHITECTURES=gfx90a" -def test_default_cmake_factory(compilable_source_example, clean_example_session_cache): +def test_default_cmake_factory(extension_source_example, clean_example_session_cache): otf_builder = cmake.CMakeFactory()( - source=compilable_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION + source=extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION ) assert not build_data.contains_data(otf_builder.root_path) diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py index 4c1ab0dba8..44fe700c99 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py @@ -15,9 +15,9 @@ from gt4py.next.otf.compilation.build_systems import compiledb -def test_default_compiledb_factory(compilable_source_example, clean_example_session_cache): +def test_default_compiledb_factory(extension_source_example, clean_example_session_cache): otf_builder = compiledb.CompiledbFactory()( - compilable_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION + extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION ) # make sure the example project has not been written yet @@ -35,9 +35,9 @@ def test_default_compiledb_factory(compilable_source_example, clean_example_sess assert (otf_builder.root_path / "build.sh").exists() -def test_compiledb_project_is_relocatable(compilable_source_example, clean_example_session_cache): +def test_compiledb_project_is_relocatable(extension_source_example, clean_example_session_cache): builder = compiledb.CompiledbFactory()( - compilable_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION + extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION ) # make sure the example project has not been written yet diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py index 8a144fefdc..2c4811dc7d 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py @@ -228,7 +228,7 @@ def {_bind_func_name}(device, sdfg_argtypes, args, sdfg_call_args, offset_provid def mocked_compile_call( self, - inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], binding_source_ref: str, ): assert len(inp.library_deps) == 0 @@ -245,7 +245,7 @@ def mocked_compile_call( def mocked_compile_call_cartesian( self, - inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], use_metrics: bool, use_zero_origin: bool, ): @@ -257,7 +257,7 @@ def mocked_compile_call_cartesian( def mocked_compile_call_unstructured( self, - inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], use_metrics: bool, use_zero_origin: bool, ): diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py index 2fe4bfdc8b..890cbe71d3 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py @@ -68,10 +68,10 @@ def _make_sdfg_with_gpu_map() -> dace.SDFG: return sdfg -def _make_compilable_project() -> stages.CompilableProject: - """A real `CompilableProject` wrapping the GPU SDFG, as the dace translation step emits. +def _make_extension_source() -> stages.ExtensionSource: + """A real `ExtensionSource` wrapping the GPU SDFG, as the dace translation step emits. - Using a real project (rather than a `MagicMock`) lets the unmocked `get_cache_folder` + Using a real source (rather than a `MagicMock`) lets the unmocked `get_cache_folder` fingerprint the program source for the build-folder name. """ program_source = stages.ProgramSource( @@ -81,7 +81,7 @@ def _make_compilable_project() -> stages.CompilableProject: code_spec=code_specs.SDFGCodeSpec(), ) binding_source = stages.BindingSource(source_code="", library_deps=()) - return stages.CompilableProject(program_source=program_source, binding_source=binding_source) + return stages.ExtensionSource(program_source=program_source, binding_source=binding_source) def _run_compiler( @@ -92,7 +92,7 @@ def _run_compiler( Returns the spy wrapping `_add_tx_markers` and the SDFG that was handed to ``SDFG.compile`` (i.e. the SDFG after any marker processing). """ - inp = _make_compilable_project() + inp = _make_extension_source() compiler = dace_wf_compilation.DaCeCompiler( bind_func_name="bind", @@ -173,11 +173,10 @@ def test_compiler_skips_tx_markers_for_non_gpu_device(tmp_path): def test_compiler_flags_change_build_folder(monkeypatch, device_type, compiler_flags_env): """Different compiler flags must produce a different build folder. - The flags are captured in `dace_config_nondefaults`, which is part of the compiler's - fingerprint. The compiler instance is passed to `get_cache_folder` as the `ctx`, whose - fingerprint (together with the program source) is hashed into the build-folder name. - Changing any flag therefore changes that fingerprint and lands the build in a different - folder of the build cache. + The flags are captured in `dace_config_nondefaults`, whose fingerprint the compiler + passes to `get_cache_folder` as the `build_context_id`. That id is appended to the + build-folder name, so changing any flag lands the build in a different folder of the + build cache. """ monkeypatch.delenv(compiler_flags_env, raising=False) _, sdfg_default = _run_compiler(add_gpu_trace_markers=False, device_type=device_type) From 5935a47296c52af6eeef895c75c22b4dacb15a8a Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 26 Jun 2026 12:32:13 +0200 Subject: [PATCH 10/18] fix suffix of build folder name --- src/gt4py/next/otf/compilation/cache.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index 70a3aee5e5..fe667a6240 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -47,7 +47,9 @@ def get_cache_folder( slug = ext_source.program_source.entry_point.name if ext_source.binding_source: slug = f"{slug}_bound" - folder_name = f"{slug}_{fingerprinter(ext_source)}_{build_context_id}" + folder_name = f"{slug}_{fingerprinter(ext_source)}" + if build_context_id: + folder_name = f"{folder_name}_{build_context_id}" base_path = get_cache_base_path(lifetime) base_path.mkdir(exist_ok=True) From 8e312d38b798113de562006d76a260603783de80 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 26 Jun 2026 12:32:44 +0200 Subject: [PATCH 11/18] include fingerprint of builder_factory in gtfn backend --- src/gt4py/next/otf/compilation/build_systems/compiledb.py | 8 ++++++-- src/gt4py/next/otf/compilation/compiler.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/gt4py/next/otf/compilation/build_systems/compiledb.py b/src/gt4py/next/otf/compilation/build_systems/compiledb.py index 08c379af08..b0f9466d53 100644 --- a/src/gt4py/next/otf/compilation/build_systems/compiledb.py +++ b/src/gt4py/next/otf/compilation/build_systems/compiledb.py @@ -17,7 +17,7 @@ from typing import Optional, TypeVar from gt4py._core import locking -from gt4py.next import config, errors +from gt4py.next import config, errors, fingerprinting from gt4py.next.otf import code_specs, stages from gt4py.next.otf.binding import interface from gt4py.next.otf.compilation import build_data, cache, compiler @@ -72,7 +72,11 @@ def __call__( ) return CompiledbProject( - root_path=cache.get_cache_folder(source, cache_lifetime), + root_path=cache.get_cache_folder( + source, + cache_lifetime, + build_context_id=fingerprinting.strict_fingerprinter(self), + ), program_name=name, source_files={ header_name: source.program_source.source_code, diff --git a/src/gt4py/next/otf/compilation/compiler.py b/src/gt4py/next/otf/compilation/compiler.py index 17574355fd..aaa48bfe5a 100644 --- a/src/gt4py/next/otf/compilation/compiler.py +++ b/src/gt4py/next/otf/compilation/compiler.py @@ -15,7 +15,7 @@ import factory from gt4py._core import locking -from gt4py.next import config +from gt4py.next import config, fingerprinting from gt4py.next.otf import code_specs, definitions, stages, workflow from gt4py.next.otf.compilation import build_data, cache, importer @@ -66,7 +66,11 @@ def __call__( self, inp: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], ) -> stages.ExecutableProgram: - src_dir = cache.get_cache_folder(inp, self.cache_lifetime) + src_dir = cache.get_cache_folder( + inp, + self.cache_lifetime, + build_context_id=fingerprinting.strict_fingerprinter(self.builder_factory), + ) # If we are compiling the same program at the same time (e.g. multiple MPI ranks), # we need to make sure that only one of them accesses the same build directory for compilation. From 87d8bdac305d327f2513f3b99d243e48820f49b7 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 26 Jun 2026 15:42:05 +0200 Subject: [PATCH 12/18] fix cmake factory test --- src/gt4py/next/otf/compilation/compiler.py | 10 ++++++---- .../feature_tests/otf_tests/test_nanobind_build.py | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/gt4py/next/otf/compilation/compiler.py b/src/gt4py/next/otf/compilation/compiler.py index aaa48bfe5a..af72677031 100644 --- a/src/gt4py/next/otf/compilation/compiler.py +++ b/src/gt4py/next/otf/compilation/compiler.py @@ -60,17 +60,19 @@ class Compiler( cache_lifetime: config.BuildCacheLifetime builder_factory: BuildSystemProjectGenerator[CPPLikeCodeSpecT, code_specs.PythonCodeSpec] + fingerprint_builder_factory: bool = True force_recompile: bool = False def __call__( self, inp: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], ) -> stages.ExecutableProgram: - src_dir = cache.get_cache_folder( - inp, - self.cache_lifetime, - build_context_id=fingerprinting.strict_fingerprinter(self.builder_factory), + build_context_id = ( + fingerprinting.strict_fingerprinter(self.builder_factory) + if self.fingerprint_builder_factory + else "" ) + src_dir = cache.get_cache_folder(inp, self.cache_lifetime, build_context_id) # If we are compiling the same program at the same time (e.g. multiple MPI ranks), # we need to make sure that only one of them accesses the same build directory for compilation. diff --git a/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py b/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py index 49bd7b8f87..cca2bf5c0e 100644 --- a/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py +++ b/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py @@ -25,7 +25,9 @@ def test_gtfn_cpp_with_cmake(program_source_with_name): example_program_source = program_source_with_name("gtfn_cpp_with_cmake") build_the_program = workflow.make_step(nanobind.bind_source).chain( compiler.Compiler( - cache_lifetime=config.BuildCacheLifetime.SESSION, builder_factory=cmake.CMakeFactory() + cache_lifetime=config.BuildCacheLifetime.SESSION, + builder_factory=cmake.CMakeFactory(), + fingerprint_builder_factory=False, ) ) compiled_program = build_the_program(example_program_source) From 837796ab658c52f36889a275b7ace2fb7a81d285 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Gonz=C3=A1lez=20Paredes?= Date: Fri, 26 Jun 2026 15:46:57 +0200 Subject: [PATCH 13/18] Add unstructured strided axis to gtfn backend state --- src/gt4py/next/otf/binding/nanobind.py | 37 ++++++++++++++----- .../next/program_processors/runners/gtfn.py | 11 +++++- .../otf_tests/test_nanobind_build.py | 4 +- .../otf_tests/binding_tests/test_nanobind.py | 5 ++- .../build_systems_tests/conftest.py | 4 +- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/gt4py/next/otf/binding/nanobind.py b/src/gt4py/next/otf/binding/nanobind.py index d4eda86e44..a353dcff3e 100644 --- a/src/gt4py/next/otf/binding/nanobind.py +++ b/src/gt4py/next/otf/binding/nanobind.py @@ -10,6 +10,7 @@ from __future__ import annotations +import dataclasses from collections.abc import Collection from typing import Any, Optional, Sequence, TypeVar, Union @@ -200,7 +201,9 @@ def _tuple_get(index: int, var: str) -> str: return f"gridtools::tuple_util::get<{index}>({var})" -def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: +def make_argument( + name: str, type_: ts.TypeSpec, unstructured_horizontal_has_unit_stride: bool +) -> str | BufferSID | Tuple: if isinstance(type_, ts.FieldType): return BufferSID( source_buffer=name, @@ -209,7 +212,7 @@ def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: name=dim.value, static_stride=1 if ( - config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + unstructured_horizontal_has_unit_stride and dim.kind == common.DimensionKind.HORIZONTAL ) else None, @@ -219,7 +222,10 @@ def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: scalar_type=type_.dtype, ) elif isinstance(type_, ts.TupleType): - elements = [make_argument(_tuple_get(i, name), t) for i, t in enumerate(type_.types)] + elements = [ + make_argument(_tuple_get(i, name), t, unstructured_horizontal_has_unit_stride) + for i, t in enumerate(type_.types) + ] return Tuple(elems=elements) elif isinstance(type_, ts.ScalarType): return name @@ -228,7 +234,7 @@ def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: def create_bindings( - program_source: stages.ProgramSource[CodeSpecT], + program_source: stages.ProgramSource[CodeSpecT], unstructured_horizontal_has_unit_stride: bool ) -> stages.BindingSource[CodeSpecT, code_specs.PythonCodeSpec]: """ Generate Python bindings through which a C++ function can be called. @@ -274,7 +280,9 @@ def create_bindings( expr=FunctionCall( target=program_source.entry_point, args=[ - make_argument(param.name, param.type_) + make_argument( + param.name, param.type_, unstructured_horizontal_has_unit_stride + ) for param in program_source.entry_point.parameters ], ) @@ -305,7 +313,18 @@ def create_bindings( return stages.BindingSource(src, (interface.LibraryDependency("nanobind", "2.0.0"),)) -def bind_source( - inp: stages.ProgramSource[CodeSpecT], -) -> stages.ExtensionSource[CodeSpecT, code_specs.PythonCodeSpec]: - return stages.ExtensionSource(program_source=inp, binding_source=create_bindings(inp)) +@dataclasses.dataclass(frozen=True) +class ExtensionGenerator: + """ + Generate a Python extension module that contains the bindings for a C++ function. + """ + + unstructured_horizontal_has_unit_stride: bool = config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + + def __call__( + self, program_source: stages.ProgramSource[CodeSpecT] + ) -> stages.ExtensionSource[CodeSpecT, code_specs.PythonCodeSpec]: + binding_source = create_bindings( + program_source, self.unstructured_horizontal_has_unit_stride + ) + return stages.ExtensionSource(program_source=program_source, binding_source=binding_source) diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py index 1afbac9fd6..fc5e9c99eb 100644 --- a/src/gt4py/next/program_processors/runners/gtfn.py +++ b/src/gt4py/next/program_processors/runners/gtfn.py @@ -112,6 +112,9 @@ class Params: cmake_build_type: config.CMakeBuildType = factory.LazyFunction( # type: ignore[assignment] # factory-boy typing not precise enough lambda: config.CMAKE_BUILD_TYPE ) + unstructured_horizontal_has_unit_stride: bool = factory.LazyFunction( # type: ignore[assignment] # factory-boy typing not precise enough + lambda: config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + ) builder_factory: compiler.BuildSystemProjectGenerator = factory.LazyAttribute( # type: ignore[assignment] # factory-boy typing not precise enough lambda o: compiledb.CompiledbFactory(cmake_build_type=o.cmake_build_type) ) @@ -134,7 +137,13 @@ class Params: ) translation = factory.LazyAttribute(lambda o: o.bare_translation) - bindings: workflow.Workflow[stages.ProgramSource, stages.ExtensionSource] = nanobind.bind_source + bindings: workflow.Workflow[stages.ProgramSource, stages.ExtensionSource] = ( + factory.LazyAttribute( # type: ignore[assignment] # factory-boy typing not precise enough + lambda o: nanobind.ExtensionGenerator( + unstructured_horizontal_has_unit_stride=o.unstructured_horizontal_has_unit_stride + ) + ) + ) compilation = factory.SubFactory( compiler.CompilerFactory, cache_lifetime=factory.LazyFunction(lambda: config.BUILD_CACHE_LIFETIME), diff --git a/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py b/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py index cca2bf5c0e..aa239b08d5 100644 --- a/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py +++ b/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py @@ -23,7 +23,7 @@ def test_gtfn_cpp_with_cmake(program_source_with_name): example_program_source = program_source_with_name("gtfn_cpp_with_cmake") - build_the_program = workflow.make_step(nanobind.bind_source).chain( + build_the_program = workflow.make_step(nanobind.ExtensionGenerator()).chain( compiler.Compiler( cache_lifetime=config.BuildCacheLifetime.SESSION, builder_factory=cmake.CMakeFactory(), @@ -43,7 +43,7 @@ def test_gtfn_cpp_with_cmake(program_source_with_name): def test_gtfn_cpp_with_compiledb(program_source_with_name): example_program_source = program_source_with_name("gtfn_cpp_with_compiledb") - build_the_program = workflow.make_step(nanobind.bind_source).chain( + build_the_program = workflow.make_step(nanobind.ExtensionGenerator()).chain( compiler.Compiler( cache_lifetime=config.BuildCacheLifetime.SESSION, builder_factory=compiledb.CompiledbFactory(), diff --git a/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py b/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py index 93a491f04f..50a82692ba 100644 --- a/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py +++ b/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py @@ -6,6 +6,7 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause +from gt4py.next import config from gt4py.next.otf.binding import nanobind from next_tests.unit_tests.otf_tests.compilation_tests.build_systems_tests.conftest import ( @@ -14,5 +15,7 @@ def test_bindings(program_source_example): - module = nanobind.create_bindings(program_source_example) + module = nanobind.create_bindings( + program_source_example, config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + ) assert module.library_deps[0].name == "nanobind" diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py index 4b058bf8c3..c49270e3b8 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py @@ -91,7 +91,9 @@ def program_source_example(): def extension_source_example(program_source_example): return stages.ExtensionSource( program_source=program_source_example, - binding_source=nanobind.create_bindings(program_source_example), + binding_source=nanobind.create_bindings( + program_source_example, config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + ), ) From 461b77961ad9b4e86e8b005a4e2ff5dd088453b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Gonz=C3=A1lez=20Paredes?= Date: Fri, 26 Jun 2026 15:47:53 +0200 Subject: [PATCH 14/18] Remove fixme comments --- src/gt4py/next/config.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gt4py/next/config.py b/src/gt4py/next/config.py index a9fa290941..330362c667 100644 --- a/src/gt4py/next/config.py +++ b/src/gt4py/next/config.py @@ -104,21 +104,18 @@ def _get_build_cache_version_id() -> str: #: Build type to be used when CMake is used to compile generated code. #: Might have no effect when CMake is not used as part of the toolchain. -# FIXME[#2447](egparedes): compile-time setting, should be included in the build cache key. CMAKE_BUILD_TYPE: CMakeBuildType = CMakeBuildType[ os.environ.get("GT4PY_CMAKE_BUILD_TYPE", "debug" if DEBUG else "release").upper() ] #: Experimental, use at your own risk: assume horizontal dimension has stride 1 -# FIXME[#2447](egparedes): compile-time setting, should be included in the build cache key. UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE: bool = env_flag_to_bool( "GT4PY_UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE", default=False ) #: Add GPU trace markers (NVTX, ROC-TX) to the generated code, at compile time. -# FIXME[#2447](egparedes): compile-time setting, should be included in the build cache key. ADD_GPU_TRACE_MARKERS: bool = env_flag_to_bool("GT4PY_ADD_GPU_TRACE_MARKERS", default=False) From cd5f1f26dffabf16c1b1aaa96be7d2ec10786f68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Gonz=C3=A1lez=20Paredes?= Date: Fri, 26 Jun 2026 16:00:38 +0200 Subject: [PATCH 15/18] Removed duplicated default --- src/gt4py/next/otf/binding/nanobind.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gt4py/next/otf/binding/nanobind.py b/src/gt4py/next/otf/binding/nanobind.py index a353dcff3e..4a7020fc53 100644 --- a/src/gt4py/next/otf/binding/nanobind.py +++ b/src/gt4py/next/otf/binding/nanobind.py @@ -16,7 +16,7 @@ import gt4py.eve as eve from gt4py.eve.codegen import JinjaTemplate as as_jinja, TemplatedGenerator -from gt4py.next import common, config +from gt4py.next import common from gt4py.next.otf import code_specs, cpp_utils, stages from gt4py.next.otf.binding import cpp_interface, interface from gt4py.next.type_system import type_specifications as ts @@ -319,7 +319,7 @@ class ExtensionGenerator: Generate a Python extension module that contains the bindings for a C++ function. """ - unstructured_horizontal_has_unit_stride: bool = config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + unstructured_horizontal_has_unit_stride: bool def __call__( self, program_source: stages.ProgramSource[CodeSpecT] From 284dcdf9c3aad94954864f3f2b7175da0e270164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Gonz=C3=A1lez=20Paredes?= Date: Fri, 26 Jun 2026 16:04:11 +0200 Subject: [PATCH 16/18] Undo previous change --- src/gt4py/next/otf/binding/nanobind.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gt4py/next/otf/binding/nanobind.py b/src/gt4py/next/otf/binding/nanobind.py index 4a7020fc53..a353dcff3e 100644 --- a/src/gt4py/next/otf/binding/nanobind.py +++ b/src/gt4py/next/otf/binding/nanobind.py @@ -16,7 +16,7 @@ import gt4py.eve as eve from gt4py.eve.codegen import JinjaTemplate as as_jinja, TemplatedGenerator -from gt4py.next import common +from gt4py.next import common, config from gt4py.next.otf import code_specs, cpp_utils, stages from gt4py.next.otf.binding import cpp_interface, interface from gt4py.next.type_system import type_specifications as ts @@ -319,7 +319,7 @@ class ExtensionGenerator: Generate a Python extension module that contains the bindings for a C++ function. """ - unstructured_horizontal_has_unit_stride: bool + unstructured_horizontal_has_unit_stride: bool = config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE def __call__( self, program_source: stages.ProgramSource[CodeSpecT] From a4b3452e21593cff2a7328314c19e63ce89e7f84 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 26 Jun 2026 23:20:07 +0200 Subject: [PATCH 17/18] Update src/gt4py/next/otf/compilation/cache.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Enrique González Paredes --- src/gt4py/next/otf/compilation/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index fe667a6240..76f00eebce 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -46,7 +46,7 @@ def get_cache_folder( fingerprinter = fingerprinting.strict_fingerprinter slug = ext_source.program_source.entry_point.name if ext_source.binding_source: - slug = f"{slug}_bound" + slug = f"{slug}_pyext" folder_name = f"{slug}_{fingerprinter(ext_source)}" if build_context_id: folder_name = f"{folder_name}_{build_context_id}" From 36b5212d5d6a7e7a8ef591fd55c74d8f65c4922f Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 26 Jun 2026 23:30:55 +0200 Subject: [PATCH 18/18] fix compiledb test --- .../build_systems_tests/conftest.py | 8 ------- .../build_systems_tests/test_cmake.py | 15 +++++++++++-- .../build_systems_tests/test_compiledb.py | 22 +++++++++++++++---- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py index c49270e3b8..9e524fcfeb 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py @@ -95,11 +95,3 @@ def extension_source_example(program_source_example): program_source_example, config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE ), ) - - -@pytest.fixture -def clean_example_session_cache(extension_source_example): - cache_dir = cache.get_cache_folder(extension_source_example, config.BuildCacheLifetime.SESSION) - if cache_dir.exists(): - shutil.rmtree(cache_dir) - yield diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py index e3bd760073..f976f9b98f 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py @@ -8,14 +8,25 @@ import os import pathlib +import shutil from unittest import mock +import pytest + from gt4py._core import definitions as core_defs from gt4py.next import config -from gt4py.next.otf.compilation import build_data, importer +from gt4py.next.otf.compilation import build_data, cache, importer from gt4py.next.otf.compilation.build_systems import cmake +@pytest.fixture +def clean_cmake_cache(extension_source_example): + cache_dir = cache.get_cache_folder(extension_source_example, config.BuildCacheLifetime.SESSION) + if cache_dir.exists(): + shutil.rmtree(cache_dir) + yield + + def test_get_cmake_device_arch_option_cuda(): with ( mock.patch("gt4py._core.definitions.CUPY_DEVICE_TYPE", core_defs.DeviceType.CUDA), @@ -50,7 +61,7 @@ def test_get_cmake_device_arch_option_rocm(): assert cmake.get_cmake_device_arch_option() == "-DCMAKE_HIP_ARCHITECTURES=gfx90a" -def test_default_cmake_factory(extension_source_example, clean_example_session_cache): +def test_default_cmake_factory(extension_source_example, clean_cmake_cache): otf_builder = cmake.CMakeFactory()( source=extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION ) diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py index 44fe700c99..b208b7cd6f 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py @@ -10,12 +10,26 @@ import shutil import tempfile -from gt4py.next import config -from gt4py.next.otf.compilation import build_data, importer +import pytest + +from gt4py.next import config, fingerprinting +from gt4py.next.otf.compilation import build_data, cache, importer from gt4py.next.otf.compilation.build_systems import compiledb -def test_default_compiledb_factory(extension_source_example, clean_example_session_cache): +@pytest.fixture +def clean_compiledb_cache(extension_source_example): + cache_dir = cache.get_cache_folder( + ext_source=extension_source_example, + lifetime=config.BuildCacheLifetime.SESSION, + build_context_id=fingerprinting.strict_fingerprinter(compiledb.CompiledbFactory()), + ) + if cache_dir.exists(): + shutil.rmtree(cache_dir) + yield + + +def test_default_compiledb_factory(extension_source_example, clean_compiledb_cache): otf_builder = compiledb.CompiledbFactory()( extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION ) @@ -35,7 +49,7 @@ def test_default_compiledb_factory(extension_source_example, clean_example_sessi assert (otf_builder.root_path / "build.sh").exists() -def test_compiledb_project_is_relocatable(extension_source_example, clean_example_session_cache): +def test_compiledb_project_is_relocatable(extension_source_example, clean_compiledb_cache): builder = compiledb.CompiledbFactory()( extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION )