If you want to allow scalar iteration, use `allowscalar` or `@allowscalar`
to enable scalar iteration globally or for the operations in question.
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:44
[2] errorscalar(op::String)
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:151
[3] _assertscalar(op::String, behavior::GPUArraysCore.ScalarIndexing)
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:124
[4] assertscalar(op::String)
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:112
[5] getindex
@ ~/.julia/packages/GPUArrays/DPs2B/src/host/indexing.jl:50 [inlined]
[6] getindex
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/diagonal.jl:182 [inlined]
[7] getindex
@ ./subarray.jl:316 [inlined]
[8] getindex
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/triangular.jl:243 [inlined]
[9] macro expansion
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/generic.jl:100 [inlined]
[10] _generic_matmatmul_nonadjtrans!(C::CUDACore.CuArray{Float64, 2, CUDACore.DeviceMemory}, A::LinearAlgebra.UpperTriangular{Float64, LinearAlgebra.Adjoint{Float64, SubArray{Float64, 2, LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}}}, B::LinearAlgebra.LowerTriangular{Float64, SubArray{Float64, 2, LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}}, alpha::Int64, beta::Int64)
@ LinearAlgebra ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/matmul.jl:1033
[11] __generic_matmatmul!
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/matmul.jl:1025 [inlined]
[12] _generic_matmatmul!(C::CUDACore.CuArray{Float64, 2, CUDACore.DeviceMemory}, A::LinearAlgebra.UpperTriangular{Float64, LinearAlgebra.Adjoint{Float64, SubArray{Float64, 2, LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}}}, B::LinearAlgebra.LowerTriangular{Float64, SubArray{Float64, 2, LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}}, alpha::Int64, beta::Int64)
@ LinearAlgebra ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/matmul.jl:1019
[13] generic_matmatmul!
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/matmul.jl:1009 [inlined]
[14] _mul!
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/triangular.jl:1159 [inlined]
[15] mul!
@ ~/.julia/juliaup/julia-1.12.6+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/matmul.jl:297 [inlined]
[16] lq_pullback!(ΔA::LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, A::LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, LQ::Tuple{LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}}, ΔLQ::Tuple{LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}, LinearAlgebra.Diagonal{Float64, CUDACore.CuArray{Float64, 1, CUDACore.DeviceMemory}}}; rank_atol::Float64, gauge_atol::Float64)
@ MatrixAlgebraKit ~/.julia/dev/MatrixAlgebraKit/src/pullbacks/lq.jl:93
The offending line is here: https://github.com/QuantumKitHub/MatrixAlgebraKit.jl/blob/main/src/pullbacks/lq.jl#L93. The stacktrace is:
This issue might also need an upstream fix in GPUArrays.jl.