From af120d4810634fb08a425f31552e1cd93f1f5ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Fri, 19 Jun 2026 01:00:46 +0800 Subject: [PATCH 1/5] refactor: split CPUBackend::Render() and CUDABackend::Render() (closes #48) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPUBackend: extract RenderISPC() and RenderCPUFallback() private methods. CUDABackend: extract UploadSceneAndBVHIfChanged() and CopyInteropToVulkan(). Render() sizes: CPUBackend 205→30 lines, CUDABackend 192→110 lines. --- RayTracing/src/CPUBackend.cpp | 266 ++++++++++++++++----------------- RayTracing/src/CPUBackend.h | 5 + RayTracing/src/CUDABackend.cpp | 173 ++++++++++----------- RayTracing/src/CUDABackend.h | 3 + 4 files changed, 228 insertions(+), 219 deletions(-) diff --git a/RayTracing/src/CPUBackend.cpp b/RayTracing/src/CPUBackend.cpp index ab5002b..f77d1bd 100644 --- a/RayTracing/src/CPUBackend.cpp +++ b/RayTracing/src/CPUBackend.cpp @@ -47,7 +47,7 @@ void CPUBackend::Render( m_ActiveScene = &scene; m_ActiveCamera = &camera; m_MaxBounces = maxBounces; - + m_AccumFrameIndex = frameIndex; if (scene.Version != m_LastBvhSceneVersion) { m_BVH.Build(scene); @@ -62,143 +62,148 @@ void CPUBackend::Render( ); #ifdef PN_ISPC - // ═══════════════════════════════════════════════ - // ISPC-Accelerated Path (SIMD vectorized) - // Replaces the inner pixel loop with ISPC foreach - // ═══════════════════════════════════════════════ + RenderISPC(camera, scene, outputBuffer, frameIndex); +#else + RenderCPUFallback(outputBuffer); +#endif +} + +#ifdef PN_ISPC +void CPUBackend::RenderISPC(const Camera& camera, const Scene& scene, uint32_t* outputBuffer, uint32_t frameIndex) +{ + const uint32_t width = m_Width; + const uint32_t height = m_Height; + const size_t pixelCount = static_cast(width) * static_cast(height); + + const auto& rayDirs = camera.GetRayDirections(); + const glm::vec3& camPos = camera.GetPosition(); + + // ── Pack ray directions into 
SoA flat arrays ── + m_ISPCRayDirX.resize(pixelCount); + m_ISPCRayDirY.resize(pixelCount); + m_ISPCRayDirZ.resize(pixelCount); + for (size_t i = 0; i < pixelCount; i++) { + m_ISPCRayDirX[i] = rayDirs[i].x; + m_ISPCRayDirY[i] = rayDirs[i].y; + m_ISPCRayDirZ[i] = rayDirs[i].z; + } + + // ── Pack scene spheres (SoA layout) ── + const uint32_t sphereCount = static_cast(scene.Spheres.size()); + if (scene.Version != m_LastISPCSceneVersion) { - const uint32_t width = m_Width; - const uint32_t height = m_Height; - const size_t pixelCount = static_cast(width) * static_cast(height); - - const auto& rayDirs = camera.GetRayDirections(); - const glm::vec3& camPos = camera.GetPosition(); - - // ── Pack ray directions into SoA flat arrays ── - m_ISPCRayDirX.resize(pixelCount); - m_ISPCRayDirY.resize(pixelCount); - m_ISPCRayDirZ.resize(pixelCount); - for (size_t i = 0; i < pixelCount; i++) { - m_ISPCRayDirX[i] = rayDirs[i].x; - m_ISPCRayDirY[i] = rayDirs[i].y; - m_ISPCRayDirZ[i] = rayDirs[i].z; + m_ISCPSphPosX.resize(sphereCount); + m_ISCPSphPosY.resize(sphereCount); + m_ISCPSphPosZ.resize(sphereCount); + m_ISCPSphRadius.resize(sphereCount); + m_ISCPSphMatIdx.resize(sphereCount); + for (uint32_t i = 0; i < sphereCount; i++) { + const auto& s = scene.Spheres[i]; + m_ISCPSphPosX[i] = s.Position.x; + m_ISCPSphPosY[i] = s.Position.y; + m_ISCPSphPosZ[i] = s.Position.z; + m_ISCPSphRadius[i] = s.Radius; + m_ISCPSphMatIdx[i] = s.MaterialIndex; } - // ── Pack scene spheres (SoA layout) ── - const uint32_t sphereCount = static_cast(scene.Spheres.size()); - if (scene.Version != m_LastISPCSceneVersion) - { - m_ISCPSphPosX.resize(sphereCount); - m_ISCPSphPosY.resize(sphereCount); - m_ISCPSphPosZ.resize(sphereCount); - m_ISCPSphRadius.resize(sphereCount); - m_ISCPSphMatIdx.resize(sphereCount); - for (uint32_t i = 0; i < sphereCount; i++) { - const auto& s = scene.Spheres[i]; - m_ISCPSphPosX[i] = s.Position.x; - m_ISCPSphPosY[i] = s.Position.y; - m_ISCPSphPosZ[i] = s.Position.z; - 
m_ISCPSphRadius[i] = s.Radius; - m_ISCPSphMatIdx[i] = s.MaterialIndex; - } - // ── Pack materials (SoA layout) ── const uint32_t matCount = static_cast(scene.Materials.size()); - m_ISPCMatAlbedoR.resize(matCount); - m_ISPCMatAlbedoG.resize(matCount); - m_ISPCMatAlbedoB.resize(matCount); - m_ISPCMatRoughness.resize(matCount); - m_ISPCMatMetallic.resize(matCount); - m_ISPCMatEmissionR.resize(matCount); - m_ISPCMatEmissionG.resize(matCount); - m_ISPCMatEmissionB.resize(matCount); - m_ISPCMatEmissionPower.resize(matCount); - for (uint32_t i = 0; i < matCount; i++) { - const auto& m = scene.Materials[i]; - m_ISPCMatAlbedoR[i] = m.Albedo.x; - m_ISPCMatAlbedoG[i] = m.Albedo.y; - m_ISPCMatAlbedoB[i] = m.Albedo.z; - m_ISPCMatRoughness[i] = m.Roughness; - m_ISPCMatMetallic[i] = m.Metallic; - m_ISPCMatEmissionR[i] = m.EmissionColor.x; - m_ISPCMatEmissionG[i] = m.EmissionColor.y; - m_ISPCMatEmissionB[i] = m.EmissionColor.z; - m_ISPCMatEmissionPower[i] = m.EmissionPower; - } - m_LastISPCSceneVersion = scene.Version; + m_ISPCMatAlbedoR.resize(matCount); + m_ISPCMatAlbedoG.resize(matCount); + m_ISPCMatAlbedoB.resize(matCount); + m_ISPCMatRoughness.resize(matCount); + m_ISPCMatMetallic.resize(matCount); + m_ISPCMatEmissionR.resize(matCount); + m_ISPCMatEmissionG.resize(matCount); + m_ISPCMatEmissionB.resize(matCount); + m_ISPCMatEmissionPower.resize(matCount); + for (uint32_t i = 0; i < matCount; i++) { + const auto& m = scene.Materials[i]; + m_ISPCMatAlbedoR[i] = m.Albedo.x; + m_ISPCMatAlbedoG[i] = m.Albedo.y; + m_ISPCMatAlbedoB[i] = m.Albedo.z; + m_ISPCMatRoughness[i] = m.Roughness; + m_ISPCMatMetallic[i] = m.Metallic; + m_ISPCMatEmissionR[i] = m.EmissionColor.x; + m_ISPCMatEmissionG[i] = m.EmissionColor.y; + m_ISPCMatEmissionB[i] = m.EmissionColor.z; + m_ISPCMatEmissionPower[i] = m.EmissionPower; } + m_LastISPCSceneVersion = scene.Version; + } - // ── Pack BVH (SoA layout) ── - { - const auto& bvhNodes = m_BVH.Nodes(); - const auto& bvhSpIndices = m_BVH.SphereIndices(); - const 
size_t bvhCount = bvhNodes.size(); - - m_ISPCBvhMinX.resize(bvhCount); - m_ISPCBvhMinY.resize(bvhCount); - m_ISPCBvhMinZ.resize(bvhCount); - m_ISPCBvhMaxX.resize(bvhCount); - m_ISPCBvhMaxY.resize(bvhCount); - m_ISPCBvhMaxZ.resize(bvhCount); - m_ISPCBvhLeftFirst.resize(bvhCount); - m_ISPCBvhCount.resize(bvhCount); - for (size_t i = 0; i < bvhCount; i++) { - const auto& node = bvhNodes[i]; - m_ISPCBvhMinX[i] = node.Bounds.Min.x; - m_ISPCBvhMinY[i] = node.Bounds.Min.y; - m_ISPCBvhMinZ[i] = node.Bounds.Min.z; - m_ISPCBvhMaxX[i] = node.Bounds.Max.x; - m_ISPCBvhMaxY[i] = node.Bounds.Max.y; - m_ISPCBvhMaxZ[i] = node.Bounds.Max.z; - m_ISPCBvhLeftFirst[i] = node.LeftFirst; - m_ISPCBvhCount[i] = node.Count; - } - - m_ISPCBvhSphereIndices.assign(bvhSpIndices.begin(), bvhSpIndices.end()); + // ── Pack BVH (SoA layout) ── + { + const auto& bvhNodes = m_BVH.Nodes(); + const auto& bvhSpIndices = m_BVH.SphereIndices(); + const size_t bvhCount = bvhNodes.size(); + + m_ISPCBvhMinX.resize(bvhCount); + m_ISPCBvhMinY.resize(bvhCount); + m_ISPCBvhMinZ.resize(bvhCount); + m_ISPCBvhMaxX.resize(bvhCount); + m_ISPCBvhMaxY.resize(bvhCount); + m_ISPCBvhMaxZ.resize(bvhCount); + m_ISPCBvhLeftFirst.resize(bvhCount); + m_ISPCBvhCount.resize(bvhCount); + for (size_t i = 0; i < bvhCount; i++) { + const auto& node = bvhNodes[i]; + m_ISPCBvhMinX[i] = node.Bounds.Min.x; + m_ISPCBvhMinY[i] = node.Bounds.Min.y; + m_ISPCBvhMinZ[i] = node.Bounds.Min.z; + m_ISPCBvhMaxX[i] = node.Bounds.Max.x; + m_ISPCBvhMaxY[i] = node.Bounds.Max.y; + m_ISPCBvhMaxZ[i] = node.Bounds.Max.z; + m_ISPCBvhLeftFirst[i] = node.LeftFirst; + m_ISPCBvhCount[i] = node.Count; } - // ── Output buffers ── - m_ISPCOutputR.resize(pixelCount); - m_ISPCOutputG.resize(pixelCount); - m_ISPCOutputB.resize(pixelCount); - m_ISPCOutputA.resize(pixelCount); - - // ── Call ISPC kernel ── - ispc::ISPCRenderPixels( - camPos.x, camPos.y, camPos.z, - m_ISPCRayDirX.data(), m_ISPCRayDirY.data(), m_ISPCRayDirZ.data(), - m_ISCPSphPosX.data(), 
m_ISCPSphPosY.data(), m_ISCPSphPosZ.data(), - m_ISCPSphRadius.data(), m_ISCPSphMatIdx.data(), - m_ISPCMatAlbedoR.data(), m_ISPCMatAlbedoG.data(), m_ISPCMatAlbedoB.data(), - m_ISPCMatRoughness.data(), m_ISPCMatMetallic.data(), - m_ISPCMatEmissionR.data(), m_ISPCMatEmissionG.data(), m_ISPCMatEmissionB.data(), - m_ISPCMatEmissionPower.data(), - m_ISPCOutputR.data(), m_ISPCOutputG.data(), m_ISPCOutputB.data(), m_ISPCOutputA.data(), - static_cast(pixelCount), static_cast(sphereCount), - // BVH data - m_ISPCBvhMinX.data(), m_ISPCBvhMinY.data(), m_ISPCBvhMinZ.data(), - m_ISPCBvhMaxX.data(), m_ISPCBvhMaxY.data(), m_ISPCBvhMaxZ.data(), - m_ISPCBvhLeftFirst.data(), m_ISPCBvhCount.data(), - m_ISPCBvhSphereIndices.data(), - static_cast(m_ISPCBvhLeftFirst.size()), - static_cast(frameIndex), maxBounces - ); - - // ── Unpack: accumulate + tone map + RGBA convert ── - for (size_t i = 0; i < pixelCount; i++) { - glm::vec4 color(m_ISPCOutputR[i], m_ISPCOutputG[i], m_ISPCOutputB[i], m_ISPCOutputA[i]); - m_AccumulationData[i] += color; + m_ISPCBvhSphereIndices.assign(bvhSpIndices.begin(), bvhSpIndices.end()); + } - glm::vec4 accumulated = m_AccumulationData[i]; - accumulated /= static_cast(frameIndex); - accumulated = glm::clamp(accumulated, glm::vec4(0.0f), glm::vec4(1.0f)); - outputBuffer[i] = PathTracerCore::ConvertToRGBA(accumulated); - } + // ── Output buffers ── + m_ISPCOutputR.resize(pixelCount); + m_ISPCOutputG.resize(pixelCount); + m_ISPCOutputB.resize(pixelCount); + m_ISPCOutputA.resize(pixelCount); + + // ── Call ISPC kernel ── + ispc::ISPCRenderPixels( + camPos.x, camPos.y, camPos.z, + m_ISPCRayDirX.data(), m_ISPCRayDirY.data(), m_ISPCRayDirZ.data(), + m_ISCPSphPosX.data(), m_ISCPSphPosY.data(), m_ISCPSphPosZ.data(), + m_ISCPSphRadius.data(), m_ISCPSphMatIdx.data(), + m_ISPCMatAlbedoR.data(), m_ISPCMatAlbedoG.data(), m_ISPCMatAlbedoB.data(), + m_ISPCMatRoughness.data(), m_ISPCMatMetallic.data(), + m_ISPCMatEmissionR.data(), m_ISPCMatEmissionG.data(), 
m_ISPCMatEmissionB.data(), + m_ISPCMatEmissionPower.data(), + m_ISPCOutputR.data(), m_ISPCOutputG.data(), m_ISPCOutputB.data(), m_ISPCOutputA.data(), + static_cast(pixelCount), static_cast(sphereCount), + // BVH data + m_ISPCBvhMinX.data(), m_ISPCBvhMinY.data(), m_ISPCBvhMinZ.data(), + m_ISPCBvhMaxX.data(), m_ISPCBvhMaxY.data(), m_ISPCBvhMaxZ.data(), + m_ISPCBvhLeftFirst.data(), m_ISPCBvhCount.data(), + m_ISPCBvhSphereIndices.data(), + static_cast(m_ISPCBvhLeftFirst.size()), + static_cast(frameIndex), m_MaxBounces + ); + + // ── Unpack: accumulate + tone map + RGBA convert ── + for (size_t i = 0; i < pixelCount; i++) { + glm::vec4 color(m_ISPCOutputR[i], m_ISPCOutputG[i], m_ISPCOutputB[i], m_ISPCOutputA[i]); + m_AccumulationData[i] += color; + + glm::vec4 accumulated = m_AccumulationData[i]; + accumulated /= static_cast(frameIndex); + accumulated = glm::clamp(accumulated, glm::vec4(0.0f), glm::vec4(1.0f)); + outputBuffer[i] = PathTracerCore::ConvertToRGBA(accumulated); } -#else - // ── C++ Scalar / std::execution::par Fallback ── +} +#endif + +void CPUBackend::RenderCPUFallback(uint32_t* outputBuffer) +{ static constexpr bool kMultithreaded = true; - m_AccumFrameIndex = frameIndex; if constexpr (kMultithreaded) { std::for_each( std::execution::par, @@ -217,12 +222,8 @@ void CPUBackend::Render( glm::vec4 accumulatedColor = m_AccumulationData[idx]; accumulatedColor /= static_cast(m_AccumFrameIndex); - accumulatedColor = glm::clamp( - accumulatedColor, - glm::vec4(0.0f), glm::vec4(1.0f) - ); - outputBuffer[idx] - = PathTracerCore::ConvertToRGBA(accumulatedColor); + accumulatedColor = glm::clamp(accumulatedColor, glm::vec4(0.0f), glm::vec4(1.0f)); + outputBuffer[idx] = PathTracerCore::ConvertToRGBA(accumulatedColor); }); }); } else { @@ -240,7 +241,6 @@ void CPUBackend::Render( } } } -#endif // PN_ISPC } // ────────────────────────────────────────────── diff --git a/RayTracing/src/CPUBackend.h b/RayTracing/src/CPUBackend.h index 081104b..a0732bf 100644 --- 
a/RayTracing/src/CPUBackend.h +++ b/RayTracing/src/CPUBackend.h @@ -39,6 +39,11 @@ class CPUBackend : public IRenderBackend ) const noexcept; static HitPayLoad Miss(const Ray& ray) noexcept; + void RenderCPUFallback(uint32_t* outputBuffer); +#ifdef PN_ISPC + void RenderISPC(const Camera& camera, const Scene& scene, uint32_t* outputBuffer, uint32_t frameIndex); +#endif + private: uint32_t m_Width = 0; uint32_t m_Height = 0; diff --git a/RayTracing/src/CUDABackend.cpp b/RayTracing/src/CUDABackend.cpp index cccd4b2..40f6201 100644 --- a/RayTracing/src/CUDABackend.cpp +++ b/RayTracing/src/CUDABackend.cpp @@ -154,41 +154,8 @@ void CUDABackend::Render( } } - // Upload scene data only when changed (tracked by scene version) if (scene.Version != m_LastSceneVersion) - { - UploadSceneToGPU(scene); - m_LastSceneVersion = scene.Version; - - // Rebuild BVH and upload to GPU - m_BVH.Build(scene); - const auto& bvhNodes = m_BVH.Nodes(); - - m_GPUBVHNodes.resize(bvhNodes.size()); - for (size_t i = 0; i < bvhNodes.size(); i++) - { - m_GPUBVHNodes[i].BoundsMin[0] = bvhNodes[i].Bounds.Min.x; - m_GPUBVHNodes[i].BoundsMin[1] = bvhNodes[i].Bounds.Min.y; - m_GPUBVHNodes[i].BoundsMin[2] = bvhNodes[i].Bounds.Min.z; - m_GPUBVHNodes[i].BoundsMax[0] = bvhNodes[i].Bounds.Max.x; - m_GPUBVHNodes[i].BoundsMax[1] = bvhNodes[i].Bounds.Max.y; - m_GPUBVHNodes[i].BoundsMax[2] = bvhNodes[i].Bounds.Max.z; - m_GPUBVHNodes[i].LeftFirst = bvhNodes[i].LeftFirst; - m_GPUBVHNodes[i].Count = bvhNodes[i].Count; - } - - // Copy sorted sphere indices for GPU leaf resolution - const auto& sphereIndices = m_BVH.SphereIndices(); - m_GPUSphereIndices = sphereIndices; - - CUDARenderer_UploadBVH( - m_CUDAState.get(), - m_GPUBVHNodes.data(), - static_cast(m_GPUBVHNodes.size()), - m_GPUSphereIndices.data(), - static_cast(m_GPUSphereIndices.size()) - ); - } + UploadSceneAndBVHIfChanged(scene); // Upload camera position const glm::vec3& camPos = camera.GetPosition(); @@ -245,58 +212,7 @@ void CUDABackend::Render( if 
(m_InteropEnabled && m_Interop) { m_Interop->SyncCUDAComplete(reinterpret_cast(CUDARenderer_GetComputeStream(m_CUDAState.get()))); - - // ── Interop path: CUDA wrote to Vulkan buffer, copy to Peanut's VkImage ── - VkCommandBuffer cmd = Peanut::Application::GetCommandBuffer(true); - const VkImage dstImage = m_DestinationImage; - - // Buffer barrier: external (CUDA) write → Vulkan transfer read - VkBufferMemoryBarrier bufBarrier = {}; - bufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - bufBarrier.srcAccessMask = 0; - bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - bufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufBarrier.buffer = m_Interop->GetVulkanBuffer(); - bufBarrier.size = VK_WHOLE_SIZE; - vkCmdPipelineBarrier(cmd, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, nullptr, 1, &bufBarrier, 0, nullptr); - - VkImageMemoryBarrier preBarrier = {}; - preBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - preBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - preBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - preBarrier.image = dstImage; - preBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - preBarrier.srcAccessMask = 0; - preBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &preBarrier); - - VkBufferImageCopy region = {}; - region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; - region.imageExtent = { m_Interop->GetWidth(), m_Interop->GetHeight(), 1 }; - vkCmdCopyBufferToImage(cmd, m_Interop->GetVulkanBuffer(), dstImage, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); - - VkImageMemoryBarrier postBarrier = {}; - postBarrier.sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - postBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - postBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - postBarrier.image = dstImage; - postBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - postBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - postBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &postBarrier); - - Peanut::Application::FlushCommandBuffer(cmd); + CopyInteropToVulkan(); } else { @@ -308,4 +224,89 @@ void CUDABackend::Render( } } +void CUDABackend::UploadSceneAndBVHIfChanged(const Scene& scene) +{ + UploadSceneToGPU(scene); + m_LastSceneVersion = scene.Version; + + // Rebuild BVH and upload to GPU + m_BVH.Build(scene); + const auto& bvhNodes = m_BVH.Nodes(); + + m_GPUBVHNodes.resize(bvhNodes.size()); + for (size_t i = 0; i < bvhNodes.size(); i++) + { + m_GPUBVHNodes[i].BoundsMin[0] = bvhNodes[i].Bounds.Min.x; + m_GPUBVHNodes[i].BoundsMin[1] = bvhNodes[i].Bounds.Min.y; + m_GPUBVHNodes[i].BoundsMin[2] = bvhNodes[i].Bounds.Min.z; + m_GPUBVHNodes[i].BoundsMax[0] = bvhNodes[i].Bounds.Max.x; + m_GPUBVHNodes[i].BoundsMax[1] = bvhNodes[i].Bounds.Max.y; + m_GPUBVHNodes[i].BoundsMax[2] = bvhNodes[i].Bounds.Max.z; + m_GPUBVHNodes[i].LeftFirst = bvhNodes[i].LeftFirst; + m_GPUBVHNodes[i].Count = bvhNodes[i].Count; + } + + const auto& sphereIndices = m_BVH.SphereIndices(); + m_GPUSphereIndices = sphereIndices; + + CUDARenderer_UploadBVH( + m_CUDAState.get(), + m_GPUBVHNodes.data(), + static_cast(m_GPUBVHNodes.size()), + m_GPUSphereIndices.data(), + static_cast(m_GPUSphereIndices.size()) + ); +} + +void CUDABackend::CopyInteropToVulkan() +{ + VkCommandBuffer cmd = Peanut::Application::GetCommandBuffer(true); + const 
VkImage dstImage = m_DestinationImage; + + VkBufferMemoryBarrier bufBarrier = {}; + bufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + bufBarrier.srcAccessMask = 0; + bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufBarrier.buffer = m_Interop->GetVulkanBuffer(); + bufBarrier.size = VK_WHOLE_SIZE; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &bufBarrier, 0, nullptr); + + VkImageMemoryBarrier preBarrier = {}; + preBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + preBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + preBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.image = dstImage; + preBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + preBarrier.srcAccessMask = 0; + preBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &preBarrier); + + VkBufferImageCopy region = {}; + region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; + region.imageExtent = { m_Interop->GetWidth(), m_Interop->GetHeight(), 1 }; + vkCmdCopyBufferToImage(cmd, m_Interop->GetVulkanBuffer(), dstImage, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); + + VkImageMemoryBarrier postBarrier = {}; + postBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + postBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + postBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.image = dstImage; + postBarrier.subresourceRange = 
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + postBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + postBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &postBarrier); + + Peanut::Application::FlushCommandBuffer(cmd); +} + #endif // PN_CUDA diff --git a/RayTracing/src/CUDABackend.h b/RayTracing/src/CUDABackend.h index 26c9221..dbb9702 100644 --- a/RayTracing/src/CUDABackend.h +++ b/RayTracing/src/CUDABackend.h @@ -64,7 +64,10 @@ class CUDABackend : public IRenderBackend private: // ── Scene upload (host → GPU packing + cudaMemcpy) ── void UploadSceneToGPU(const Scene& scene); + void UploadSceneAndBVHIfChanged(const Scene& scene); + // ── Vulkan interop output ── + void CopyInteropToVulkan(); // ── CUDA state ── std::unique_ptr m_CUDAState; std::unique_ptr m_Interop; From c97c6983692c45a4c301949a18fb9bff3f38dffb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Fri, 19 Jun 2026 01:06:43 +0800 Subject: [PATCH 2/5] fix: add blank line between method and member section (review feedback) --- RayTracing/src/CUDABackend.h | 1 + 1 file changed, 1 insertion(+) diff --git a/RayTracing/src/CUDABackend.h b/RayTracing/src/CUDABackend.h index dbb9702..18a9a8c 100644 --- a/RayTracing/src/CUDABackend.h +++ b/RayTracing/src/CUDABackend.h @@ -68,6 +68,7 @@ class CUDABackend : public IRenderBackend // ── Vulkan interop output ── void CopyInteropToVulkan(); + // ── CUDA state ── std::unique_ptr m_CUDAState; std::unique_ptr m_Interop; From 786807dcf035bf9d8ecc021b9af3b90664a2104c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Fri, 19 Jun 2026 01:42:19 +0800 Subject: [PATCH 3/5] fix: capture outputBuffer in lambda after RenderCPUFallback extraction (CI fix) After extracting RenderCPUFallback(), outputBuffer became a function parameter instead of being in the outer scope. 
The inner lambda at line 215 needs explicit capture. --- RayTracing/src/CPUBackend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RayTracing/src/CPUBackend.cpp b/RayTracing/src/CPUBackend.cpp index f77d1bd..35b9536 100644 --- a/RayTracing/src/CPUBackend.cpp +++ b/RayTracing/src/CPUBackend.cpp @@ -212,7 +212,7 @@ void CPUBackend::RenderCPUFallback(uint32_t* outputBuffer) { std::ranges::for_each( m_ImageHorizontalIterator.begin(), m_ImageHorizontalIterator.end(), - [this, y, width = m_Width](const uint32_t x) + [this, y, outputBuffer, width = m_Width](const uint32_t x) { const glm::vec4 color = PerPixel(x, y); const size_t idx = static_cast(x) + static_cast(y) * static_cast(width); From 221190a76b2e8d7b49949a97e3005ce37f6a5e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Fri, 19 Jun 2026 01:48:00 +0800 Subject: [PATCH 4/5] fix: add outputBuffer to outer lambda capture (MSVC requires explicit capture through all nested levels) --- RayTracing/src/CPUBackend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RayTracing/src/CPUBackend.cpp b/RayTracing/src/CPUBackend.cpp index 35b9536..c81fb99 100644 --- a/RayTracing/src/CPUBackend.cpp +++ b/RayTracing/src/CPUBackend.cpp @@ -208,7 +208,7 @@ void CPUBackend::RenderCPUFallback(uint32_t* outputBuffer) std::for_each( std::execution::par, m_ImageVerticalIterator.begin(), m_ImageVerticalIterator.end(), - [this](uint32_t y) + [this, outputBuffer](uint32_t y) { std::ranges::for_each( m_ImageHorizontalIterator.begin(), m_ImageHorizontalIterator.end(), From 2a7685b48ca850d79ec125a2ff41ec23737b9e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Fri, 19 Jun 2026 01:52:31 +0800 Subject: [PATCH 5/5] fix: fix indentation in RenderCPUFallback, guard BVH ISPC packing with scene version (review feedback) Lines 225-226 had wrong indent level (5 instead of 6 tabs). 
BVH SoA packing now inside if(scene.Version != m_LastISPCSceneVersion), avoiding wasted work on consecutive frames when scene hasn't changed. --- RayTracing/src/CPUBackend.cpp | 58 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/RayTracing/src/CPUBackend.cpp b/RayTracing/src/CPUBackend.cpp index c81fb99..ae5370e 100644 --- a/RayTracing/src/CPUBackend.cpp +++ b/RayTracing/src/CPUBackend.cpp @@ -130,35 +130,35 @@ void CPUBackend::RenderISPC(const Camera& camera, const Scene& scene, uint32_t* m_ISPCMatEmissionPower[i] = m.EmissionPower; } m_LastISPCSceneVersion = scene.Version; - } - // ── Pack BVH (SoA layout) ── - { - const auto& bvhNodes = m_BVH.Nodes(); - const auto& bvhSpIndices = m_BVH.SphereIndices(); - const size_t bvhCount = bvhNodes.size(); - - m_ISPCBvhMinX.resize(bvhCount); - m_ISPCBvhMinY.resize(bvhCount); - m_ISPCBvhMinZ.resize(bvhCount); - m_ISPCBvhMaxX.resize(bvhCount); - m_ISPCBvhMaxY.resize(bvhCount); - m_ISPCBvhMaxZ.resize(bvhCount); - m_ISPCBvhLeftFirst.resize(bvhCount); - m_ISPCBvhCount.resize(bvhCount); - for (size_t i = 0; i < bvhCount; i++) { - const auto& node = bvhNodes[i]; - m_ISPCBvhMinX[i] = node.Bounds.Min.x; - m_ISPCBvhMinY[i] = node.Bounds.Min.y; - m_ISPCBvhMinZ[i] = node.Bounds.Min.z; - m_ISPCBvhMaxX[i] = node.Bounds.Max.x; - m_ISPCBvhMaxY[i] = node.Bounds.Max.y; - m_ISPCBvhMaxZ[i] = node.Bounds.Max.z; - m_ISPCBvhLeftFirst[i] = node.LeftFirst; - m_ISPCBvhCount[i] = node.Count; - } + // ── Pack BVH (SoA layout) ── + { + const auto& bvhNodes = m_BVH.Nodes(); + const auto& bvhSpIndices = m_BVH.SphereIndices(); + const size_t bvhCount = bvhNodes.size(); + + m_ISPCBvhMinX.resize(bvhCount); + m_ISPCBvhMinY.resize(bvhCount); + m_ISPCBvhMinZ.resize(bvhCount); + m_ISPCBvhMaxX.resize(bvhCount); + m_ISPCBvhMaxY.resize(bvhCount); + m_ISPCBvhMaxZ.resize(bvhCount); + m_ISPCBvhLeftFirst.resize(bvhCount); + m_ISPCBvhCount.resize(bvhCount); + for (size_t i = 0; i < bvhCount; i++) { + const auto& node = 
bvhNodes[i]; + m_ISPCBvhMinX[i] = node.Bounds.Min.x; + m_ISPCBvhMinY[i] = node.Bounds.Min.y; + m_ISPCBvhMinZ[i] = node.Bounds.Min.z; + m_ISPCBvhMaxX[i] = node.Bounds.Max.x; + m_ISPCBvhMaxY[i] = node.Bounds.Max.y; + m_ISPCBvhMaxZ[i] = node.Bounds.Max.z; + m_ISPCBvhLeftFirst[i] = node.LeftFirst; + m_ISPCBvhCount[i] = node.Count; + } - m_ISPCBvhSphereIndices.assign(bvhSpIndices.begin(), bvhSpIndices.end()); + m_ISPCBvhSphereIndices.assign(bvhSpIndices.begin(), bvhSpIndices.end()); + } } // ── Output buffers ── @@ -222,8 +222,8 @@ void CPUBackend::RenderCPUFallback(uint32_t* outputBuffer) glm::vec4 accumulatedColor = m_AccumulationData[idx]; accumulatedColor /= static_cast(m_AccumFrameIndex); - accumulatedColor = glm::clamp(accumulatedColor, glm::vec4(0.0f), glm::vec4(1.0f)); - outputBuffer[idx] = PathTracerCore::ConvertToRGBA(accumulatedColor); + accumulatedColor = glm::clamp(accumulatedColor, glm::vec4(0.0f), glm::vec4(1.0f)); + outputBuffer[idx] = PathTracerCore::ConvertToRGBA(accumulatedColor); }); }); } else {