diff --git a/RayTracing/src/CPUBackend.cpp b/RayTracing/src/CPUBackend.cpp index ab5002b..ae5370e 100644 --- a/RayTracing/src/CPUBackend.cpp +++ b/RayTracing/src/CPUBackend.cpp @@ -47,7 +47,7 @@ void CPUBackend::Render( m_ActiveScene = &scene; m_ActiveCamera = &camera; m_MaxBounces = maxBounces; - + m_AccumFrameIndex = frameIndex; if (scene.Version != m_LastBvhSceneVersion) { m_BVH.Build(scene); @@ -62,71 +62,74 @@ void CPUBackend::Render( ); #ifdef PN_ISPC - // ═══════════════════════════════════════════════ - // ISPC-Accelerated Path (SIMD vectorized) - // Replaces the inner pixel loop with ISPC foreach - // ═══════════════════════════════════════════════ + RenderISPC(camera, scene, outputBuffer, frameIndex); +#else + RenderCPUFallback(outputBuffer); +#endif +} + +#ifdef PN_ISPC +void CPUBackend::RenderISPC(const Camera& camera, const Scene& scene, uint32_t* outputBuffer, uint32_t frameIndex) +{ + const uint32_t width = m_Width; + const uint32_t height = m_Height; + const size_t pixelCount = static_cast(width) * static_cast(height); + + const auto& rayDirs = camera.GetRayDirections(); + const glm::vec3& camPos = camera.GetPosition(); + + // ── Pack ray directions into SoA flat arrays ── + m_ISPCRayDirX.resize(pixelCount); + m_ISPCRayDirY.resize(pixelCount); + m_ISPCRayDirZ.resize(pixelCount); + for (size_t i = 0; i < pixelCount; i++) { + m_ISPCRayDirX[i] = rayDirs[i].x; + m_ISPCRayDirY[i] = rayDirs[i].y; + m_ISPCRayDirZ[i] = rayDirs[i].z; + } + + // ── Pack scene spheres (SoA layout) ── + const uint32_t sphereCount = static_cast(scene.Spheres.size()); + if (scene.Version != m_LastISPCSceneVersion) { - const uint32_t width = m_Width; - const uint32_t height = m_Height; - const size_t pixelCount = static_cast(width) * static_cast(height); - - const auto& rayDirs = camera.GetRayDirections(); - const glm::vec3& camPos = camera.GetPosition(); - - // ── Pack ray directions into SoA flat arrays ── - m_ISPCRayDirX.resize(pixelCount); - m_ISPCRayDirY.resize(pixelCount); - 
m_ISPCRayDirZ.resize(pixelCount); - for (size_t i = 0; i < pixelCount; i++) { - m_ISPCRayDirX[i] = rayDirs[i].x; - m_ISPCRayDirY[i] = rayDirs[i].y; - m_ISPCRayDirZ[i] = rayDirs[i].z; + m_ISCPSphPosX.resize(sphereCount); + m_ISCPSphPosY.resize(sphereCount); + m_ISCPSphPosZ.resize(sphereCount); + m_ISCPSphRadius.resize(sphereCount); + m_ISCPSphMatIdx.resize(sphereCount); + for (uint32_t i = 0; i < sphereCount; i++) { + const auto& s = scene.Spheres[i]; + m_ISCPSphPosX[i] = s.Position.x; + m_ISCPSphPosY[i] = s.Position.y; + m_ISCPSphPosZ[i] = s.Position.z; + m_ISCPSphRadius[i] = s.Radius; + m_ISCPSphMatIdx[i] = s.MaterialIndex; } - // ── Pack scene spheres (SoA layout) ── - const uint32_t sphereCount = static_cast(scene.Spheres.size()); - if (scene.Version != m_LastISPCSceneVersion) - { - m_ISCPSphPosX.resize(sphereCount); - m_ISCPSphPosY.resize(sphereCount); - m_ISCPSphPosZ.resize(sphereCount); - m_ISCPSphRadius.resize(sphereCount); - m_ISCPSphMatIdx.resize(sphereCount); - for (uint32_t i = 0; i < sphereCount; i++) { - const auto& s = scene.Spheres[i]; - m_ISCPSphPosX[i] = s.Position.x; - m_ISCPSphPosY[i] = s.Position.y; - m_ISCPSphPosZ[i] = s.Position.z; - m_ISCPSphRadius[i] = s.Radius; - m_ISCPSphMatIdx[i] = s.MaterialIndex; - } - // ── Pack materials (SoA layout) ── const uint32_t matCount = static_cast(scene.Materials.size()); - m_ISPCMatAlbedoR.resize(matCount); - m_ISPCMatAlbedoG.resize(matCount); - m_ISPCMatAlbedoB.resize(matCount); - m_ISPCMatRoughness.resize(matCount); - m_ISPCMatMetallic.resize(matCount); - m_ISPCMatEmissionR.resize(matCount); - m_ISPCMatEmissionG.resize(matCount); - m_ISPCMatEmissionB.resize(matCount); - m_ISPCMatEmissionPower.resize(matCount); - for (uint32_t i = 0; i < matCount; i++) { - const auto& m = scene.Materials[i]; - m_ISPCMatAlbedoR[i] = m.Albedo.x; - m_ISPCMatAlbedoG[i] = m.Albedo.y; - m_ISPCMatAlbedoB[i] = m.Albedo.z; - m_ISPCMatRoughness[i] = m.Roughness; - m_ISPCMatMetallic[i] = m.Metallic; - m_ISPCMatEmissionR[i] = 
m.EmissionColor.x; - m_ISPCMatEmissionG[i] = m.EmissionColor.y; - m_ISPCMatEmissionB[i] = m.EmissionColor.z; - m_ISPCMatEmissionPower[i] = m.EmissionPower; - } - m_LastISPCSceneVersion = scene.Version; + m_ISPCMatAlbedoR.resize(matCount); + m_ISPCMatAlbedoG.resize(matCount); + m_ISPCMatAlbedoB.resize(matCount); + m_ISPCMatRoughness.resize(matCount); + m_ISPCMatMetallic.resize(matCount); + m_ISPCMatEmissionR.resize(matCount); + m_ISPCMatEmissionG.resize(matCount); + m_ISPCMatEmissionB.resize(matCount); + m_ISPCMatEmissionPower.resize(matCount); + for (uint32_t i = 0; i < matCount; i++) { + const auto& m = scene.Materials[i]; + m_ISPCMatAlbedoR[i] = m.Albedo.x; + m_ISPCMatAlbedoG[i] = m.Albedo.y; + m_ISPCMatAlbedoB[i] = m.Albedo.z; + m_ISPCMatRoughness[i] = m.Roughness; + m_ISPCMatMetallic[i] = m.Metallic; + m_ISPCMatEmissionR[i] = m.EmissionColor.x; + m_ISPCMatEmissionG[i] = m.EmissionColor.y; + m_ISPCMatEmissionB[i] = m.EmissionColor.z; + m_ISPCMatEmissionPower[i] = m.EmissionPower; } + m_LastISPCSceneVersion = scene.Version; // ── Pack BVH (SoA layout) ── { @@ -156,58 +159,60 @@ void CPUBackend::Render( m_ISPCBvhSphereIndices.assign(bvhSpIndices.begin(), bvhSpIndices.end()); } + } - // ── Output buffers ── - m_ISPCOutputR.resize(pixelCount); - m_ISPCOutputG.resize(pixelCount); - m_ISPCOutputB.resize(pixelCount); - m_ISPCOutputA.resize(pixelCount); - - // ── Call ISPC kernel ── - ispc::ISPCRenderPixels( - camPos.x, camPos.y, camPos.z, - m_ISPCRayDirX.data(), m_ISPCRayDirY.data(), m_ISPCRayDirZ.data(), - m_ISCPSphPosX.data(), m_ISCPSphPosY.data(), m_ISCPSphPosZ.data(), - m_ISCPSphRadius.data(), m_ISCPSphMatIdx.data(), - m_ISPCMatAlbedoR.data(), m_ISPCMatAlbedoG.data(), m_ISPCMatAlbedoB.data(), - m_ISPCMatRoughness.data(), m_ISPCMatMetallic.data(), - m_ISPCMatEmissionR.data(), m_ISPCMatEmissionG.data(), m_ISPCMatEmissionB.data(), - m_ISPCMatEmissionPower.data(), - m_ISPCOutputR.data(), m_ISPCOutputG.data(), m_ISPCOutputB.data(), m_ISPCOutputA.data(), - 
static_cast(pixelCount), static_cast(sphereCount), - // BVH data - m_ISPCBvhMinX.data(), m_ISPCBvhMinY.data(), m_ISPCBvhMinZ.data(), - m_ISPCBvhMaxX.data(), m_ISPCBvhMaxY.data(), m_ISPCBvhMaxZ.data(), - m_ISPCBvhLeftFirst.data(), m_ISPCBvhCount.data(), - m_ISPCBvhSphereIndices.data(), - static_cast(m_ISPCBvhLeftFirst.size()), - static_cast(frameIndex), maxBounces - ); - - // ── Unpack: accumulate + tone map + RGBA convert ── - for (size_t i = 0; i < pixelCount; i++) { - glm::vec4 color(m_ISPCOutputR[i], m_ISPCOutputG[i], m_ISPCOutputB[i], m_ISPCOutputA[i]); - m_AccumulationData[i] += color; - - glm::vec4 accumulated = m_AccumulationData[i]; - accumulated /= static_cast(frameIndex); - accumulated = glm::clamp(accumulated, glm::vec4(0.0f), glm::vec4(1.0f)); - outputBuffer[i] = PathTracerCore::ConvertToRGBA(accumulated); - } + // ── Output buffers ── + m_ISPCOutputR.resize(pixelCount); + m_ISPCOutputG.resize(pixelCount); + m_ISPCOutputB.resize(pixelCount); + m_ISPCOutputA.resize(pixelCount); + + // ── Call ISPC kernel ── + ispc::ISPCRenderPixels( + camPos.x, camPos.y, camPos.z, + m_ISPCRayDirX.data(), m_ISPCRayDirY.data(), m_ISPCRayDirZ.data(), + m_ISCPSphPosX.data(), m_ISCPSphPosY.data(), m_ISCPSphPosZ.data(), + m_ISCPSphRadius.data(), m_ISCPSphMatIdx.data(), + m_ISPCMatAlbedoR.data(), m_ISPCMatAlbedoG.data(), m_ISPCMatAlbedoB.data(), + m_ISPCMatRoughness.data(), m_ISPCMatMetallic.data(), + m_ISPCMatEmissionR.data(), m_ISPCMatEmissionG.data(), m_ISPCMatEmissionB.data(), + m_ISPCMatEmissionPower.data(), + m_ISPCOutputR.data(), m_ISPCOutputG.data(), m_ISPCOutputB.data(), m_ISPCOutputA.data(), + static_cast(pixelCount), static_cast(sphereCount), + // BVH data + m_ISPCBvhMinX.data(), m_ISPCBvhMinY.data(), m_ISPCBvhMinZ.data(), + m_ISPCBvhMaxX.data(), m_ISPCBvhMaxY.data(), m_ISPCBvhMaxZ.data(), + m_ISPCBvhLeftFirst.data(), m_ISPCBvhCount.data(), + m_ISPCBvhSphereIndices.data(), + static_cast(m_ISPCBvhLeftFirst.size()), + static_cast(frameIndex), m_MaxBounces + ); + + // 
── Unpack: accumulate + tone map + RGBA convert ── + for (size_t i = 0; i < pixelCount; i++) { + glm::vec4 color(m_ISPCOutputR[i], m_ISPCOutputG[i], m_ISPCOutputB[i], m_ISPCOutputA[i]); + m_AccumulationData[i] += color; + + glm::vec4 accumulated = m_AccumulationData[i]; + accumulated /= static_cast(frameIndex); + accumulated = glm::clamp(accumulated, glm::vec4(0.0f), glm::vec4(1.0f)); + outputBuffer[i] = PathTracerCore::ConvertToRGBA(accumulated); } -#else - // ── C++ Scalar / std::execution::par Fallback ── +} +#endif + +void CPUBackend::RenderCPUFallback(uint32_t* outputBuffer) +{ static constexpr bool kMultithreaded = true; - m_AccumFrameIndex = frameIndex; if constexpr (kMultithreaded) { std::for_each( std::execution::par, m_ImageVerticalIterator.begin(), m_ImageVerticalIterator.end(), - [this](uint32_t y) + [this, outputBuffer](uint32_t y) { std::ranges::for_each( m_ImageHorizontalIterator.begin(), m_ImageHorizontalIterator.end(), - [this, y, width = m_Width](const uint32_t x) + [this, y, outputBuffer, width = m_Width](const uint32_t x) { const glm::vec4 color = PerPixel(x, y); const size_t idx = static_cast(x) + static_cast(y) * static_cast(width); @@ -217,12 +222,8 @@ void CPUBackend::Render( glm::vec4 accumulatedColor = m_AccumulationData[idx]; accumulatedColor /= static_cast(m_AccumFrameIndex); - accumulatedColor = glm::clamp( - accumulatedColor, - glm::vec4(0.0f), glm::vec4(1.0f) - ); - outputBuffer[idx] - = PathTracerCore::ConvertToRGBA(accumulatedColor); + accumulatedColor = glm::clamp(accumulatedColor, glm::vec4(0.0f), glm::vec4(1.0f)); + outputBuffer[idx] = PathTracerCore::ConvertToRGBA(accumulatedColor); }); }); } else { @@ -240,7 +241,6 @@ void CPUBackend::Render( } } } -#endif // PN_ISPC } // ────────────────────────────────────────────── diff --git a/RayTracing/src/CPUBackend.h b/RayTracing/src/CPUBackend.h index 081104b..a0732bf 100644 --- a/RayTracing/src/CPUBackend.h +++ b/RayTracing/src/CPUBackend.h @@ -39,6 +39,11 @@ class CPUBackend : public 
IRenderBackend ) const noexcept; static HitPayLoad Miss(const Ray& ray) noexcept; + void RenderCPUFallback(uint32_t* outputBuffer); +#ifdef PN_ISPC + void RenderISPC(const Camera& camera, const Scene& scene, uint32_t* outputBuffer, uint32_t frameIndex); +#endif + private: uint32_t m_Width = 0; uint32_t m_Height = 0; diff --git a/RayTracing/src/CUDABackend.cpp b/RayTracing/src/CUDABackend.cpp index cccd4b2..40f6201 100644 --- a/RayTracing/src/CUDABackend.cpp +++ b/RayTracing/src/CUDABackend.cpp @@ -154,41 +154,8 @@ void CUDABackend::Render( } } - // Upload scene data only when changed (tracked by scene version) if (scene.Version != m_LastSceneVersion) - { - UploadSceneToGPU(scene); - m_LastSceneVersion = scene.Version; - - // Rebuild BVH and upload to GPU - m_BVH.Build(scene); - const auto& bvhNodes = m_BVH.Nodes(); - - m_GPUBVHNodes.resize(bvhNodes.size()); - for (size_t i = 0; i < bvhNodes.size(); i++) - { - m_GPUBVHNodes[i].BoundsMin[0] = bvhNodes[i].Bounds.Min.x; - m_GPUBVHNodes[i].BoundsMin[1] = bvhNodes[i].Bounds.Min.y; - m_GPUBVHNodes[i].BoundsMin[2] = bvhNodes[i].Bounds.Min.z; - m_GPUBVHNodes[i].BoundsMax[0] = bvhNodes[i].Bounds.Max.x; - m_GPUBVHNodes[i].BoundsMax[1] = bvhNodes[i].Bounds.Max.y; - m_GPUBVHNodes[i].BoundsMax[2] = bvhNodes[i].Bounds.Max.z; - m_GPUBVHNodes[i].LeftFirst = bvhNodes[i].LeftFirst; - m_GPUBVHNodes[i].Count = bvhNodes[i].Count; - } - - // Copy sorted sphere indices for GPU leaf resolution - const auto& sphereIndices = m_BVH.SphereIndices(); - m_GPUSphereIndices = sphereIndices; - - CUDARenderer_UploadBVH( - m_CUDAState.get(), - m_GPUBVHNodes.data(), - static_cast(m_GPUBVHNodes.size()), - m_GPUSphereIndices.data(), - static_cast(m_GPUSphereIndices.size()) - ); - } + UploadSceneAndBVHIfChanged(scene); // Upload camera position const glm::vec3& camPos = camera.GetPosition(); @@ -245,58 +212,7 @@ void CUDABackend::Render( if (m_InteropEnabled && m_Interop) { 
m_Interop->SyncCUDAComplete(reinterpret_cast(CUDARenderer_GetComputeStream(m_CUDAState.get()))); - - // ── Interop path: CUDA wrote to Vulkan buffer, copy to Peanut's VkImage ── - VkCommandBuffer cmd = Peanut::Application::GetCommandBuffer(true); - const VkImage dstImage = m_DestinationImage; - - // Buffer barrier: external (CUDA) write → Vulkan transfer read - VkBufferMemoryBarrier bufBarrier = {}; - bufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - bufBarrier.srcAccessMask = 0; - bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - bufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufBarrier.buffer = m_Interop->GetVulkanBuffer(); - bufBarrier.size = VK_WHOLE_SIZE; - vkCmdPipelineBarrier(cmd, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, nullptr, 1, &bufBarrier, 0, nullptr); - - VkImageMemoryBarrier preBarrier = {}; - preBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - preBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - preBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - preBarrier.image = dstImage; - preBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - preBarrier.srcAccessMask = 0; - preBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &preBarrier); - - VkBufferImageCopy region = {}; - region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; - region.imageExtent = { m_Interop->GetWidth(), m_Interop->GetHeight(), 1 }; - vkCmdCopyBufferToImage(cmd, m_Interop->GetVulkanBuffer(), dstImage, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); - - VkImageMemoryBarrier postBarrier = {}; - postBarrier.sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - postBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - postBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - postBarrier.image = dstImage; - postBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - postBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - postBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &postBarrier); - - Peanut::Application::FlushCommandBuffer(cmd); + CopyInteropToVulkan(); } else { @@ -308,4 +224,89 @@ void CUDABackend::Render( } } +void CUDABackend::UploadSceneAndBVHIfChanged(const Scene& scene) +{ + UploadSceneToGPU(scene); + m_LastSceneVersion = scene.Version; + + // Rebuild BVH and upload to GPU + m_BVH.Build(scene); + const auto& bvhNodes = m_BVH.Nodes(); + + m_GPUBVHNodes.resize(bvhNodes.size()); + for (size_t i = 0; i < bvhNodes.size(); i++) + { + m_GPUBVHNodes[i].BoundsMin[0] = bvhNodes[i].Bounds.Min.x; + m_GPUBVHNodes[i].BoundsMin[1] = bvhNodes[i].Bounds.Min.y; + m_GPUBVHNodes[i].BoundsMin[2] = bvhNodes[i].Bounds.Min.z; + m_GPUBVHNodes[i].BoundsMax[0] = bvhNodes[i].Bounds.Max.x; + m_GPUBVHNodes[i].BoundsMax[1] = bvhNodes[i].Bounds.Max.y; + m_GPUBVHNodes[i].BoundsMax[2] = bvhNodes[i].Bounds.Max.z; + m_GPUBVHNodes[i].LeftFirst = bvhNodes[i].LeftFirst; + m_GPUBVHNodes[i].Count = bvhNodes[i].Count; + } + + const auto& sphereIndices = m_BVH.SphereIndices(); + m_GPUSphereIndices = sphereIndices; + + CUDARenderer_UploadBVH( + m_CUDAState.get(), + m_GPUBVHNodes.data(), + static_cast(m_GPUBVHNodes.size()), + m_GPUSphereIndices.data(), + static_cast(m_GPUSphereIndices.size()) + ); +} + +void CUDABackend::CopyInteropToVulkan() +{ + VkCommandBuffer cmd = Peanut::Application::GetCommandBuffer(true); + const 
VkImage dstImage = m_DestinationImage; + + VkBufferMemoryBarrier bufBarrier = {}; + bufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + bufBarrier.srcAccessMask = 0; + bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufBarrier.buffer = m_Interop->GetVulkanBuffer(); + bufBarrier.size = VK_WHOLE_SIZE; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &bufBarrier, 0, nullptr); + + VkImageMemoryBarrier preBarrier = {}; + preBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + preBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + preBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.image = dstImage; + preBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + preBarrier.srcAccessMask = 0; + preBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &preBarrier); + + VkBufferImageCopy region = {}; + region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; + region.imageExtent = { m_Interop->GetWidth(), m_Interop->GetHeight(), 1 }; + vkCmdCopyBufferToImage(cmd, m_Interop->GetVulkanBuffer(), dstImage, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); + + VkImageMemoryBarrier postBarrier = {}; + postBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + postBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + postBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.image = dstImage; + postBarrier.subresourceRange = 
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + postBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + postBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &postBarrier); + + Peanut::Application::FlushCommandBuffer(cmd); +} + #endif // PN_CUDA diff --git a/RayTracing/src/CUDABackend.h b/RayTracing/src/CUDABackend.h index 26c9221..18a9a8c 100644 --- a/RayTracing/src/CUDABackend.h +++ b/RayTracing/src/CUDABackend.h @@ -64,6 +64,10 @@ class CUDABackend : public IRenderBackend private: // ── Scene upload (host → GPU packing + cudaMemcpy) ── void UploadSceneToGPU(const Scene& scene); + void UploadSceneAndBVHIfChanged(const Scene& scene); + + // ── Vulkan interop output ── + void CopyInteropToVulkan(); // ── CUDA state ── std::unique_ptr m_CUDAState;