From 96dfdcc9c6ffb0d4ba1894eb548fbf086ce7c243 Mon Sep 17 00:00:00 2001 From: hongyizhang <805701948@qq.com> Date: Thu, 11 Jun 2026 11:53:26 +0800 Subject: [PATCH] Reduce GOP decoder stream synchronizations Signed-off-by: hongyizhang <805701948@qq.com> --- .../src/PyNvOnDemandDecoder/src/PyNvGopDecoder_common.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/on_demand_video_decoder/ext_impl/src/PyNvOnDemandDecoder/src/PyNvGopDecoder_common.cpp b/packages/on_demand_video_decoder/ext_impl/src/PyNvOnDemandDecoder/src/PyNvGopDecoder_common.cpp index a4e790c..8963358 100644 --- a/packages/on_demand_video_decoder/ext_impl/src/PyNvOnDemandDecoder/src/PyNvGopDecoder_common.cpp +++ b/packages/on_demand_video_decoder/ext_impl/src/PyNvOnDemandDecoder/src/PyNvGopDecoder_common.cpp @@ -109,8 +109,10 @@ int PyNvGopDecoder::GetYUVFromFrame(NvDecoder* decoder, const uint8_t* pFrame, u decoded_frame.timestamp = timestamp; decoded_frame.SetColorRange(color_range); - // Copy the decode frames from device - CUDA_DRVAPI_CALL(cuMemcpyDtoD((CUdeviceptr)pFrame_buffer, (CUdeviceptr)pFrame, decoder->GetFrameSize())); + // Queue the decode-buffer copy on the decoder stream. DecProc synchronizes + // this stream before returning the Python-visible frame. + CUDA_DRVAPI_CALL(cuMemcpyDtoDAsync((CUdeviceptr)pFrame_buffer, (CUdeviceptr)pFrame, + decoder->GetFrameSize(), decoder->GetStream())); switch (decoded_frame.format) { case Pixel_Format_NV12: { @@ -481,6 +483,8 @@ void PyNvGopDecoder::DecProc(AVColorRange color_range, NvDecoder* decoder, << " is different with number of frame id:" << std::to_string(sorted_frame_ids.size()); } + CUDA_DRVAPI_CALL(cuStreamSynchronize(decoder->GetStream())); + nvtxRangePop(); }