From e99e4eb7729f02928b32de951d81887958a2bc7c Mon Sep 17 00:00:00 2001 From: luoxiaocheng Date: Wed, 19 Sep 2018 22:09:57 +0800 Subject: [PATCH 1/2] fix bugs in benchmark --- test/framework/net/benchmark.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/framework/net/benchmark.cpp b/test/framework/net/benchmark.cpp index 9ba978ca3..764af975b 100644 --- a/test/framework/net/benchmark.cpp +++ b/test/framework/net/benchmark.cpp @@ -104,6 +104,7 @@ TEST(NetTest, net_execute_base_test) { fill_tensor_host_const(th, 1.f); d_tensor_in_p->copy_from(th); } + cudaDeviceSynchronize(); // do inference Context ctx(FLAGS_device_id, 0, 0); saber::SaberTimer my_time; @@ -111,6 +112,7 @@ TEST(NetTest, net_execute_base_test) { for (int i = 0; i < FLAGS_warmup_iter; i++) { net_executer.prediction(); } + cudaDeviceSynchronize(); #ifdef ENABLE_OP_TIMER net_executer.reset_op_time(); #endif @@ -126,7 +128,7 @@ TEST(NetTest, net_execute_base_test) { } my_time.end(ctx); #ifdef ENABLE_OP_TIMER - std::vector op_time = net_executer.geifrot_op_time(); + std::vector op_time = net_executer.get_op_time(); auto exec_funcs = net_executer.get_exec_funcs(); auto op_param = net_executer.get_op_param(); for (int i = 0; i < op_time.size(); i++) { From bacc057db2a35ce844af346f896f99724557634c Mon Sep 17 00:00:00 2001 From: luoxiaocheng Date: Wed, 19 Sep 2018 22:12:27 +0800 Subject: [PATCH 2/2] fix bug in benchmark --- test/framework/net/benchmark.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/framework/net/benchmark.cpp b/test/framework/net/benchmark.cpp index 764af975b..9858fb648 100644 --- a/test/framework/net/benchmark.cpp +++ b/test/framework/net/benchmark.cpp @@ -104,7 +104,7 @@ TEST(NetTest, net_execute_base_test) { fill_tensor_host_const(th, 1.f); d_tensor_in_p->copy_from(th); } - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); // do inference Context ctx(FLAGS_device_id, 0, 0); saber::SaberTimer my_time; @@ -112,7 +112,7 @@ TEST(NetTest, net_execute_base_test) { for (int i = 0; i < FLAGS_warmup_iter; i++) { net_executer.prediction(); } - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); #ifdef ENABLE_OP_TIMER net_executer.reset_op_time(); #endif