diff --git a/CMakeLists.txt b/CMakeLists.txt index 050da5434..3b867b88d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,9 @@ if((NOT BUILD_FAT_BIN) AND (NOT BUILD_CROSS_PLANTFORM) AND USE_CUDA) endif() if(USE_X86_PLACE) - if(NOT DEFINED BUILD_X86_TARGET) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(BUILD_X86_ARCH "clang_native") + elseif(NOT DEFINED BUILD_X86_TARGET) set(BUILD_X86_ARCH "native") anakin_get_cpu_arch(BUILD_X86_ARCH) else() diff --git a/cmake/compiler_options.cmake b/cmake/compiler_options.cmake index 1b41b047e..ee65bf6bb 100644 --- a/cmake/compiler_options.cmake +++ b/cmake/compiler_options.cmake @@ -96,8 +96,11 @@ if(X86_COMPILE_482) # anakin_add_compile_option(-static-libgcc) endif() +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") anakin_add_compile_option(-fabi-version=6) - anakin_add_compile_option(-march=${BUILD_X86_ARCH}) + anakin_add_compile_option(-march=${BUILD_X86_ARCH}) +endif() + anakin_add_compile_option(-Ofast) anakin_add_compile_option(-ffast-math) anakin_add_compile_option(-Wall) diff --git a/examples/anakin/build.sh b/examples/anakin/build.sh index 41763133c..ee7812ce1 100644 --- a/examples/anakin/build.sh +++ b/examples/anakin/build.sh @@ -2,6 +2,6 @@ DEBUG_FLAG="-std=c++11 -g -I../../framework/c_api/ -I./ -I../../build/ -ldl -Wno-narrowing " ORI_FAST_FLAG="-std=c++11 -Ofast -ffast-math -I../../framework/c_api/ -I./ -ldl -Wno-narrowing " STATIC_FAST_FLAG="-std=c++11 -Ofast -ffast-math -I../../output -I./ -ldl -Wno-narrowing -I../../output/framework/c_api/" -FAST_FLAG="-std=c++11 -g -static-libstdc++ --sysroot=/opt/compiler/gcc-4.8.2/ -Wl,-rpath,/opt/compiler/gcc-4.8.2/lib64/ -Wl,-dynamic-linker,/opt/compiler/gcc-4.8.2/lib64/ld-linux-x86-64.so.2 -Ofast -ffast-math -I../../output/framework/c_api/ -I./ -ldl -Wno-narrowing" +FAST_FLAG="-std=c++11 -g -static-libstdc++ --sysroot=/opt/compiler/gcc-4.8.2/ -Wl,-rpath,/opt/compiler/gcc-4.8.2/lib64/ -Wl,-dynamic-linker,/opt/compiler/gcc-4.8.2/lib64/ld-linux-x86-64.so.2 -Ofast -ffast-math -I../../output/framework/c_api/ -I./ -I../../framework/c_api/ -ldl -Wno-narrowing " g++ example.cpp -o example $FAST_FLAG -g++ map_rnn.cpp -o map_rnn $FAST_FLAG \ No newline at end of file +g++ map_rnn.cpp -o map_rnn ${FAST_FLAG} \ No newline at end of file diff --git a/examples/anakin/map_rnn.cpp b/examples/anakin/map_rnn.cpp index cd1e46c36..1a269170e 100644 --- a/examples/anakin/map_rnn.cpp +++ b/examples/anakin/map_rnn.cpp @@ -1,5 +1,6 @@ #include "anakin_helper.h" #include +bool g_print_data=false; class Data { public: Data(std::string file_name, int batch_size) : @@ -197,19 +198,21 @@ class AKRNNExampleX86 { input_fea->set_dev_lod_offset(lod); _anakin_obj->prediction(); -#ifdef PRINT_RESULT - AnakinRunerTensorInterface* output_0 = _anakin_obj->get_output_tensor(0); - for (int seq_id = 0; seq_id < seq_offset.size() - 1; seq_id++) { - int seq_len = seq_offset[seq_id + 1] - seq_offset[seq_id]; - int seq_start = seq_offset[seq_id]; - for (int i = 0; i < seq_len - 1; i++) { - printf("%f|", static_cast(output_0->get_host_data())[seq_start + i]); - } + if(g_print_data){ + AnakinRunerTensorInterface* output_0 = _anakin_obj->get_output_tensor(0); + for (int seq_id = 0; seq_id < seq_offset.size() - 1; seq_id++) { + int seq_len = seq_offset[seq_id + 1] - seq_offset[seq_id]; + int seq_start = seq_offset[seq_id]; + + for (int i = 0; i < seq_len - 1; i++) { + printf("%f|", static_cast(output_0->get_host_data())[seq_start + i]); + } - printf("%f\n", static_cast(output_0->get_host_data())[seq_start + seq_len - 1]); + printf("%f\n", static_cast(output_0->get_host_data())[seq_start + seq_len - 1]); + } } -#endif + // output_0->copy_data_dev_2_host(); // float* out_ptr = static_cast(output_0->get_host_data()); @@ -249,9 +252,23 @@ int main(int argc, const char** argv) { } if (argc > 5) { - so_path = argv[5]; + g_print_data=atoi(argv[5]); + } + + if (argc > 6) { + so_dir=argv[6]; + } + + if(argc > 7){ + so_path = argv[7]; + } + + if(argc<=7){ + AKRNNExampleX86 ak_run(so_dir, model_path, max_batch); + ak_run.run(data_path,batch_size); + }else { + AKRNNExampleX86 ak_run(so_dir, so_path, model_path, max_batch); + ak_run.run(data_path,batch_size); } - AKRNNExampleX86 ak_run(so_dir, so_path,model_path,max_batch); - ak_run.run(data_path,batch_size); } \ No newline at end of file diff --git a/saber/core/buffer.h b/saber/core/buffer.h index f76174329..ceb43dd32 100644 --- a/saber/core/buffer.h +++ b/saber/core/buffer.h @@ -202,14 +202,14 @@ class Buffer { /** * \brief return const data pointer */ - const TPtr get_data(){ + const TPtr get_data()const { return _data; } /** * \brief return mutable data pointer */ - TPtr get_data_mutable(){ + TPtr get_data_mutable()const{ return _data; } diff --git a/saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp b/saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp index ad2c16e4e..2fd17391b 100644 --- a/saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp +++ b/saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp @@ -227,16 +227,7 @@ SaberStatus JitAvx512Conv::dispatch( const float *ptr_weights = reinterpret_cast(weights_internal->data()); const float *ptr_bias = reinterpret_cast(bias->data()); - auto ptr_dst = NULL; - switch (type){ - case AK_UINT8: ptr_dst = reinterpret_cast(outputs[0]->mutable_data()); break; - case AK_INT8: ptr_dst = reinterpret_cast(outputs[0]->mutable_data()); break; - case AK_UINT32: ptr_dst = reinterpret_cast(outputs[0]->mutable_data()); break; - case AK_INT32: ptr_dst = reinterpret_cast(outputs[0]->mutable_data()); break; - case AK_FLOAT: ptr_dst = reinterpret_cast(outputs[0]->mutable_data()); break; - default: LOG(FATAL) << "data type: " << type << " is unsupported now"; - } - //ptr_dst = reinterpret_cast(outputs[0]->mutable_data()); + auto ptr_dst= static_cast(outputs[0]->mutable_data()); const auto &jcp = kernel->jcp; diff --git a/saber/funcs/impl/x86/saber_attension_lstm.cpp b/saber/funcs/impl/x86/saber_attension_lstm.cpp index 43bfe71c9..57564d18b 100644 --- a/saber/funcs/impl/x86/saber_attension_lstm.cpp +++ b/saber/funcs/impl/x86/saber_attension_lstm.cpp @@ -97,7 +97,7 @@ void sequence_pool(const Dtype* data, const Dtype* weight, std::vector& seq for (int j = seq_offset[i]; j < seq_offset[i + 1]; j++) { Dtype scale = weight[j]; - Dtype* tmp_data = data + j * dim; + const Dtype* tmp_data = data + j * dim; for (int k = 0; k < dim; k++) { tmp_out[k] += scale * tmp_data[k]; diff --git a/saber/funcs/impl/x86/saber_col2im_deconv.cpp b/saber/funcs/impl/x86/saber_col2im_deconv.cpp index 00ccc17bd..ccb45b8ef 100644 --- a/saber/funcs/impl/x86/saber_col2im_deconv.cpp +++ b/saber/funcs/impl/x86/saber_col2im_deconv.cpp @@ -171,6 +171,7 @@ SaberStatus SaberCol2ImDeconv::dispatch(const std::vector with_relu); } } + return SaberSuccess; } } } diff --git a/saber/funcs/impl/x86/saber_detection_output.h b/saber/funcs/impl/x86/saber_detection_output.h index 01eebc68b..1546604d5 100644 --- a/saber/funcs/impl/x86/saber_detection_output.h +++ b/saber/funcs/impl/x86/saber_detection_output.h @@ -112,7 +112,7 @@ class SaberDetectionOutput : \ dtype* _bbox_cpu_data{nullptr}; dtype* _conf_cpu_data{nullptr}; }; -template class SaberDetectionOutput; + } //namespace saber } //namespace anakin diff --git a/saber/funcs/impl/x86/saber_embedding.cpp b/saber/funcs/impl/x86/saber_embedding.cpp index a0bf18b00..e3ca79779 100644 --- a/saber/funcs/impl/x86/saber_embedding.cpp +++ b/saber/funcs/impl/x86/saber_embedding.cpp @@ -84,6 +84,7 @@ SaberStatus SaberEmbedding::dispatch( } } } + return SaberSuccess; } diff --git a/saber/funcs/impl/x86/saber_match_matrix.cpp b/saber/funcs/impl/x86/saber_match_matrix.cpp index 7bb27a965..5cc15a587 100644 --- a/saber/funcs/impl/x86/saber_match_matrix.cpp +++ b/saber/funcs/impl/x86/saber_match_matrix.cpp @@ -60,7 +60,7 @@ void padding_out(const dtype* src, std::vector& offset_r, int dim_t, int le int tl = dim_t * len_l; for (int i = 0; i < seq_num; i++) { dtype* dst_tmp = dst + i * tl * max_len_r; - dtype* src_tmp = src + offset_r[i] * tl; + const dtype* src_tmp = src + offset_r[i] * tl; int cur_len = offset_r[i+1] - offset_r[i]; for (int j = 0; j < cur_len; j++) { for (int k = 0; k < tl; k++) { diff --git a/saber/funcs/impl/x86/saber_normal_activation.h b/saber/funcs/impl/x86/saber_normal_activation.h index 9bcaf2b1f..3b44cbe7c 100644 --- a/saber/funcs/impl/x86/saber_normal_activation.h +++ b/saber/funcs/impl/x86/saber_normal_activation.h @@ -17,7 +17,7 @@ namespace saber { template inline Dtype InValidAct(Dtype a) { - CHECK_EQ(0, 1) << "InValidAct"; + return 0; } template @@ -44,6 +44,11 @@ inline Dtype Identity(const Dtype a) { #if defined(__SSE4_2__) and defined(__FMA__) +template<> +inline __m128 InValidAct<__m128>(const __m128 a) { + return _mm_set1_ps(0.0f); +} + template<> inline __m128 Relu<__m128>(const __m128 a) { @@ -80,6 +85,10 @@ inline __m128 Tanh<__m128>(const __m128 a) { #if defined(__AVX2__) and defined(__FMA__) +template<> +inline __m256 InValidAct<__m256>(const __m256 a) { + return _mm256_set1_ps(0.0f); +} template<> inline __m256 Relu<__m256>(const __m256 a) { @@ -112,6 +121,10 @@ inline __m256 Tanh<__m256>(const __m256 a) { #if defined(__AVX512F__) +template<> +inline __m512 InValidAct<__m512>(const __m512 a) { + return _mm512_set1_ps(0.0f); +} template<> inline __m512 Relu<__m512>(const __m512 a) { diff --git a/saber/funcs/impl/x86/saber_sequence_conv.cpp b/saber/funcs/impl/x86/saber_sequence_conv.cpp index 723a83633..58774ebe5 100644 --- a/saber/funcs/impl/x86/saber_sequence_conv.cpp +++ b/saber/funcs/impl/x86/saber_sequence_conv.cpp @@ -57,8 +57,8 @@ SaberStatus SaberSequenceConv::dispatch( _hidden_size); } - gemm(false, false, word_num, _feature_size, _hidden_kernel_size, 1.f, _temp_im2col_tensor.data(), - param.filter_tensor->data(), 0.f, out_data->mutable_data()); + gemm(false, false, word_num, _feature_size, _hidden_kernel_size, 1.f, static_cast(_temp_im2col_tensor.data()), + static_cast(param.filter_tensor->data()), 0.f, static_cast(out_data->mutable_data())); std::vector> voffset; voffset.push_back(offset); out_data->set_seq_offset(voffset); diff --git a/saber/funcs/impl/x86/saber_sequence_expand.cpp b/saber/funcs/impl/x86/saber_sequence_expand.cpp index c554d1143..4b3afcd0d 100644 --- a/saber/funcs/impl/x86/saber_sequence_expand.cpp +++ b/saber/funcs/impl/x86/saber_sequence_expand.cpp @@ -37,7 +37,7 @@ SequenceExpandParam& param) { auto ref_offset = inputs[1]->get_seq_offset()[0]; size_t len = inputs[0]->valid_size(); - OpDataType* input_data = static_cast(inputs[0]->data()); + const OpDataType* input_data = static_cast(inputs[0]->data()); OpDataType* output_data = static_cast(outputs[0]->mutable_data()); int dim = inputs[0]->valid_size() / inputs[0]->num(); diff --git a/saber/funcs/impl/x86/saber_topk_avg_pooling.cpp b/saber/funcs/impl/x86/saber_topk_avg_pooling.cpp index 7653500aa..ca40b1bee 100644 --- a/saber/funcs/impl/x86/saber_topk_avg_pooling.cpp +++ b/saber/funcs/impl/x86/saber_topk_avg_pooling.cpp @@ -43,6 +43,7 @@ SaberStatus SaberTopKAvgPooling::get_topk(std::vector& for (int k = real_k; k < top_k; k++) { dst[k] = (OpDataType) 0.f; } + return SaberSuccess; } diff --git a/saber/funcs/impl/x86/saber_topk_pooling.cpp b/saber/funcs/impl/x86/saber_topk_pooling.cpp index a52ff68c8..4eee9dff6 100644 --- a/saber/funcs/impl/x86/saber_topk_pooling.cpp +++ b/saber/funcs/impl/x86/saber_topk_pooling.cpp @@ -43,6 +43,7 @@ SaberStatus SaberTopKPooling::get_topk(std::vector& sr for (int k = real_k; k < top_k; k++) { dst[k] = (OpDataType) 0.f; } + return SaberSuccess; } template @@ -76,7 +77,7 @@ SaberStatus SaberTopKPooling::dispatch( int feat_map_size = height_stride * width_stride; for (int c = 0; c < channel; c++) { OpDataType* tmp_out_data = output_data + (i * channel + c) * top_k; - OpDataType* tmp_in_data = input_data + (i * channel + c) * feat_map_size; + const OpDataType* tmp_in_data = input_data + (i * channel + c) * feat_map_size; std::vector vec; for (int h = 0; h < height; h++) { diff --git a/saber/funcs/impl/x86/sequence2batch.cpp b/saber/funcs/impl/x86/sequence2batch.cpp index 8210ca964..fe21db525 100644 --- a/saber/funcs/impl/x86/sequence2batch.cpp +++ b/saber/funcs/impl/x86/sequence2batch.cpp @@ -29,13 +29,14 @@ void CopyMatrixRowsFunctor::operator()( LOG(ERROR) << "hidden size should be divided with no remainder by fragment_num."; exit(-1); } + typedef typename DataTrait::PtrDtype Data_ptr; auto height = dst_shape[0]; auto dst_width = dst_shape[1] / fragment_num; auto src_width = src_shape[1] / fragment_num; auto real_width = (width != 0) ? width : (dst_width > src_width ? src_width : dst_width); - auto* src_data = src->data(); - auto* dst_data = dst->mutable_data(); + Data_ptr src_data = static_cast(src->data()); + Data_ptr dst_data = static_cast(dst->mutable_data()); if (is_src_index) { #pragma omp parallel for collapse(2) diff --git a/saber/saber_funcs_param.h b/saber/saber_funcs_param.h index 0c3e2a5b5..d064f5b8b 100644 --- a/saber/saber_funcs_param.h +++ b/saber/saber_funcs_param.h @@ -104,7 +104,9 @@ struct AffineChannelParam { AffineChannelParam(const AffineChannelParam& right) {} - AffineChannelParam& operator=(const AffineChannelParam& right) {} + AffineChannelParam& operator=(const AffineChannelParam& right) { + return *this; + } bool operator==(const AffineChannelParam& right) {return true;} }; @@ -1053,6 +1055,7 @@ struct ExpandParam{ } ExpandParam& operator=(const ExpandParam& right) { expand_times = right.expand_times; + return *this; } bool operator==(const ExpandParam& right) { bool flag = true; @@ -1534,6 +1537,7 @@ struct MatMulParam { MatMulParam& operator=(const MatMulParam& right) { _is_transpose_X = right._is_transpose_X; _is_transpose_Y = right._is_transpose_Y; + return *this; } bool operator==(const MatMulParam& right) { bool comp_eq = true; @@ -2396,7 +2400,9 @@ template struct TransposeParam { TransposeParam() = default; TransposeParam(const TransposeParam& right) {} - TransposeParam& operator=(const TransposeParam& right) {} + TransposeParam& operator=(const TransposeParam& right) { + return *this; + } bool operator==(const TransposeParam& right) { return true; } @@ -2534,7 +2540,9 @@ template struct MeanParam { MeanParam() = default; MeanParam(const MeanParam& right) {} - MeanParam& operator=(const MeanParam& right) {} + MeanParam& operator=(const MeanParam& right) { + return *this; + } bool operator==(const MeanParam& right) { return true; } diff --git a/test/framework/net/net_exec_test.cpp b/test/framework/net/net_exec_test.cpp index 0d3d624cf..e4346d134 100644 --- a/test/framework/net/net_exec_test.cpp +++ b/test/framework/net/net_exec_test.cpp @@ -348,7 +348,7 @@ TEST(NetTest, net_execute_reconstruction_test) { int main(int argc, const char** argv){ if (argc < 2){ LOG(ERROR)<<"no input!!!"; - return; + return -1; } if (argc > 1) { g_model_path = std::string(argv[1]); diff --git a/test/framework/net/net_exec_test_x86.cpp b/test/framework/net/net_exec_test_x86.cpp index 4ec56d59c..5adb7fda5 100644 --- a/test/framework/net/net_exec_test_x86.cpp +++ b/test/framework/net/net_exec_test_x86.cpp @@ -75,7 +75,7 @@ TEST(NetTest, net_execute_base_test) { int main(int argc, const char** argv){ if (argc < 2){ LOG(ERROR)<<"no input!!!"; - return; + return -1; } if (argc > 1) { g_model_path = std::string(argv[1]); diff --git a/test/saber/test_saber_activation.cpp b/test/saber/test_saber_activation.cpp index 83f06a5cb..d4722bcb0 100644 --- a/test/saber/test_saber_activation.cpp +++ b/test/saber/test_saber_activation.cpp @@ -134,7 +134,7 @@ void test_model() { //test example for (auto shape : {input_shape, input_shape2}) { - for (auto act : {1, 2, 3, 4, 5, 9, 10, active}) { + for (auto act : {Active_sigmoid,Active_relu, Active_tanh, Active_clipped_relu, Active_elu, Active_stanh, Active_prelu}) { LOG(INFO) << "================ active: " << act; for (auto neg_slope : {-1.0, 0.5}) { diff --git a/test/saber/test_saber_base.h b/test/saber/test_saber_base.h index 3cd55f3f1..16db295fa 100644 --- a/test/saber/test_saber_base.h +++ b/test/saber/test_saber_base.h @@ -181,10 +181,11 @@ class TestSaberBase{ add_inputs_shape(shape_v); for(int i = 0; i < _op_input_num; ++i) { - SaberStatus status = _inputs_dev[0][i]->set_dtype(input[i]->get_dtype()); - status &= _inputs_host[0][i]->set_dtype(input[i]->get_dtype()); - if(!status) + SaberStatus status_dev = _inputs_dev[0][i]->set_dtype(input[i]->get_dtype()); + SaberStatus status_host= _inputs_host[0][i]->set_dtype(input[i]->get_dtype()); + if (status_dev != SaberSuccess || status_host != SaberSuccess){ LOG(INFO) << "ERROR"; + } _inputs_dev[0][i] -> copy_from(*input[i]); _inputs_host[0][i] -> copy_from(*input[i]); if(input[i]->get_seq_offset().size() > 0){ diff --git a/test/saber/test_saber_conv_eltwise.cpp b/test/saber/test_saber_conv_eltwise.cpp index df328a97e..a37cd3bfb 100644 --- a/test/saber/test_saber_conv_eltwise.cpp +++ b/test/saber/test_saber_conv_eltwise.cpp @@ -221,8 +221,9 @@ int test_conv_results(int group, group, kernel_w, kernel_h, stride_w, stride_h, dilation_w, dilation_h, pad_w, pad_h, bias_term, param.activation_param.has_active, 1.f); - +#ifdef USE_CUDA cudaDeviceSynchronize(); +#endif conv.init(input_v, output_v, conv_eltwise_param, strategy, imp, ctx1); conv.trans_weights(*param.mutable_weight(), *param.mutable_bias(), param.pad_h, param.pad_w, param.dilation_h, param.dilation_w, diff --git a/test/saber/test_saber_gru.cpp b/test/saber/test_saber_gru.cpp index 73ba2bc57..1393843e0 100644 --- a/test/saber/test_saber_gru.cpp +++ b/test/saber/test_saber_gru.cpp @@ -16,7 +16,7 @@ using namespace std; template static Dtype InValidAct(Dtype a) { - CHECK(false)<<"InValidAct"; + return 0; } template diff --git a/test/saber/test_saber_lstm.cpp b/test/saber/test_saber_lstm.cpp index bbfa8c05d..62bb74701 100644 --- a/test/saber/test_saber_lstm.cpp +++ b/test/saber/test_saber_lstm.cpp @@ -17,7 +17,7 @@ using namespace std; template static Dtype InValidAct(Dtype a) { - CHECK(false)<<"InValidAct"; + return 0; } template diff --git a/test/saber/test_saber_pooling.cpp b/test/saber/test_saber_pooling.cpp index 4e0c4dc7e..b73a31915 100644 --- a/test/saber/test_saber_pooling.cpp +++ b/test/saber/test_saber_pooling.cpp @@ -118,7 +118,7 @@ void test_pooling() { if (pad_h >= window_h || pad_w >= window_w){ continue; } - for (int pooling_type : {Pooling_max, Pooling_average_include_padding, Pooling_average_exclude_padding}) { + for (PoolingType pooling_type : {Pooling_max, Pooling_average_include_padding, Pooling_average_exclude_padding}) { for (int stride_h : {1, 2 }) { for (int stride_w : {1, 2}) { PoolingParam param(window_h, window_w, pad_h, pad_w, stride_h, stride_w, diff --git a/test/saber/test_saber_priorbox.cpp b/test/saber/test_saber_priorbox.cpp index 64a54f70d..230d3e348 100644 --- a/test/saber/test_saber_priorbox.cpp +++ b/test/saber/test_saber_priorbox.cpp @@ -25,7 +25,7 @@ void priorbox_cpu_base(const std::vector* > &input, \ unsigned long long out_size = output[0]->valid_size(); - float* _cpu_data = output[0]->mutable_data(); + float* _cpu_data = static_cast(output[0]->mutable_data()); float* min_buf = (float*)fast_malloc(sizeof(float) * 4); float* max_buf = (float*)fast_malloc(sizeof(float) * 4); diff --git a/test/saber/test_saber_topk_avg_pooling.cpp b/test/saber/test_saber_topk_avg_pooling.cpp index 3a230b830..37a534edb 100644 --- a/test/saber/test_saber_topk_avg_pooling.cpp +++ b/test/saber/test_saber_topk_avg_pooling.cpp @@ -120,7 +120,6 @@ void topk_avg_pooling_basic(const std::vector*>& inputs, st } } - return SaberSuccess; }