PaddlePaddle · qq332982511 · Nov 27, 2018
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -91,7 +91,9 @@ if((NOT BUILD_FAT_BIN) AND (NOT BUILD_CROSS_PLANTFORM) AND USE_CUDA)
 endif()
 
 if(USE_X86_PLACE)
-    if(NOT DEFINED BUILD_X86_TARGET)
+    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        set(BUILD_X86_ARCH "clang_native")
+    elseif(NOT DEFINED BUILD_X86_TARGET)
         set(BUILD_X86_ARCH "native")
         anakin_get_cpu_arch(BUILD_X86_ARCH)
     else()

diff --git a/cmake/compiler_options.cmake b/cmake/compiler_options.cmake
@@ -96,8 +96,11 @@ if(X86_COMPILE_482)
 #    anakin_add_compile_option(-static-libgcc)
 endif()
 
+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")  # the two options below are added only when the C++ compiler ID matches GNU
 	anakin_add_compile_option(-fabi-version=6)
-	anakin_add_compile_option(-march=${BUILD_X86_ARCH})
+    anakin_add_compile_option(-march=${BUILD_X86_ARCH})
+endif()
+
     anakin_add_compile_option(-Ofast)
     anakin_add_compile_option(-ffast-math)
     anakin_add_compile_option(-Wall)

diff --git a/examples/anakin/build.sh b/examples/anakin/build.sh
@@ -2,6 +2,6 @@
 DEBUG_FLAG="-std=c++11 -g -I../../framework/c_api/ -I./ -I../../build/  -ldl -Wno-narrowing "
 ORI_FAST_FLAG="-std=c++11 -Ofast -ffast-math -I../../framework/c_api/ -I./ -ldl -Wno-narrowing "
 STATIC_FAST_FLAG="-std=c++11 -Ofast -ffast-math -I../../output -I./ -ldl -Wno-narrowing -I../../output/framework/c_api/"
-FAST_FLAG="-std=c++11 -g -static-libstdc++ --sysroot=/opt/compiler/gcc-4.8.2/ -Wl,-rpath,/opt/compiler/gcc-4.8.2/lib64/ -Wl,-dynamic-linker,/opt/compiler/gcc-4.8.2/lib64/ld-linux-x86-64.so.2  -Ofast -ffast-math -I../../output/framework/c_api/ -I./ -ldl -Wno-narrowing"
+FAST_FLAG="-std=c++11 -g -static-libstdc++ --sysroot=/opt/compiler/gcc-4.8.2/ -Wl,-rpath,/opt/compiler/gcc-4.8.2/lib64/ -Wl,-dynamic-linker,/opt/compiler/gcc-4.8.2/lib64/ld-linux-x86-64.so.2  -Ofast -ffast-math -I../../output/framework/c_api/ -I./ -I../../framework/c_api/  -ldl -Wno-narrowing "
 g++ example.cpp -o example $FAST_FLAG
-g++ map_rnn.cpp -o map_rnn $FAST_FLAG
+g++ map_rnn.cpp -o map_rnn ${FAST_FLAG}
diff --git a/examples/anakin/map_rnn.cpp b/examples/anakin/map_rnn.cpp
@@ -1,5 +1,6 @@
 #include "anakin_helper.h"
 #include <string.h>
+bool g_print_data=false;  // when true, the run loop prints output tensor 0 for each sequence; main sets it from argv[5]
 class Data {
 public:
     Data(std::string file_name, int batch_size) :
@@ -197,19 +198,21 @@ class AKRNNExampleX86 {
             input_fea->set_dev_lod_offset(lod);
             _anakin_obj->prediction();
 
-#ifdef  PRINT_RESULT
-            AnakinRunerTensorInterface* output_0 = _anakin_obj->get_output_tensor(0);
-            for (int seq_id = 0; seq_id < seq_offset.size() - 1; seq_id++) {
-                int seq_len = seq_offset[seq_id + 1] - seq_offset[seq_id];
-                int seq_start = seq_offset[seq_id];
 
-                for (int i = 0; i < seq_len - 1; i++) {
-                    printf("%f|", static_cast<float*>(output_0->get_host_data())[seq_start + i]);
-                }
+            if(g_print_data){
+                AnakinRunerTensorInterface* output_0 = _anakin_obj->get_output_tensor(0);
+                for (int seq_id = 0; seq_id < seq_offset.size() - 1; seq_id++) {
+                    int seq_len = seq_offset[seq_id + 1] - seq_offset[seq_id];
+                    int seq_start = seq_offset[seq_id];
+
+                    for (int i = 0; i < seq_len - 1; i++) {
+                        printf("%f|", static_cast<float*>(output_0->get_host_data())[seq_start + i]);
+                    }
 
-                printf("%f\n", static_cast<float*>(output_0->get_host_data())[seq_start + seq_len - 1]);
+                    printf("%f\n", static_cast<float*>(output_0->get_host_data())[seq_start + seq_len - 1]);
+                }
             }
-#endif
+
 
 //            output_0->copy_data_dev_2_host();
 //            float* out_ptr = static_cast<float*>(output_0->get_host_data());
@@ -249,9 +252,23 @@ int main(int argc, const char** argv) {
     }
 
     if (argc > 5) {
-        so_path = argv[5];
+        g_print_data=atoi(argv[5]);
+    }
+
+    if (argc > 6) {
+        so_dir=argv[6];
+    }
+
+    if(argc > 7){
+        so_path = argv[7];
+    }
+
+    if(argc<=7){
+        AKRNNExampleX86 ak_run(so_dir, model_path, max_batch);
+        ak_run.run(data_path,batch_size);
+    }else {
+        AKRNNExampleX86 ak_run(so_dir, so_path, model_path, max_batch);
+        ak_run.run(data_path,batch_size);
     }
 
-    AKRNNExampleX86 ak_run(so_dir, so_path,model_path,max_batch);
-    ak_run.run(data_path,batch_size);
 }
diff --git a/saber/core/buffer.h b/saber/core/buffer.h
@@ -202,14 +202,14 @@ class Buffer {
     /**
      * \brief return const data pointer
      */
-    const TPtr get_data(){
+    const TPtr get_data()const {  // const-qualified so it can be called on a const Buffer; returns _data by value
         return _data;
     }
 
     /**
      * \brief return mutable data pointer
      */
-    TPtr get_data_mutable(){
+    TPtr get_data_mutable()const{  // NOTE(review): a const method hands out _data as a non-const TPtr -- confirm this is intended
         return _data;
     }
 

diff --git a/saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp b/saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp
@@ -227,16 +227,7 @@ SaberStatus JitAvx512Conv<AK_FLOAT>::dispatch(
     const float *ptr_weights = reinterpret_cast<const float*>(weights_internal->data());
     const float *ptr_bias = reinterpret_cast<const float*>(bias->data());
 
-    auto ptr_dst = NULL;
-    switch (type){
-        case AK_UINT8: ptr_dst = reinterpret_cast<unsigned char*>(outputs[0]->mutable_data()); break;
-        case AK_INT8: ptr_dst = reinterpret_cast<char*>(outputs[0]->mutable_data()); break;
-        case AK_UINT32: ptr_dst = reinterpret_cast<unsigned int*>(outputs[0]->mutable_data()); break;
-        case AK_INT32: ptr_dst = reinterpret_cast<int*>(outputs[0]->mutable_data()); break;
-        case AK_FLOAT: ptr_dst = reinterpret_cast<float*>(outputs[0]->mutable_data()); break;
-        default: LOG(FATAL) << "data type: " << type << " is unsupported now";
-    }
-    //ptr_dst = reinterpret_cast<float*>(outputs[0]->mutable_data());
+    auto ptr_dst= static_cast<float*>(outputs[0]->mutable_data());
 
     const auto &jcp = kernel->jcp;
 

diff --git a/saber/funcs/impl/x86/saber_attension_lstm.cpp b/saber/funcs/impl/x86/saber_attension_lstm.cpp
@@ -97,7 +97,7 @@ void sequence_pool(const Dtype* data, const Dtype* weight, std::vector<int>& seq
 
         for (int j = seq_offset[i]; j < seq_offset[i + 1]; j++) {
             Dtype scale = weight[j];
-            Dtype* tmp_data = data + j * dim;
+            const Dtype* tmp_data = data + j * dim;
 
             for (int k = 0; k < dim; k++) {
                 tmp_out[k] += scale * tmp_data[k];

diff --git a/saber/funcs/impl/x86/saber_col2im_deconv.cpp b/saber/funcs/impl/x86/saber_col2im_deconv.cpp
@@ -171,6 +171,7 @@ SaberStatus SaberCol2ImDeconv<AK_FLOAT>::dispatch(const std::vector<Tensor<X86>
                       with_relu);
         }
     }
+    return SaberSuccess;
 }
 }
 }
diff --git a/saber/funcs/impl/x86/saber_detection_output.h b/saber/funcs/impl/x86/saber_detection_output.h
@@ -112,7 +112,7 @@ class SaberDetectionOutput<X86, OpDtype> : \
     dtype* _bbox_cpu_data{nullptr};
     dtype* _conf_cpu_data{nullptr};
 };
-template class SaberDetectionOutput<X86>;
+
 } //namespace saber
 
 } //namespace anakin

diff --git a/saber/funcs/impl/x86/saber_embedding.cpp b/saber/funcs/impl/x86/saber_embedding.cpp
@@ -84,6 +84,7 @@ SaberStatus SaberEmbedding<X86, OpDtype>::dispatch(
             }
         }
     }
+    return SaberSuccess;
 
 }
 

diff --git a/saber/funcs/impl/x86/saber_match_matrix.cpp b/saber/funcs/impl/x86/saber_match_matrix.cpp
@@ -60,7 +60,7 @@ void padding_out(const dtype* src, std::vector<int>& offset_r, int dim_t, int le
     int tl = dim_t * len_l;
     for (int i = 0; i < seq_num; i++) {
         dtype* dst_tmp = dst + i * tl * max_len_r;
-        dtype* src_tmp = src + offset_r[i] *  tl;
+        const dtype* src_tmp = src + offset_r[i] *  tl;
         int cur_len = offset_r[i+1] - offset_r[i];
         for (int j = 0; j < cur_len; j++) {
             for (int k = 0; k < tl; k++) {

diff --git a/saber/funcs/impl/x86/saber_normal_activation.h b/saber/funcs/impl/x86/saber_normal_activation.h
@@ -17,7 +17,7 @@ namespace saber {
 
 template<typename Dtype>
 inline Dtype InValidAct(Dtype a) {
-    CHECK_EQ(0, 1) << "InValidAct";
+    return 0;  // ignores `a` and yields 0 without reporting anything -- NOTE(review): confirm silently returning is intended
 }
 
 template<typename Dtype>
@@ -44,6 +44,11 @@ inline Dtype Identity(const Dtype a) {
 
 #if defined(__SSE4_2__) and defined(__FMA__)
 
+template<>
+inline __m128 InValidAct<__m128>(const __m128 a) {  // explicit specialization for 128-bit float vectors
+    return _mm_set1_ps(0.0f);  // every lane is 0.0f; the argument is not read
+}
+
 
 template<>
 inline __m128 Relu<__m128>(const __m128 a) {
@@ -80,6 +85,10 @@ inline __m128 Tanh<__m128>(const __m128 a) {
 
 #if defined(__AVX2__) and defined(__FMA__)
 
+template<>
+inline __m256 InValidAct<__m256>(const __m256 a) {  // explicit specialization for 256-bit float vectors
+    return _mm256_set1_ps(0.0f);  // every lane is 0.0f; the argument is not read
+}
 
 template<>
 inline __m256 Relu<__m256>(const __m256 a) {
@@ -112,6 +121,10 @@ inline __m256 Tanh<__m256>(const __m256 a) {
 
 #if defined(__AVX512F__)
 
+template<>
+inline __m512 InValidAct<__m512>(const __m512 a) {  // explicit specialization for 512-bit float vectors
+    return _mm512_set1_ps(0.0f);  // every lane is 0.0f; the argument is not read
+}
 
 template<>
 inline __m512 Relu<__m512>(const __m512 a) {

diff --git a/saber/funcs/impl/x86/saber_sequence_conv.cpp b/saber/funcs/impl/x86/saber_sequence_conv.cpp
@@ -57,8 +57,8 @@ SaberStatus SaberSequenceConv<X86, OpDtype>::dispatch(
                       _hidden_size);
     }
 
-    gemm(false, false, word_num, _feature_size, _hidden_kernel_size, 1.f, _temp_im2col_tensor.data(),
-         param.filter_tensor->data(), 0.f, out_data->mutable_data());
+    gemm(false, false, word_num, _feature_size, _hidden_kernel_size, 1.f, static_cast<const float*>(_temp_im2col_tensor.data()),
+         static_cast<const float*>(param.filter_tensor->data()), 0.f, static_cast<float*>(out_data->mutable_data()));
     std::vector<std::vector<int>> voffset;
     voffset.push_back(offset);
     out_data->set_seq_offset(voffset);

diff --git a/saber/funcs/impl/x86/saber_sequence_expand.cpp b/saber/funcs/impl/x86/saber_sequence_expand.cpp
@@ -37,7 +37,7 @@ SequenceExpandParam<X86>& param) {
 
     auto ref_offset = inputs[1]->get_seq_offset()[0];
     size_t len = inputs[0]->valid_size();
-    OpDataType* input_data = static_cast<const OpDataType* >(inputs[0]->data());
+    const OpDataType* input_data = static_cast<const OpDataType* >(inputs[0]->data());
     OpDataType* output_data =  static_cast<OpDataType* >(outputs[0]->mutable_data());
     int dim = inputs[0]->valid_size() / inputs[0]->num();
 

diff --git a/saber/funcs/impl/x86/saber_topk_avg_pooling.cpp b/saber/funcs/impl/x86/saber_topk_avg_pooling.cpp
@@ -43,6 +43,7 @@ SaberStatus SaberTopKAvgPooling<X86, OpDtype>::get_topk(std::vector<OpDataType>&
     for (int k = real_k; k < top_k; k++) {
        dst[k] = (OpDataType) 0.f;
     }
+    return SaberSuccess;
 }
 
 

diff --git a/saber/funcs/impl/x86/saber_topk_pooling.cpp b/saber/funcs/impl/x86/saber_topk_pooling.cpp
@@ -43,6 +43,7 @@ SaberStatus SaberTopKPooling<X86, OpDtype>::get_topk(std::vector<OpDataType>& sr
     for (int k = real_k; k < top_k; k++) {
         dst[k] = (OpDataType) 0.f;
     }
+    return SaberSuccess;
 }
 
 template <DataType OpDtype>
@@ -76,7 +77,7 @@ SaberStatus SaberTopKPooling<X86, OpDtype>::dispatch(
         int feat_map_size = height_stride * width_stride;
         for (int c = 0; c < channel; c++) {
             OpDataType* tmp_out_data = output_data + (i * channel + c) * top_k;
-            OpDataType* tmp_in_data = input_data + (i * channel + c) * feat_map_size;
+            const OpDataType* tmp_in_data = input_data + (i * channel + c) * feat_map_size;
             std::vector<OpDataType> vec;
 
             for (int h = 0; h < height; h++) {

diff --git a/saber/funcs/impl/x86/sequence2batch.cpp b/saber/funcs/impl/x86/sequence2batch.cpp
@@ -29,13 +29,14 @@ void CopyMatrixRowsFunctor<Dtype, LayOutType>::operator()(
         LOG(ERROR) << "hidden size should be divided with no remainder by fragment_num.";
         exit(-1);
     }
+    typedef typename DataTrait<X86,Dtype>::PtrDtype Data_ptr;
 
     auto height = dst_shape[0];
     auto dst_width = dst_shape[1] / fragment_num;
     auto src_width = src_shape[1] / fragment_num;
     auto real_width = (width != 0) ? width : (dst_width > src_width ? src_width : dst_width);
-    auto* src_data = src->data();
-    auto* dst_data = dst->mutable_data();
+    Data_ptr src_data = static_cast<Data_ptr>(src->data());
+    Data_ptr dst_data = static_cast<Data_ptr>(dst->mutable_data());
 
     if (is_src_index) {
         #pragma omp parallel for collapse(2)

diff --git a/saber/saber_funcs_param.h b/saber/saber_funcs_param.h
@@ -104,7 +104,9 @@ struct AffineChannelParam {
 
     AffineChannelParam(const AffineChannelParam<TargetType>& right) {}
 
-    AffineChannelParam<TargetType>& operator=(const AffineChannelParam<TargetType>& right) {}
+    AffineChannelParam<TargetType>& operator=(const AffineChannelParam<TargetType>& right) {
+        return *this;  // nothing is copied from `right`; the return is needed because flowing off the end of a non-void function is undefined behavior
+    }
 
     bool operator==(const AffineChannelParam<TargetType>& right) {return true;}
 };
@@ -1053,6 +1055,7 @@ struct ExpandParam{
     }
     ExpandParam& operator=(const ExpandParam& right) {
        expand_times = right.expand_times;
+        return *this;  // the operator is declared to return ExpandParam&; without this the function flows off its end (undefined behavior)
     }
     bool operator==(const ExpandParam& right) {
         bool flag = true;
@@ -1534,6 +1537,7 @@ struct MatMulParam {
     MatMulParam& operator=(const MatMulParam& right) {
         _is_transpose_X = right._is_transpose_X;
         _is_transpose_Y = right._is_transpose_Y;
+        return *this;  // the operator is declared to return MatMulParam&; without this the function flows off its end (undefined behavior)
     }
     bool operator==(const MatMulParam& right) {
         bool comp_eq = true;
@@ -2396,7 +2400,9 @@ template <typename TargetType>
 struct TransposeParam {
     TransposeParam() = default;
     TransposeParam(const TransposeParam& right) {}
-    TransposeParam& operator=(const TransposeParam& right) {}
+    TransposeParam& operator=(const TransposeParam& right) {
+        return *this;  // nothing is copied from `right`; the return is needed because flowing off the end of a non-void function is undefined behavior
+    }
     bool operator==(const TransposeParam& right) {
         return true;
     }
@@ -2534,7 +2540,9 @@ template <typename TargetType>
 struct MeanParam {
     MeanParam() = default;
     MeanParam(const MeanParam& right) {}
-    MeanParam& operator=(const MeanParam& right) {}
+    MeanParam& operator=(const MeanParam& right) {
+        return *this;  // nothing is copied from `right`; the return is needed because flowing off the end of a non-void function is undefined behavior
+    }
     bool operator==(const MeanParam& right) {
         return true;
     }

diff --git a/test/framework/net/net_exec_test.cpp b/test/framework/net/net_exec_test.cpp
@@ -348,7 +348,7 @@ TEST(NetTest, net_execute_reconstruction_test) {
 int main(int argc, const char** argv){
     if (argc < 2){
         LOG(ERROR)<<"no input!!!";
-        return;
+        return -1;
     }
     if (argc > 1) {
         g_model_path = std::string(argv[1]);

diff --git a/test/framework/net/net_exec_test_x86.cpp b/test/framework/net/net_exec_test_x86.cpp
@@ -75,7 +75,7 @@ TEST(NetTest, net_execute_base_test) {
 int main(int argc, const char** argv){
     if (argc < 2){
         LOG(ERROR)<<"no input!!!";
-        return;
+        return -1;
     }
     if (argc > 1) {
         g_model_path = std::string(argv[1]);

diff --git a/test/saber/test_saber_activation.cpp b/test/saber/test_saber_activation.cpp
@@ -134,7 +134,7 @@ void test_model() {
 
     //test example
     for (auto shape : {input_shape, input_shape2}) {
-    for (auto act : {1, 2, 3, 4, 5, 9, 10, active}) {
+    for (auto act : {Active_sigmoid,Active_relu, Active_tanh, Active_clipped_relu, Active_elu, Active_stanh, Active_prelu}) {
     LOG(INFO) << "================ active: " << act;
 
     for (auto neg_slope : {-1.0, 0.5}) {

diff --git a/test/saber/test_saber_base.h b/test/saber/test_saber_base.h
@@ -181,10 +181,11 @@ class TestSaberBase{
         add_inputs_shape(shape_v);
         for(int i = 0; i < _op_input_num; ++i)
         {
-            SaberStatus status = _inputs_dev[0][i]->set_dtype(input[i]->get_dtype());
-            status &= _inputs_host[0][i]->set_dtype(input[i]->get_dtype());
-            if(!status)
+            SaberStatus status_dev = _inputs_dev[0][i]->set_dtype(input[i]->get_dtype());
+            SaberStatus status_host= _inputs_host[0][i]->set_dtype(input[i]->get_dtype());
+            if (status_dev != SaberSuccess || status_host != SaberSuccess){
                 LOG(INFO) << "ERROR";
+            }
             _inputs_dev[0][i] -> copy_from(*input[i]);
             _inputs_host[0][i] -> copy_from(*input[i]);
             if(input[i]->get_seq_offset().size() > 0){

diff --git a/test/saber/test_saber_conv_eltwise.cpp b/test/saber/test_saber_conv_eltwise.cpp
@@ -221,8 +221,9 @@ int test_conv_results(int group,
                                    group, kernel_w, kernel_h, stride_w, stride_h,
                                    dilation_w, dilation_h, pad_w, pad_h, bias_term,
                                    param.activation_param.has_active, 1.f);
-
+#ifdef USE_CUDA
     cudaDeviceSynchronize();
+#endif
     conv.init(input_v, output_v, conv_eltwise_param, strategy, imp, ctx1);
     conv.trans_weights(*param.mutable_weight(), *param.mutable_bias(),
                        param.pad_h, param.pad_w, param.dilation_h, param.dilation_w,

diff --git a/test/saber/test_saber_gru.cpp b/test/saber/test_saber_gru.cpp
@@ -16,7 +16,7 @@ using namespace std;
 
 template <typename Dtype>
 static Dtype InValidAct(Dtype a) {
-    CHECK(false)<<"InValidAct";
+    return 0;  // ignores `a` and yields 0 without reporting anything -- NOTE(review): confirm silently returning is intended
 }
 
 template <typename Dtype>
-Original file line number
+Diff line change
@@ Expand Up @@
                           with_relu);
             }
         }
+        return SaberSuccess;
     }
     }
     }