Skip to content
This repository was archived by the owner on Jan 24, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ if((NOT BUILD_FAT_BIN) AND (NOT BUILD_CROSS_PLANTFORM) AND USE_CUDA)
endif()

if(USE_X86_PLACE)
if(NOT DEFINED BUILD_X86_TARGET)
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(BUILD_X86_ARCH "clang_native")
elseif(NOT DEFINED BUILD_X86_TARGET)
set(BUILD_X86_ARCH "native")
anakin_get_cpu_arch(BUILD_X86_ARCH)
else()
Expand Down
5 changes: 4 additions & 1 deletion cmake/compiler_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ if(X86_COMPILE_482)
# anakin_add_compile_option(-static-libgcc)
endif()

if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
anakin_add_compile_option(-fabi-version=6)
anakin_add_compile_option(-march=${BUILD_X86_ARCH})
anakin_add_compile_option(-march=${BUILD_X86_ARCH})
endif()

anakin_add_compile_option(-Ofast)
anakin_add_compile_option(-ffast-math)
anakin_add_compile_option(-Wall)
Expand Down
4 changes: 2 additions & 2 deletions examples/anakin/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
DEBUG_FLAG="-std=c++11 -g -I../../framework/c_api/ -I./ -I../../build/ -ldl -Wno-narrowing "
ORI_FAST_FLAG="-std=c++11 -Ofast -ffast-math -I../../framework/c_api/ -I./ -ldl -Wno-narrowing "
STATIC_FAST_FLAG="-std=c++11 -Ofast -ffast-math -I../../output -I./ -ldl -Wno-narrowing -I../../output/framework/c_api/"
FAST_FLAG="-std=c++11 -g -static-libstdc++ --sysroot=/opt/compiler/gcc-4.8.2/ -Wl,-rpath,/opt/compiler/gcc-4.8.2/lib64/ -Wl,-dynamic-linker,/opt/compiler/gcc-4.8.2/lib64/ld-linux-x86-64.so.2 -Ofast -ffast-math -I../../output/framework/c_api/ -I./ -ldl -Wno-narrowing"
FAST_FLAG="-std=c++11 -g -static-libstdc++ --sysroot=/opt/compiler/gcc-4.8.2/ -Wl,-rpath,/opt/compiler/gcc-4.8.2/lib64/ -Wl,-dynamic-linker,/opt/compiler/gcc-4.8.2/lib64/ld-linux-x86-64.so.2 -Ofast -ffast-math -I../../output/framework/c_api/ -I./ -I../../framework/c_api/ -ldl -Wno-narrowing "
g++ example.cpp -o example $FAST_FLAG
g++ map_rnn.cpp -o map_rnn $FAST_FLAG
g++ map_rnn.cpp -o map_rnn ${FAST_FLAG}
43 changes: 30 additions & 13 deletions examples/anakin/map_rnn.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "anakin_helper.h"
#include <string.h>
bool g_print_data=false;
class Data {
public:
Data(std::string file_name, int batch_size) :
Expand Down Expand Up @@ -197,19 +198,21 @@ class AKRNNExampleX86 {
input_fea->set_dev_lod_offset(lod);
_anakin_obj->prediction();

#ifdef PRINT_RESULT
AnakinRunerTensorInterface* output_0 = _anakin_obj->get_output_tensor(0);
for (int seq_id = 0; seq_id < seq_offset.size() - 1; seq_id++) {
int seq_len = seq_offset[seq_id + 1] - seq_offset[seq_id];
int seq_start = seq_offset[seq_id];

for (int i = 0; i < seq_len - 1; i++) {
printf("%f|", static_cast<float*>(output_0->get_host_data())[seq_start + i]);
}
if(g_print_data){
AnakinRunerTensorInterface* output_0 = _anakin_obj->get_output_tensor(0);
for (int seq_id = 0; seq_id < seq_offset.size() - 1; seq_id++) {
int seq_len = seq_offset[seq_id + 1] - seq_offset[seq_id];
int seq_start = seq_offset[seq_id];

for (int i = 0; i < seq_len - 1; i++) {
printf("%f|", static_cast<float*>(output_0->get_host_data())[seq_start + i]);
}

printf("%f\n", static_cast<float*>(output_0->get_host_data())[seq_start + seq_len - 1]);
printf("%f\n", static_cast<float*>(output_0->get_host_data())[seq_start + seq_len - 1]);
}
}
#endif


// output_0->copy_data_dev_2_host();
// float* out_ptr = static_cast<float*>(output_0->get_host_data());
Expand Down Expand Up @@ -249,9 +252,23 @@ int main(int argc, const char** argv) {
}

if (argc > 5) {
so_path = argv[5];
g_print_data=atoi(argv[5]);
}

if (argc > 6) {
so_dir=argv[6];
}

if(argc > 7){
so_path = argv[7];
}

if(argc<=7){
AKRNNExampleX86 ak_run(so_dir, model_path, max_batch);
ak_run.run(data_path,batch_size);
}else {
AKRNNExampleX86 ak_run(so_dir, so_path, model_path, max_batch);
ak_run.run(data_path,batch_size);
}

AKRNNExampleX86 ak_run(so_dir, so_path,model_path,max_batch);
ak_run.run(data_path,batch_size);
}
4 changes: 2 additions & 2 deletions saber/core/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,14 @@ class Buffer {
/**
* \brief return const data pointer
*/
const TPtr get_data(){
const TPtr get_data()const {
return _data;
}

/**
* \brief return mutable data pointer
*/
TPtr get_data_mutable(){
TPtr get_data_mutable()const{
return _data;
}

Expand Down
11 changes: 1 addition & 10 deletions saber/funcs/impl/x86/kernel/jit_avx512_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,7 @@ SaberStatus JitAvx512Conv<AK_FLOAT>::dispatch(
const float *ptr_weights = reinterpret_cast<const float*>(weights_internal->data());
const float *ptr_bias = reinterpret_cast<const float*>(bias->data());

auto ptr_dst = NULL;
switch (type){
case AK_UINT8: ptr_dst = reinterpret_cast<unsigned char*>(outputs[0]->mutable_data()); break;
case AK_INT8: ptr_dst = reinterpret_cast<char*>(outputs[0]->mutable_data()); break;
case AK_UINT32: ptr_dst = reinterpret_cast<unsigned int*>(outputs[0]->mutable_data()); break;
case AK_INT32: ptr_dst = reinterpret_cast<int*>(outputs[0]->mutable_data()); break;
case AK_FLOAT: ptr_dst = reinterpret_cast<float*>(outputs[0]->mutable_data()); break;
default: LOG(FATAL) << "data type: " << type << " is unsupported now";
}
//ptr_dst = reinterpret_cast<float*>(outputs[0]->mutable_data());
auto ptr_dst= static_cast<float*>(outputs[0]->mutable_data());

const auto &jcp = kernel->jcp;

Expand Down
2 changes: 1 addition & 1 deletion saber/funcs/impl/x86/saber_attension_lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void sequence_pool(const Dtype* data, const Dtype* weight, std::vector<int>& seq

for (int j = seq_offset[i]; j < seq_offset[i + 1]; j++) {
Dtype scale = weight[j];
Dtype* tmp_data = data + j * dim;
const Dtype* tmp_data = data + j * dim;

for (int k = 0; k < dim; k++) {
tmp_out[k] += scale * tmp_data[k];
Expand Down
1 change: 1 addition & 0 deletions saber/funcs/impl/x86/saber_col2im_deconv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ SaberStatus SaberCol2ImDeconv<AK_FLOAT>::dispatch(const std::vector<Tensor<X86>
with_relu);
}
}
return SaberSuccess;
}
}
}
2 changes: 1 addition & 1 deletion saber/funcs/impl/x86/saber_detection_output.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class SaberDetectionOutput<X86, OpDtype> : \
dtype* _bbox_cpu_data{nullptr};
dtype* _conf_cpu_data{nullptr};
};
template class SaberDetectionOutput<X86>;

} //namespace saber

} //namespace anakin
Expand Down
1 change: 1 addition & 0 deletions saber/funcs/impl/x86/saber_embedding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ SaberStatus SaberEmbedding<X86, OpDtype>::dispatch(
}
}
}
return SaberSuccess;

}

Expand Down
2 changes: 1 addition & 1 deletion saber/funcs/impl/x86/saber_match_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void padding_out(const dtype* src, std::vector<int>& offset_r, int dim_t, int le
int tl = dim_t * len_l;
for (int i = 0; i < seq_num; i++) {
dtype* dst_tmp = dst + i * tl * max_len_r;
dtype* src_tmp = src + offset_r[i] * tl;
const dtype* src_tmp = src + offset_r[i] * tl;
int cur_len = offset_r[i+1] - offset_r[i];
for (int j = 0; j < cur_len; j++) {
for (int k = 0; k < tl; k++) {
Expand Down
15 changes: 14 additions & 1 deletion saber/funcs/impl/x86/saber_normal_activation.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace saber {

template<typename Dtype>
inline Dtype InValidAct(Dtype a) {
CHECK_EQ(0, 1) << "InValidAct";
return 0;
}

template<typename Dtype>
Expand All @@ -44,6 +44,11 @@ inline Dtype Identity(const Dtype a) {

#if defined(__SSE4_2__) and defined(__FMA__)

// Explicit specialization of InValidAct for the 128-bit SSE float vector type.
// The input vector `a` is ignored; the result has every lane set to 0.0f.
// NOTE(review): presumably a placeholder for an unsupported activation slot --
// confirm against the call sites that select activation functions.
template<>
inline __m128 InValidAct<__m128>(const __m128 a) {
return _mm_set1_ps(0.0f);
}


template<>
inline __m128 Relu<__m128>(const __m128 a) {
Expand Down Expand Up @@ -80,6 +85,10 @@ inline __m128 Tanh<__m128>(const __m128 a) {

#if defined(__AVX2__) and defined(__FMA__)

// Explicit specialization of InValidAct for the 256-bit AVX float vector type.
// The input vector `a` is ignored; the result has every lane set to 0.0f.
// NOTE(review): presumably a placeholder for an unsupported activation slot --
// confirm against the call sites that select activation functions.
template<>
inline __m256 InValidAct<__m256>(const __m256 a) {
return _mm256_set1_ps(0.0f);
}

template<>
inline __m256 Relu<__m256>(const __m256 a) {
Expand Down Expand Up @@ -112,6 +121,10 @@ inline __m256 Tanh<__m256>(const __m256 a) {

#if defined(__AVX512F__)

// Explicit specialization of InValidAct for the 512-bit AVX-512 float vector type.
// The input vector `a` is ignored; the result has every lane set to 0.0f.
// NOTE(review): presumably a placeholder for an unsupported activation slot --
// confirm against the call sites that select activation functions.
template<>
inline __m512 InValidAct<__m512>(const __m512 a) {
return _mm512_set1_ps(0.0f);
}

template<>
inline __m512 Relu<__m512>(const __m512 a) {
Expand Down
4 changes: 2 additions & 2 deletions saber/funcs/impl/x86/saber_sequence_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ SaberStatus SaberSequenceConv<X86, OpDtype>::dispatch(
_hidden_size);
}

gemm(false, false, word_num, _feature_size, _hidden_kernel_size, 1.f, _temp_im2col_tensor.data(),
param.filter_tensor->data(), 0.f, out_data->mutable_data());
gemm(false, false, word_num, _feature_size, _hidden_kernel_size, 1.f, static_cast<const float*>(_temp_im2col_tensor.data()),
static_cast<const float*>(param.filter_tensor->data()), 0.f, static_cast<float*>(out_data->mutable_data()));
std::vector<std::vector<int>> voffset;
voffset.push_back(offset);
out_data->set_seq_offset(voffset);
Expand Down
2 changes: 1 addition & 1 deletion saber/funcs/impl/x86/saber_sequence_expand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ SequenceExpandParam<X86>& param) {

auto ref_offset = inputs[1]->get_seq_offset()[0];
size_t len = inputs[0]->valid_size();
OpDataType* input_data = static_cast<const OpDataType* >(inputs[0]->data());
const OpDataType* input_data = static_cast<const OpDataType* >(inputs[0]->data());
OpDataType* output_data = static_cast<OpDataType* >(outputs[0]->mutable_data());
int dim = inputs[0]->valid_size() / inputs[0]->num();

Expand Down
1 change: 1 addition & 0 deletions saber/funcs/impl/x86/saber_topk_avg_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ SaberStatus SaberTopKAvgPooling<X86, OpDtype>::get_topk(std::vector<OpDataType>&
for (int k = real_k; k < top_k; k++) {
dst[k] = (OpDataType) 0.f;
}
return SaberSuccess;
}


Expand Down
3 changes: 2 additions & 1 deletion saber/funcs/impl/x86/saber_topk_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ SaberStatus SaberTopKPooling<X86, OpDtype>::get_topk(std::vector<OpDataType>& sr
for (int k = real_k; k < top_k; k++) {
dst[k] = (OpDataType) 0.f;
}
return SaberSuccess;
}

template <DataType OpDtype>
Expand Down Expand Up @@ -76,7 +77,7 @@ SaberStatus SaberTopKPooling<X86, OpDtype>::dispatch(
int feat_map_size = height_stride * width_stride;
for (int c = 0; c < channel; c++) {
OpDataType* tmp_out_data = output_data + (i * channel + c) * top_k;
OpDataType* tmp_in_data = input_data + (i * channel + c) * feat_map_size;
const OpDataType* tmp_in_data = input_data + (i * channel + c) * feat_map_size;
std::vector<OpDataType> vec;

for (int h = 0; h < height; h++) {
Expand Down
5 changes: 3 additions & 2 deletions saber/funcs/impl/x86/sequence2batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ void CopyMatrixRowsFunctor<Dtype, LayOutType>::operator()(
LOG(ERROR) << "hidden size should be divided with no remainder by fragment_num.";
exit(-1);
}
typedef typename DataTrait<X86,Dtype>::PtrDtype Data_ptr;

auto height = dst_shape[0];
auto dst_width = dst_shape[1] / fragment_num;
auto src_width = src_shape[1] / fragment_num;
auto real_width = (width != 0) ? width : (dst_width > src_width ? src_width : dst_width);
auto* src_data = src->data();
auto* dst_data = dst->mutable_data();
Data_ptr src_data = static_cast<Data_ptr>(src->data());
Data_ptr dst_data = static_cast<Data_ptr>(dst->mutable_data());

if (is_src_index) {
#pragma omp parallel for collapse(2)
Expand Down
14 changes: 11 additions & 3 deletions saber/saber_funcs_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ struct AffineChannelParam {

AffineChannelParam(const AffineChannelParam<TargetType>& right) {}

AffineChannelParam<TargetType>& operator=(const AffineChannelParam<TargetType>& right) {}
AffineChannelParam<TargetType>& operator=(const AffineChannelParam<TargetType>& right) {
return *this;
}

bool operator==(const AffineChannelParam<TargetType>& right) {return true;}
};
Expand Down Expand Up @@ -1053,6 +1055,7 @@ struct ExpandParam{
}
ExpandParam& operator=(const ExpandParam& right) {
expand_times = right.expand_times;
return *this;
}
bool operator==(const ExpandParam& right) {
bool flag = true;
Expand Down Expand Up @@ -1534,6 +1537,7 @@ struct MatMulParam {
MatMulParam& operator=(const MatMulParam& right) {
_is_transpose_X = right._is_transpose_X;
_is_transpose_Y = right._is_transpose_Y;
return *this;
}
bool operator==(const MatMulParam& right) {
bool comp_eq = true;
Expand Down Expand Up @@ -2396,7 +2400,9 @@ template <typename TargetType>
struct TransposeParam {
TransposeParam() = default;
TransposeParam(const TransposeParam& right) {}
TransposeParam& operator=(const TransposeParam& right) {}
TransposeParam& operator=(const TransposeParam& right) {
return *this;
}
bool operator==(const TransposeParam& right) {
return true;
}
Expand Down Expand Up @@ -2534,7 +2540,9 @@ template <typename TargetType>
struct MeanParam {
MeanParam() = default;
MeanParam(const MeanParam& right) {}
MeanParam& operator=(const MeanParam& right) {}
MeanParam& operator=(const MeanParam& right) {
return *this;
}
bool operator==(const MeanParam& right) {
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion test/framework/net/net_exec_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ TEST(NetTest, net_execute_reconstruction_test) {
int main(int argc, const char** argv){
if (argc < 2){
LOG(ERROR)<<"no input!!!";
return;
return -1;
}
if (argc > 1) {
g_model_path = std::string(argv[1]);
Expand Down
2 changes: 1 addition & 1 deletion test/framework/net/net_exec_test_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ TEST(NetTest, net_execute_base_test) {
int main(int argc, const char** argv){
if (argc < 2){
LOG(ERROR)<<"no input!!!";
return;
return -1;
}
if (argc > 1) {
g_model_path = std::string(argv[1]);
Expand Down
2 changes: 1 addition & 1 deletion test/saber/test_saber_activation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ void test_model() {

//test example
for (auto shape : {input_shape, input_shape2}) {
for (auto act : {1, 2, 3, 4, 5, 9, 10, active}) {
for (auto act : {Active_sigmoid,Active_relu, Active_tanh, Active_clipped_relu, Active_elu, Active_stanh, Active_prelu}) {
LOG(INFO) << "================ active: " << act;

for (auto neg_slope : {-1.0, 0.5}) {
Expand Down
7 changes: 4 additions & 3 deletions test/saber/test_saber_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,11 @@ class TestSaberBase{
add_inputs_shape(shape_v);
for(int i = 0; i < _op_input_num; ++i)
{
SaberStatus status = _inputs_dev[0][i]->set_dtype(input[i]->get_dtype());
status &= _inputs_host[0][i]->set_dtype(input[i]->get_dtype());
if(!status)
SaberStatus status_dev = _inputs_dev[0][i]->set_dtype(input[i]->get_dtype());
SaberStatus status_host= _inputs_host[0][i]->set_dtype(input[i]->get_dtype());
if (status_dev != SaberSuccess || status_host != SaberSuccess){
LOG(INFO) << "ERROR";
}
_inputs_dev[0][i] -> copy_from(*input[i]);
_inputs_host[0][i] -> copy_from(*input[i]);
if(input[i]->get_seq_offset().size() > 0){
Expand Down
3 changes: 2 additions & 1 deletion test/saber/test_saber_conv_eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,9 @@ int test_conv_results(int group,
group, kernel_w, kernel_h, stride_w, stride_h,
dilation_w, dilation_h, pad_w, pad_h, bias_term,
param.activation_param.has_active, 1.f);

#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
conv.init(input_v, output_v, conv_eltwise_param, strategy, imp, ctx1);
conv.trans_weights(*param.mutable_weight(), *param.mutable_bias(),
param.pad_h, param.pad_w, param.dilation_h, param.dilation_w,
Expand Down
2 changes: 1 addition & 1 deletion test/saber/test_saber_gru.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ using namespace std;

template <typename Dtype>
static Dtype InValidAct(Dtype a) {
CHECK(false)<<"InValidAct";
return 0;
}

template <typename Dtype>
Expand Down
Loading