diff --git a/DESCRIPTION b/DESCRIPTION index 1d463fb..c9ed2d6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,11 @@ License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 -Imports: attention (>= 0.4.0) +LinkingTo: Rcpp, RcppArmadillo, RcppParallel, RcppEigen +Imports: Rcpp, + RcppArmadillo, + RcppParallel, + RcppEigen Suggests: covr, testthat (>= 3.0.0) diff --git a/NAMESPACE b/NAMESPACE index 6937328..86c1334 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,4 @@ -# Generated by roxygen2: do not edit by hand - -export(SoftMax) -export(attention) -export(row_means) -export(row_vars) -export(transformer) -importFrom(attention,SoftMax) -importFrom(attention,attention) -importFrom(stats,rnorm) +importFrom(Rcpp, evalCpp) +importFrom(RcppParallel, RcppParallelLibs) +exportPattern("^[[:alpha:]]+") +useDynLib(transformer) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..ddddd64 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,63 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +SoftMax <- function(x) { + .Call('_transformer_SoftMax', PACKAGE = 'transformer', x) +} + +attention <- function(Q, K, V) { + .Call('_transformer_attention', PACKAGE = 'transformer', Q, K, V) +} + +feed_forward <- function(x, dff, d_model) { + .Call('_transformer_feed_forward', PACKAGE = 'transformer', x, dff, d_model) +} + +fill_mat_rnorm <- function(mat) { + invisible(.Call('_transformer_fill_mat_rnorm', PACKAGE = 'transformer', mat)) +} + +fill_mat_row <- function(nb_Row, row_Vec) { + .Call('_transformer_fill_mat_row', PACKAGE = 'transformer', nb_Row, row_Vec) +} + +insert_sub_mat <- function(mat, sub_Mat, col_Index) { + invisible(.Call('_transformer_insert_sub_mat', PACKAGE = 'transformer', mat, sub_Mat, col_Index)) +} + +layer_norm <- function(x, epsilon) { + .Call('_transformer_layer_norm', PACKAGE = 'transformer', x, epsilon) +} + 
+mat_mult <- function(mat1, mat2) { + .Call('_transformer_mat_mult', PACKAGE = 'transformer', mat1, mat2) +} + +mat_sum <- function(mat1, mat2) { + .Call('_transformer_mat_sum', PACKAGE = 'transformer', mat1, mat2) +} + +multi_head <- function(Q, K, V, d_model, num_heads) { + .Call('_transformer_multi_head', PACKAGE = 'transformer', Q, K, V, d_model, num_heads) +} + +pmax_mat <- function(mat) { + .Call('_transformer_pmax_mat', PACKAGE = 'transformer', mat) +} + +row_max <- function(mat) { + .Call('_transformer_row_max', PACKAGE = 'transformer', mat) +} + +row_means <- function(mat) { + .Call('_transformer_row_means', PACKAGE = 'transformer', mat) +} + +row_vars <- function(mat) { + .Call('_transformer_row_vars', PACKAGE = 'transformer', mat) +} + +sub_mat <- function(mat, col_Index) { + .Call('_transformer_sub_mat', PACKAGE = 'transformer', mat, col_Index) +} + diff --git a/R/attention.R b/R/attention.R deleted file mode 100644 index a0bd53b..0000000 --- a/R/attention.R +++ /dev/null @@ -1,3 +0,0 @@ -#' @importFrom attention attention -#' @export -attention::attention diff --git a/R/feed_forward.R b/R/feed_forward.R deleted file mode 100644 index 91be3d1..0000000 --- a/R/feed_forward.R +++ /dev/null @@ -1,19 +0,0 @@ -#' @name feed_forward -#' @title Feed Forward Layer -#' @param x inputs -#' @param dff dimensions of feed-forward model -#' @param d_model dimensions of the model -#' @return output of the feed-forward layer -#' @importFrom stats rnorm - -feed_forward <- function(x, dff, d_model) { - W1 <- matrix(rnorm(d_model * dff), d_model, dff) - b1 <- matrix(rnorm(1 * dff), 1, dff) - W2 <- matrix(rnorm(dff * d_model), dff, d_model) - b2 <- matrix(rnorm(1 * d_model), 1, d_model) - - hidden <- pmax(x %*% W1 + matrix(rep(b1, nrow(x)), nrow(x), ncol(b1), byrow = TRUE), 0) - output <- hidden %*% W2 + matrix(rep(b2, nrow(x)), nrow(x), ncol(b2), byrow = TRUE) - - return(output) -} diff --git a/R/layer_norm.R b/R/layer_norm.R deleted file mode 100644 index 
6fd438b..0000000 --- a/R/layer_norm.R +++ /dev/null @@ -1,12 +0,0 @@ -#' @name layer_norm -#' @title Layer Normalization -#' @param x inputs -#' @param epsilon scale -#' @return outputs of layer normalization - -layer_norm <- function(x, epsilon = 1e-6) { - mu <- row_means(x) - sigma_sq <- row_vars(x) - normalized_x <- t((t(x) - mu) / sqrt(sigma_sq + epsilon)) - return(normalized_x) -} diff --git a/R/multi_head.R b/R/multi_head.R deleted file mode 100644 index 79eb423..0000000 --- a/R/multi_head.R +++ /dev/null @@ -1,36 +0,0 @@ -#' @name multi_head -#' @title Multi-Headed Attention -#' @param Q queries -#' @param K keys -#' @param V values -#' @param d_model dimensions of the model -#' @param num_heads number of heads -#' @param mask optional mask -#' @return multi-headed attention outputs - -multi_head <- function(Q, K, V, d_model, num_heads, mask = NULL) { - depth <- d_model / num_heads - - WQ <- matrix(rnorm(d_model * d_model), d_model, d_model) - WK <- matrix(rnorm(d_model * d_model), d_model, d_model) - WV <- matrix(rnorm(d_model * d_model), d_model, d_model) - - Q <- Q %*% WQ - K <- K %*% WK - V <- V %*% WV - - Qs <- lapply(1:num_heads, function(i) Q[, ((i - 1) * depth + 1):(i * depth)]) - Ks <- lapply(1:num_heads, function(i) K[, ((i - 1) * depth + 1):(i * depth)]) - Vs <- lapply(1:num_heads, function(i) V[, ((i - 1) * depth + 1):(i * depth)]) - - outputs <- lapply(1:num_heads, function(i) { - attention(Qs[[i]], Ks[[i]], Vs[[i]], mask) - }) - - concat_attention <- do.call(cbind, lapply(outputs, function(x) x[[1]])) - - WO <- matrix(rnorm(d_model * d_model), d_model, d_model) - output <- concat_attention %*% WO - - return(output) -} diff --git a/R/row_means.R b/R/row_means.R deleted file mode 100644 index ff82e44..0000000 --- a/R/row_means.R +++ /dev/null @@ -1,10 +0,0 @@ -#' @name row_means -#' @title Row Means -#' @param x matrix -#' @return vector with the mean of each of row of the input matrix -#' @export -#' @examples -#' row_means(t(matrix(1:5))) - 
-row_means <- function(x)
-  apply(x, 1, mean)
diff --git a/R/row_vars.R b/R/row_vars.R
deleted file mode 100644
index 1154a93..0000000
--- a/R/row_vars.R
+++ /dev/null
@@ -1,9 +0,0 @@
-#' @name row_vars
-#' @title Row Variances
-#' @param x matrix
-#' @return vector with the variance of each of row of the input matrix
-#' @export
-#' @examples
-#' row_vars(t(matrix(1:5)))
-row_vars <- function(x)
-  row_means((x - row_means(x)) ^ 2)
diff --git a/R/softmax.R b/R/softmax.R
deleted file mode 100644
index 96257a6..0000000
--- a/R/softmax.R
+++ /dev/null
@@ -1,3 +0,0 @@
-#' @importFrom attention SoftMax
-#' @export
-attention::SoftMax
diff --git a/R/transformer.R b/R/transformer.R
deleted file mode 100644
index fcca9c3..0000000
--- a/R/transformer.R
+++ /dev/null
@@ -1,25 +0,0 @@
-#' @name transformer
-#' @title Transformer
-#' @param x inputs
-#' @param d_model dimensions of the model
-#' @param num_heads number of heads
-#' @param dff dimensions of feed-forward model
-#' @param mask optional mask
-#' @return output of the transformer layer
-#' @export
-#' @examples
-#' x <- matrix(rnorm(50 * 512), 50, 512)
-#' d_model <- 512
-#' num_heads <- 8
-#' dff <- 2048
-#'
-#' output <- transformer(x, d_model, num_heads, dff)
-transformer <- function(x, d_model, num_heads, dff, mask = NULL) {
-  attn_output <- multi_head(x, x, x, d_model, num_heads, mask)
-  x1 <- layer_norm(x + attn_output)
-
-  ff_output <- feed_forward(x1, dff, d_model)
-  x2 <- layer_norm(x1 + ff_output)
-
-  return(x2)
-}
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..237fc4b
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1,13 @@
+# Let R choose the compiler and the C++11 switch. Overriding CXX or passing
+# -std= by hand ties the build to one particular toolchain.
+CXX_STD = CXX11
+
+PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DRCPP_PARALLEL_USE_TBB=1
+
+# The sources are C++, so link with the same C++ OpenMP flags used to compile.
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
+# "Rscript" without the ".exe" suffix is found on Unix as well as on Windows.
+# NOTE(review): R_ARCH_BIN is expected to expand to nothing off Windows.
+PKG_LIBS += $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript" \
+  -e "RcppParallel::RcppParallelLibs()")
diff
--git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..3feb7a3 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,216 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include +#include +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// SoftMax +NumericVector SoftMax(const NumericVector& x); +RcppExport SEXP _transformer_SoftMax(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(SoftMax(x)); + return rcpp_result_gen; +END_RCPP +} +// attention +NumericMatrix attention(const NumericMatrix& Q, const NumericMatrix& K, const NumericMatrix& V); +RcppExport SEXP _transformer_attention(SEXP QSEXP, SEXP KSEXP, SEXP VSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type Q(QSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type K(KSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type V(VSEXP); + rcpp_result_gen = Rcpp::wrap(attention(Q, K, V)); + return rcpp_result_gen; +END_RCPP +} +// feed_forward +NumericMatrix feed_forward(const NumericMatrix& x, int dff, int d_model); +RcppExport SEXP _transformer_feed_forward(SEXP xSEXP, SEXP dffSEXP, SEXP d_modelSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); + Rcpp::traits::input_parameter< int >::type dff(dffSEXP); + Rcpp::traits::input_parameter< int >::type d_model(d_modelSEXP); + rcpp_result_gen = Rcpp::wrap(feed_forward(x, dff, d_model)); + return rcpp_result_gen; +END_RCPP +} +// fill_mat_rnorm 
+void fill_mat_rnorm(NumericMatrix& mat); +RcppExport SEXP _transformer_fill_mat_rnorm(SEXP matSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericMatrix& >::type mat(matSEXP); + fill_mat_rnorm(mat); + return R_NilValue; +END_RCPP +} +// fill_mat_row +NumericMatrix fill_mat_row(int nb_Row, const NumericVector row_Vec); +RcppExport SEXP _transformer_fill_mat_row(SEXP nb_RowSEXP, SEXP row_VecSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< int >::type nb_Row(nb_RowSEXP); + Rcpp::traits::input_parameter< const NumericVector >::type row_Vec(row_VecSEXP); + rcpp_result_gen = Rcpp::wrap(fill_mat_row(nb_Row, row_Vec)); + return rcpp_result_gen; +END_RCPP +} +// insert_sub_mat +void insert_sub_mat(NumericMatrix& mat, const NumericMatrix& sub_Mat, const IntegerVector& col_Index); +RcppExport SEXP _transformer_insert_sub_mat(SEXP matSEXP, SEXP sub_MatSEXP, SEXP col_IndexSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericMatrix& >::type mat(matSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type sub_Mat(sub_MatSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type col_Index(col_IndexSEXP); + insert_sub_mat(mat, sub_Mat, col_Index); + return R_NilValue; +END_RCPP +} +// layer_norm +NumericMatrix layer_norm(const NumericMatrix& x, double epsilon); +RcppExport SEXP _transformer_layer_norm(SEXP xSEXP, SEXP epsilonSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); + Rcpp::traits::input_parameter< double >::type epsilon(epsilonSEXP); + rcpp_result_gen = Rcpp::wrap(layer_norm(x, epsilon)); + return rcpp_result_gen; +END_RCPP +} +// mat_mult +NumericMatrix mat_mult(const NumericMatrix& mat1, const NumericMatrix& mat2); +RcppExport SEXP _transformer_mat_mult(SEXP mat1SEXP, SEXP mat2SEXP) 
{ +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat1(mat1SEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat2(mat2SEXP); + rcpp_result_gen = Rcpp::wrap(mat_mult(mat1, mat2)); + return rcpp_result_gen; +END_RCPP +} +// mat_sum +NumericMatrix mat_sum(const NumericMatrix& mat1, const NumericMatrix& mat2); +RcppExport SEXP _transformer_mat_sum(SEXP mat1SEXP, SEXP mat2SEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat1(mat1SEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat2(mat2SEXP); + rcpp_result_gen = Rcpp::wrap(mat_sum(mat1, mat2)); + return rcpp_result_gen; +END_RCPP +} +// multi_head +NumericMatrix multi_head(const NumericMatrix& Q, const NumericMatrix& K, const NumericMatrix& V, int d_model, int num_heads); +RcppExport SEXP _transformer_multi_head(SEXP QSEXP, SEXP KSEXP, SEXP VSEXP, SEXP d_modelSEXP, SEXP num_headsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type Q(QSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type K(KSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type V(VSEXP); + Rcpp::traits::input_parameter< int >::type d_model(d_modelSEXP); + Rcpp::traits::input_parameter< int >::type num_heads(num_headsSEXP); + rcpp_result_gen = Rcpp::wrap(multi_head(Q, K, V, d_model, num_heads)); + return rcpp_result_gen; +END_RCPP +} +// pmax_mat +NumericMatrix pmax_mat(const NumericMatrix& mat); +RcppExport SEXP _transformer_pmax_mat(SEXP matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat(matSEXP); + rcpp_result_gen = Rcpp::wrap(pmax_mat(mat)); + return rcpp_result_gen; +END_RCPP +} +// 
row_max +NumericVector row_max(const NumericMatrix& mat); +RcppExport SEXP _transformer_row_max(SEXP matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat(matSEXP); + rcpp_result_gen = Rcpp::wrap(row_max(mat)); + return rcpp_result_gen; +END_RCPP +} +// row_means +NumericVector row_means(const NumericMatrix& mat); +RcppExport SEXP _transformer_row_means(SEXP matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat(matSEXP); + rcpp_result_gen = Rcpp::wrap(row_means(mat)); + return rcpp_result_gen; +END_RCPP +} +// row_vars +NumericVector row_vars(const NumericMatrix& mat); +RcppExport SEXP _transformer_row_vars(SEXP matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat(matSEXP); + rcpp_result_gen = Rcpp::wrap(row_vars(mat)); + return rcpp_result_gen; +END_RCPP +} +// sub_mat +NumericMatrix sub_mat(const NumericMatrix& mat, const IntegerVector& col_Index); +RcppExport SEXP _transformer_sub_mat(SEXP matSEXP, SEXP col_IndexSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type mat(matSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type col_Index(col_IndexSEXP); + rcpp_result_gen = Rcpp::wrap(sub_mat(mat, col_Index)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_transformer_SoftMax", (DL_FUNC) &_transformer_SoftMax, 1}, + {"_transformer_attention", (DL_FUNC) &_transformer_attention, 3}, + {"_transformer_feed_forward", (DL_FUNC) &_transformer_feed_forward, 3}, + {"_transformer_fill_mat_rnorm", (DL_FUNC) &_transformer_fill_mat_rnorm, 1}, + {"_transformer_fill_mat_row", (DL_FUNC) &_transformer_fill_mat_row, 2}, 
+    {"_transformer_insert_sub_mat", (DL_FUNC) &_transformer_insert_sub_mat, 3},
+    {"_transformer_layer_norm", (DL_FUNC) &_transformer_layer_norm, 2},
+    {"_transformer_mat_mult", (DL_FUNC) &_transformer_mat_mult, 2},
+    {"_transformer_mat_sum", (DL_FUNC) &_transformer_mat_sum, 2},
+    {"_transformer_multi_head", (DL_FUNC) &_transformer_multi_head, 5},
+    {"_transformer_pmax_mat", (DL_FUNC) &_transformer_pmax_mat, 1},
+    {"_transformer_row_max", (DL_FUNC) &_transformer_row_max, 1},
+    {"_transformer_row_means", (DL_FUNC) &_transformer_row_means, 1},
+    {"_transformer_row_vars", (DL_FUNC) &_transformer_row_vars, 1},
+    {"_transformer_sub_mat", (DL_FUNC) &_transformer_sub_mat, 2},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_transformer(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/src/SoftMax.cpp b/src/SoftMax.cpp
new file mode 100644
index 0000000..fb22d5e
--- /dev/null
+++ b/src/SoftMax.cpp
@@ -0,0 +1,26 @@
+#include "SoftMax.h"
+
+// Softmax of a vector. The maximum is subtracted before exponentiating to
+// keep exp() from overflowing; the normalised result is mathematically the same.
+NumericVector SoftMax(const NumericVector &x)
+{
+  NumericVector exp_x = Rcpp::exp(x - Rcpp::max(x));
+  exp_x = exp_x / Rcpp::sum(exp_x);
+  return(exp_x);
+}
+
+// Softmax applied independently to every row of a matrix.
+NumericMatrix SoftMax(const NumericMatrix &x)
+{
+  int nb_Row = x.nrow();
+  int nb_Col = x.ncol();
+  NumericMatrix x_SoftMax(nb_Row, nb_Col);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    x_SoftMax(i, _) = Rcpp::exp(x(i, _) - Rcpp::max(x(i, _)));
+    x_SoftMax(i, _) = x_SoftMax(i, _) / Rcpp::sum(x_SoftMax(i, _));
+  }
+
+  return(x_SoftMax);
+}
diff --git a/src/SoftMax.h b/src/SoftMax.h
new file mode 100644
index 0000000..68c60fc
--- /dev/null
+++ b/src/SoftMax.h
@@ -0,0 +1,16 @@
+#ifndef __SOFTMAX_H__
+#define __SOFTMAX_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericVector SoftMax(const NumericVector &x);
+NumericMatrix SoftMax(const NumericMatrix &x);
+
+#endif
diff --git a/src/attention.cpp b/src/attention.cpp
new file mode 100644
index 0000000..ebf7b28
--- /dev/null
+++ b/src/attention.cpp
@@ -0,0 +1,26 @@
+#include "attention.h"
+#include "mat_mult.h"
+#include "SoftMax.h"
+
+// Scaled dot-product attention: SoftMax(Q K^T / sqrt(dk)) V, where dk is the
+// number of columns of K and the softmax is taken over each row of the scores.
+NumericMatrix attention(const NumericMatrix &Q,
+                        const NumericMatrix &K,
+                        const NumericMatrix &V)
+{
+  // Reject non-conformable inputs up front with a clear R error.
+  if(Q.ncol() != K.ncol())
+  {
+    Rcpp::stop("Q and K must have the same number of columns");
+  }
+  if(K.nrow() != V.nrow())
+  {
+    Rcpp::stop("K and V must have the same number of rows");
+  }
+
+  int dk = K.ncol();
+  NumericMatrix scores = mat_mult(Q, transpose(K)) / std::sqrt(dk);
+  NumericMatrix attention_weights = SoftMax(scores);
+  NumericMatrix output = mat_mult(attention_weights, V);
+  return(output);
+}
diff --git a/src/attention.h b/src/attention.h
new file mode 100644
index 0000000..9153aae
--- /dev/null
+++ b/src/attention.h
@@ -0,0 +1,18 @@
+#ifndef __ATTENTION_H__
+#define __ATTENTION_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix attention(const NumericMatrix &Q,
+                        const NumericMatrix &K,
+                        const NumericMatrix &V);
+
+
+#endif
diff --git a/src/feed_forward.cpp b/src/feed_forward.cpp
new file mode 100644
index 0000000..ea75794
--- /dev/null
+++ b/src/feed_forward.cpp
@@ -0,0 +1,33 @@
+#include "mat_mult.h"
+#include "mat_sum.h"
+#include "fill_mat_row.h"
+#include "fill_mat_rnorm.h"
+#include "pmax_mat.h"
+#include "feed_forward.h"
+
+// Feed-forward layer: max(x W1 + b1, 0) W2 + b2. W1, W2, b1 and b2 are
+// drawn with Rcpp::rnorm on every call, so results depend on the RNG state.
+NumericMatrix feed_forward(const NumericMatrix &x, int dff, int d_model)
+{
+  if(dff < 0 || d_model < 0)
+  {
+    Rcpp::stop("dff and d_model must not be negative");
+  }
+  if(x.ncol() != d_model)
+  {
+    Rcpp::stop("x must have d_model columns");
+  }
+
+  int nb_Row_x = x.nrow();
+  NumericMatrix W1(d_model, dff);
+  NumericMatrix W2(dff, d_model);
+  NumericVector b1 = Rcpp::rnorm(dff);
+  NumericVector b2 = Rcpp::rnorm(d_model);
+  fill_mat_rnorm(W1);
+  fill_mat_rnorm(W2);
+  // fill_mat_row repeats each bias on every row so it can be added with mat_sum.
+  NumericMatrix hidden = mat_sum(mat_mult(x, W1), fill_mat_row(nb_Row_x, b1));
+  NumericMatrix output = mat_sum(mat_mult(pmax_mat(hidden), W2), fill_mat_row(nb_Row_x, b2));
+
+  return(output);
+}
diff --git a/src/feed_forward.h b/src/feed_forward.h
new file mode 100644
index 0000000..6636270
--- /dev/null
+++ b/src/feed_forward.h
@@ -0,0 +1,20 @@
+#ifndef __FEED_FORWARD_H__
+#define __FEED_FORWARD_H__
+
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+//#' @name feed_forward
+//#' @title Feed Forward Layer
+//#' @param x inputs
+//#' @param dff dimensions of feed-forward model
+//#' @param d_model dimensions of the model
+//#' @return output of the feed-forward layer
+// [[Rcpp::export]]
+NumericMatrix feed_forward(const NumericMatrix &x, int dff, int d_model);
+
+#endif
diff --git a/src/fill_mat_rnorm.cpp b/src/fill_mat_rnorm.cpp
new file mode 100644
index 0000000..4d1a0e4
--- /dev/null
+++ b/src/fill_mat_rnorm.cpp
@@ -0,0 +1,13 @@
+#include "fill_mat_rnorm.h"
+
+// Overwrites mat in place, one column at a time, with draws from Rcpp::rnorm.
+void fill_mat_rnorm(NumericMatrix &mat)
+{
+  int nb_Row = mat.nrow();
+  int nb_Col = mat.ncol();
+
+  for(int i = 0; i < nb_Col; i++)
+  {
+    mat(_, i) = Rcpp::rnorm(nb_Row);
+  }
+}
diff --git a/src/fill_mat_rnorm.h b/src/fill_mat_rnorm.h
new file mode 100644
index 0000000..36d875e
--- /dev/null
+++ b/src/fill_mat_rnorm.h
@@ -0,0 +1,15 @@
+#ifndef __FILL_MAT_RNORM_H__
+#define __FILL_MAT_RNORM_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+void fill_mat_rnorm(NumericMatrix &mat);
+
+#endif
diff --git a/src/fill_mat_row.cpp b/src/fill_mat_row.cpp
new file mode 100644
index 0000000..b576fcd
--- /dev/null
+++ b/src/fill_mat_row.cpp
@@ -0,0 +1,16 @@
+#include "fill_mat_row.h"
+
+// Builds a matrix with nb_Row rows, each of them a copy of row_Vec.
+NumericMatrix fill_mat_row(int nb_Row, const NumericVector row_Vec)
+{
+  int nb_Col = row_Vec.size();
+  NumericMatrix mat(nb_Row, nb_Col);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    mat(i, _) = row_Vec;
+  }
+
+  return(mat);
+}
+
diff --git a/src/fill_mat_row.h b/src/fill_mat_row.h
new file mode 100644
index 0000000..4646dc0
--- /dev/null
+++ b/src/fill_mat_row.h
@@ -0,0 +1,15 @@
+#ifndef __FILL_MAT_ROW_H__
+#define __FILL_MAT_ROW_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix fill_mat_row(int nb_Row, const NumericVector row_Vec);
+
+#endif
diff --git a/src/insert_sub_mat.cpp b/src/insert_sub_mat.cpp
new file mode 100644
index 0000000..95a7cdf
--- /dev/null
+++ b/src/insert_sub_mat.cpp
@@ -0,0 +1,13 @@
+#include "insert_sub_mat.h"
+
+// Copies column i of sub_Mat into column col_Index[i] of mat, in place.
+// col_Index holds 0-based column positions.
+void insert_sub_mat(NumericMatrix &mat, const NumericMatrix &sub_Mat, const IntegerVector &col_Index)
+{
+  int nb_Col_Index = col_Index.size();
+
+  for(int i = 0; i < nb_Col_Index; i++)
+  {
+    mat(_, col_Index[i]) = sub_Mat(_, i);
+  }
+}
diff --git a/src/insert_sub_mat.h b/src/insert_sub_mat.h
new file mode 100644
index 0000000..9eaafbb
--- /dev/null
+++ b/src/insert_sub_mat.h
@@ -0,0 +1,15 @@
+#ifndef __INSERT_SUB_MAT_H__
+#define __INSERT_SUB_MAT_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+void insert_sub_mat(NumericMatrix &mat, const NumericMatrix &sub_Mat, const IntegerVector &col_Index);
+
+#endif
diff --git a/src/layer_norm.cpp b/src/layer_norm.cpp
new file mode 100644
index 0000000..7d9a04c
--- /dev/null
+++ b/src/layer_norm.cpp
@@ -0,0 +1,21 @@
+#include "row_means.h"
+#include "row_vars.h"
+#include "layer_norm.h"
+
+// Normalises every row of x to (x - mean) / sqrt(variance + epsilon),
+// using the per-row statistics returned by row_means() and row_vars().
+NumericMatrix layer_norm(const NumericMatrix &x, double epsilon)
+{
+  NumericVector mu = row_means(x);
+  NumericVector sigma_sq = row_vars(x);
+  int nb_Row = x.nrow();
+  int nb_Col = x.ncol();
+  NumericMatrix x_norm(nb_Row, nb_Col);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    x_norm(i, _) = (x(i, _) - mu[i]) / std::pow(sigma_sq[i] + epsilon, 0.5);
+  }
+
+  return(x_norm);
+}
diff --git a/src/layer_norm.h b/src/layer_norm.h
new file mode 100644
index 0000000..e712970
--- /dev/null
+++ b/src/layer_norm.h
@@ -0,0 +1,15 @@
+#ifndef __LAYER_NORM_H__
+#define __LAYER_NORM_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix layer_norm(const NumericMatrix &x, double epsilon);
+
+#endif
diff --git a/src/mat_mult.cpp b/src/mat_mult.cpp
new file mode 100644
index 0000000..35ec149
--- /dev/null
+++ b/src/mat_mult.cpp
@@ -0,0 +1,17 @@
+#include "mat_mult.h"
+
+// Matrix product mat1 * mat2, computed with Eigen.
+NumericMatrix mat_mult(const NumericMatrix &mat1, const NumericMatrix &mat2)
+{
+  // Fail with an R error on non-conformable inputs rather than relying on
+  // Eigen's assertions, which are compiled out when NDEBUG is defined.
+  if(mat1.ncol() != mat2.nrow())
+  {
+    Rcpp::stop("mat1 must have as many columns as mat2 has rows");
+  }
+
+  const Eigen::Map<Eigen::MatrixXd> ttm1(as<Eigen::Map<Eigen::MatrixXd>>(mat1));
+  const Eigen::Map<Eigen::MatrixXd> ttm2(as<Eigen::Map<Eigen::MatrixXd>>(mat2));
+  Eigen::MatrixXd prod = ttm1 * ttm2;
+  return(wrap(prod));
+}
diff --git a/src/mat_mult.h b/src/mat_mult.h
new file mode 100644
index 0000000..f717141
--- /dev/null
+++ b/src/mat_mult.h
@@ -0,0 +1,15 @@
+#ifndef __MAT_MULT_H__
+#define __MAT_MULT_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix mat_mult(const NumericMatrix &mat1, const NumericMatrix &mat2);
+
+#endif
diff --git a/src/mat_mult.o b/src/mat_mult.o
new file mode 100644
index 0000000..1feab75
Binary files /dev/null and b/src/mat_mult.o differ
diff --git a/src/mat_sum.cpp b/src/mat_sum.cpp
new file mode 100644
index 0000000..d9cf40d
--- /dev/null
+++ b/src/mat_sum.cpp
@@ -0,0 +1,17 @@
+#include "mat_sum.h"
+
+// Element-wise sum mat1 + mat2, computed with Eigen.
+NumericMatrix mat_sum(const NumericMatrix &mat1, const NumericMatrix &mat2)
+{
+  // Fail with an R error on mismatched shapes rather than relying on
+  // Eigen's assertions, which are compiled out when NDEBUG is defined.
+  if(mat1.nrow() != mat2.nrow() || mat1.ncol() != mat2.ncol())
+  {
+    Rcpp::stop("mat1 and mat2 must have the same dimensions");
+  }
+
+  const Eigen::Map<Eigen::MatrixXd> ttm1(as<Eigen::Map<Eigen::MatrixXd>>(mat1));
+  const Eigen::Map<Eigen::MatrixXd> ttm2(as<Eigen::Map<Eigen::MatrixXd>>(mat2));
+  Eigen::MatrixXd sum = ttm1 + ttm2;
+  return(wrap(sum));
+}
diff --git a/src/mat_sum.h b/src/mat_sum.h
new file mode 100644
index 0000000..12f7183
--- /dev/null
+++ b/src/mat_sum.h
@@ -0,0 +1,15 @@
+#ifndef __MAT_SUM_H__
+#define __MAT_SUM_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix mat_sum(const NumericMatrix &mat1, const NumericMatrix &mat2);
+
+#endif
diff --git a/src/multi_head.cpp b/src/multi_head.cpp
new file mode 100644
index 0000000..bb63e2a
--- /dev/null
+++ b/src/multi_head.cpp
@@ -0,0 +1,66 @@
+#include "multi_head.h"
+#include "mat_mult.h"
+#include "insert_sub_mat.h"
+#include "sub_mat.h"
+#include "fill_mat_rnorm.h"
+#include "attention.h"
+
+// Multi-head attention: projects Q, K and V with random d_model x d_model
+// weights, runs attention() on each block of `depth` columns (one block per
+// head), writes the head outputs side by side and applies a final random
+// output projection.
+NumericMatrix multi_head(const NumericMatrix &Q,
+                         const NumericMatrix &K,
+                         const NumericMatrix &V,
+                         int d_model,
+                         int num_heads)
+{
+  // Guard the integer division below and make sure the heads tile all
+  // d_model columns; otherwise the trailing columns would be left at zero.
+  if(num_heads < 1)
+  {
+    Rcpp::stop("num_heads must be a positive integer");
+  }
+  if(d_model < 1 || d_model % num_heads != 0)
+  {
+    Rcpp::stop("d_model must be a positive multiple of num_heads");
+  }
+
+  int depth = d_model / num_heads;
+  int nb_Row_Q = Q.nrow();
+  int nb_Col_Q = Q.ncol();
+
+  NumericMatrix WQ(d_model, d_model);
+  NumericMatrix WK(d_model, d_model);
+  NumericMatrix WV(d_model, d_model);
+  NumericMatrix WO(d_model, d_model);
+
+  fill_mat_rnorm(WQ);
+  fill_mat_rnorm(WK);
+  fill_mat_rnorm(WV);
+  fill_mat_rnorm(WO);
+
+  NumericMatrix Q_WQ = mat_mult(Q, WQ);
+  NumericMatrix K_WK = mat_mult(K, WK);
+  NumericMatrix V_WV = mat_mult(V, WV);
+  NumericMatrix Q_WQ_Sub;
+  NumericMatrix K_WK_Sub;
+  NumericMatrix V_WV_Sub;
+
+  IntegerVector index(depth);
+  NumericMatrix concat_attention(nb_Row_Q, nb_Col_Q);
+
+  for(int i = 0; i < num_heads; i++)
+  {
+    // Columns i * depth .. (i + 1) * depth - 1 (0-based) belong to head i.
+    index = Rcpp::seq(i * depth, ((i + 1) * depth - 1));
+    Q_WQ_Sub = sub_mat(Q_WQ, index);
+    K_WK_Sub = sub_mat(K_WK, index);
+    V_WV_Sub = sub_mat(V_WV, index);
+    insert_sub_mat(concat_attention, attention(Q_WQ_Sub, K_WK_Sub, V_WV_Sub), index);
+  }
+
+  NumericMatrix output = mat_mult(concat_attention, WO);
+
+  return(output);
+}
diff --git a/src/multi_head.h b/src/multi_head.h
new file mode 100644
index 0000000..81b8bd5
--- /dev/null
+++ b/src/multi_head.h
@@ -0,0 +1,19 @@
+#ifndef __MULTI_HEAD_H__
+#define __MULTI_HEAD_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix multi_head(const NumericMatrix &Q,
+                         const NumericMatrix &K,
+                         const NumericMatrix &V,
+                         int d_model,
+                         int num_heads);
+
+#endif
diff --git a/src/pmax_mat.cpp b/src/pmax_mat.cpp
new file mode 100644
index 0000000..b3997c6
--- /dev/null
+++ b/src/pmax_mat.cpp
@@ -0,0 +1,19 @@
+#include "pmax_mat.h"
+
+// Element-wise max(mat, 0): returns a copy of mat with negatives set to zero.
+NumericMatrix pmax_mat(const NumericMatrix &mat)
+{
+  int nb_Row = mat.nrow();
+  int nb_Col = mat.ncol();
+  NumericMatrix mat_Max(nb_Row, nb_Col);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    for(int j = 0; j < nb_Col; j++)
+    {
+      mat_Max(i, j) = std::max(mat(i, j), 0.0);
+    }
+  }
+
+  return(mat_Max);
+}
diff --git a/src/pmax_mat.h b/src/pmax_mat.h
new file mode 100644
index 0000000..a618180
--- /dev/null
+++ b/src/pmax_mat.h
@@ -0,0 +1,15 @@
+#ifndef __PMAX_MAT_H__
+#define __PMAX_MAT_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix pmax_mat(const NumericMatrix &mat);
+
+#endif
diff --git a/src/row_max.cpp b/src/row_max.cpp
new file mode 100644
index 0000000..a91e7ee
--- /dev/null
+++ b/src/row_max.cpp
@@ -0,0 +1,15 @@
+#include "row_max.h"
+
+// Maximum of every row of mat, returned as a vector with one entry per row.
+NumericVector row_max(const NumericMatrix &mat)
+{
+  int nb_Row = mat.nrow();
+  NumericVector max_by_row(nb_Row);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    max_by_row[i] = Rcpp::max(mat(i, _));
+  }
+
+  return(max_by_row);
+}
diff --git a/src/row_max.h b/src/row_max.h
new file mode 100644
index 0000000..139fb1e
--- /dev/null
+++ b/src/row_max.h
@@ -0,0 +1,19 @@
+#ifndef __ROW_MAX_H__
+#define __ROW_MAX_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// #' @name row_max
+// #' @title Row Maxima
+// #' @param mat matrix
+// #' @return vector with the maximum of each row of the input matrix
+// [[Rcpp::export]]
+NumericVector row_max(const NumericMatrix &mat);
+
+#endif
diff --git a/src/row_means.cpp b/src/row_means.cpp
new file mode 100644
index 0000000..6ab9949
--- /dev/null
+++ b/src/row_means.cpp
@@ -0,0 +1,15 @@
+#include "row_means.h"
+
+// Mean of every row of mat, returned as a vector with one entry per row.
+NumericVector row_means(const NumericMatrix &mat)
+{
+  int nb_Row = mat.nrow();
+  NumericVector mean_by_row(nb_Row);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    mean_by_row[i] = Rcpp::mean(mat(i, _));
+  }
+
+  return(mean_by_row);
+}
diff --git a/src/row_means.h b/src/row_means.h
new file mode 100644
index 0000000..0ea6b0f
--- /dev/null
+++ b/src/row_means.h
@@ -0,0 +1,19 @@
+#ifndef __ROW_MEANS_H__
+#define __ROW_MEANS_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// #' @name row_means
+// #' @title Row Means
+// #' @param mat matrix
+// #' @return vector with the mean of each row of the input matrix
+// [[Rcpp::export]]
+NumericVector row_means(const NumericMatrix &mat);
+
+#endif
diff --git a/src/row_vars.cpp b/src/row_vars.cpp
new file mode 100644
index 0000000..fb72466
--- /dev/null
+++ b/src/row_vars.cpp
@@ -0,0 +1,20 @@
+#include "row_vars.h"
+
+// Population variance (divisor n, not n - 1) of every row of mat.
+NumericVector row_vars(const NumericMatrix &mat)
+{
+  int nb_Row = mat.nrow();
+  NumericVector var_by_row(nb_Row);
+
+  for(int i = 0; i < nb_Row; i++)
+  {
+    // Centre the row before squaring. mean(x^2) - mean(x)^2 loses precision
+    // when the mean is large relative to the spread and can even come out
+    // negative, which becomes NaN as soon as a square root is taken.
+    double row_mean = Rcpp::mean(mat(i, _));
+    NumericVector centered = mat(i, _) - row_mean;
+    var_by_row[i] = Rcpp::mean(centered * centered);
+  }
+
+  return(var_by_row);
+}
diff --git a/src/row_vars.h b/src/row_vars.h
new file mode 100644
index 0000000..72286fc
--- /dev/null
+++ b/src/row_vars.h
@@ -0,0 +1,15 @@
+#ifndef __ROW_VARS_H__
+#define __ROW_VARS_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericVector row_vars(const NumericMatrix &mat);
+
+#endif
diff --git a/src/sub_mat.cpp b/src/sub_mat.cpp
new file mode 100644
index 0000000..a1a85d1
--- /dev/null
+++ b/src/sub_mat.cpp
@@ -0,0 +1,17 @@
+#include "sub_mat.h"
+
+// Returns the columns of mat listed in col_Index, in that order.
+// col_Index holds 0-based column positions.
+NumericMatrix sub_mat(const NumericMatrix &mat, const IntegerVector &col_Index)
+{
+  int nb_Index = col_Index.size();
+  int nb_Row = mat.nrow();
+  NumericMatrix subset_Matrix(nb_Row, nb_Index);
+
+  for(int i = 0; i < nb_Index; i++)
+  {
+    subset_Matrix(_, i) = mat(_, col_Index[i]);
+  }
+
+  return(subset_Matrix);
+}
diff --git a/src/sub_mat.h b/src/sub_mat.h
new file mode 100644
index 0000000..499d0d8
--- /dev/null
+++ b/src/sub_mat.h
@@ -0,0 +1,15 @@
+#ifndef __SUB_MAT_H__
+#define __SUB_MAT_H__
+
+#include
+#include
+#include
+#include
+
+using namespace Rcpp;
+using namespace std;
+
+// [[Rcpp::export]]
+NumericMatrix sub_mat(const NumericMatrix &mat, const IntegerVector &col_Index);
+
+#endif