补充一下Ralf Stubner的解决方案,你可以使用以下C ++版本来
- 同时执行多个列,以避免多次重新读取输出向量。
- 添加
__restrict__
以潜在地允许矢量操作(我猜只是读取,所以可能无关紧要)。
#include <Rcpp.h>
using namespace Rcpp;
inline void mat_vec_mult_vanilla
(double const * __restrict__ m,
double const * __restrict__ v,
double * __restrict__ const res,
size_t const dn, size_t const dm) noexcept {
for(size_t j = 0; j < dm; ++j, ++v){
double * r = res;
for(size_t i = 0; i < dn; ++i, ++r, ++m)
*r += *m * *v;
}
}
inline void mat_vec_mult
(double const * __restrict__ const m,
double const * __restrict__ const v,
double * __restrict__ const res,
size_t const dn, size_t const dm) noexcept {
size_t j(0L);
double const * vj = v,
* mi = m;
constexpr size_t const ncl(8L);
{
double const * mvals[ncl];
size_t const end_j = dm - (dm % ncl),
inc = ncl * dn;
for(; j < end_j; j += ncl, vj += ncl, mi += inc){
double *r = res;
mvals[0] = mi;
for(size_t i = 1; i < ncl; ++i)
mvals[i] = mvals[i - 1L] + dn;
for(size_t i = 0; i < dn; ++i, ++r)
for(size_t ii = 0; ii < ncl; ++ii)
*r += *(vj + ii) * *mvals[ii]++;
}
}
mat_vec_mult_vanilla(mi, vj, res, dn, dm - j);
}
NumericVector mat_vec_mult_cpp(NumericMatrix m, NumericVector v){
size_t const dn = m.nrow(),
dm = m.ncol();
NumericVector res(dn);
mat_vec_mult(&m[0], &v[0], &res[0], dn, dm);
return res;
}
NumericVector mat_vec_mult_vanilla_cpp(NumericMatrix m, NumericVector v){
size_t const dn = m.nrow(),
dm = m.ncol();
NumericVector res(dn);
mat_vec_mult_vanilla(&m[0], &v[0], &res[0], dn, dm);
return res;
}
在我的Makevars文件和gcc-8.3中使用-O3
的结果是
set.seed(1)
dn <- 10001L
dm <- 10001L
m <- matrix(rnorm(dn * dm), dn, dm)
lv <- rnorm(dm)
all.equal(drop(m %*% lv), mat_vec_mult(m = m, v = lv))
all.equal(drop(m %*% lv), mat_vec_mult_vanilla(m = m, v = lv))
bench::mark(
R = m %*% lv,
`OP's version` = my_mm(m = m, v = lv),
`BLAS` = blas_mm(m = m, v = lv),
`C++ vanilla` = mat_vec_mult_vanilla(m = m, v = lv),
`C++` = mat_vec_mult(m = m, v = lv), check = FALSE)
稍有改进。然而,结果可能非常依赖于BLAS版本。我使用的版本是
sessionInfo()
我使用 Rcpp::sourceCpp()
编译的整个文件是
#include <Rcpp.h>
#include <R_ext/BLAS.h>
using namespace Rcpp;
inline void mat_vec_mult_vanilla
(double const * __restrict__ m,
double const * __restrict__ v,
double * __restrict__ const res,
size_t const dn, size_t const dm) noexcept {
for(size_t j = 0; j < dm; ++j, ++v){
double * r = res;
for(size_t i = 0; i < dn; ++i, ++r, ++m)
*r += *m * *v;
}
}
inline void mat_vec_mult
(double const * __restrict__ const m,
double const * __restrict__ const v,
double * __restrict__ const res,
size_t const dn, size_t const dm) noexcept {
size_t j(0L);
double const * vj = v,
* mi = m;
constexpr size_t const ncl(8L);
{
double const * mvals[ncl];
size_t const end_j = dm - (dm % ncl),
inc = ncl * dn;
for(; j < end_j; j += ncl, vj += ncl, mi += inc){
double *r = res;
mvals[0] = mi;
for(size_t i = 1; i < ncl; ++i)
mvals[i] = mvals[i - 1L] + dn;
for(size_t i = 0; i < dn; ++i, ++r)
for(size_t ii = 0; ii < ncl; ++ii)
*r += *(vj + ii) * *mvals[ii]++;
}
}
mat_vec_mult_vanilla(mi, vj, res, dn, dm - j);
}
NumericVector mat_vec_mult_cpp(NumericMatrix m, NumericVector v){
size_t const dn = m.nrow(),
dm = m.ncol();
NumericVector res(dn);
mat_vec_mult(&m[0], &v[0], &res[0], dn, dm);
return res;
}
NumericVector mat_vec_mult_vanilla_cpp(NumericMatrix m, NumericVector v){
size_t const dn = m.nrow(),
dm = m.ncol();
NumericVector res(dn);
mat_vec_mult_vanilla(&m[0], &v[0], &res[0], dn, dm);
return res;
}
NumericVector my_mm(NumericMatrix m, NumericVector v){
int nRow = m.rows();
int nCol = m.cols();
NumericVector ans(nRow);
double v_j;
for(int j = 0; j < nCol; j++){
v_j = v[j];
for(int i = 0; i < nRow; i++){
ans[i] += m(i,j) * v_j;
}
}
return(ans);
}
NumericVector blas_mm(NumericMatrix m, NumericVector v){
int nRow = m.rows();
int nCol = m.cols();
NumericVector ans(nRow);
char trans = 'N';
double one = 1.0, zero = 0.0;
int ione = 1;
F77_CALL(dgemv)(&trans, &nRow, &nCol, &one, m.begin(), &nRow, v.begin(),
&ione, &zero, ans.begin(), &ione);
return ans;
}
getAnywhere(
%*%)
,我们有:function (x, y) .Primitive("%*%")
。所以,这是与一个_C_库进行接口交互,但正如@joran指出的那样,您没有考虑NA
处理。 - coatlessNA
的很好。我唯一能看到的区别是它会产生一个向量而不是矩阵。 - Cliff AB