sample():RcppArmadilloを使ってみる

  • Cppの線形代数ライブラリArmadillo
  • sample()をcppにしてみよう。速くはならないけれど、cpp内での処理が可能になる
  • "csample_char.cpp","csample_num.cpp"という関数ファイルを作って、sourceRcpp()する(参考)
// [[Rcpp::depends(RcppArmadillo)]]

#include <RcppArmadilloExtensions/sample.h>

using namespace Rcpp ;

// [[Rcpp::export]]
CharacterVector csample_char( CharacterVector x, 
                              int size,
                              bool replace, 
                              NumericVector prob = NumericVector::create()
                              ) {
  RNGScope scope ;
  CharacterVector ret = RcppArmadillo::sample(x, size, replace, prob) ;
  return ret ;
}
#include <RcppArmadilloExtensions/sample.h>
// [[Rcpp::depends(RcppArmadillo)]]

using namespace Rcpp ;

// [[Rcpp::export]]
NumericVector csample_num( NumericVector x,
                           int size,
                           bool replace,
                           NumericVector prob = NumericVector::create()
                           ) {
  RNGScope scope;
  NumericVector ret = RcppArmadillo::sample(x, size, replace, prob);
  return ret;
}
sourceCpp("csample_char.cpp")
sourceCpp("csample_num.cpp")
  • 使い方
N <- 10
set.seed(7)

sample.r <- sample(letters, N, replace=T)

set.seed(7)
sample.c <- csample_char(letters, N, replace=T)

print(identical(sample.r, sample.c))
  • Rのsample()との比較
library(rbenchmark)
Loading required package: rbenchmark
set.seed(7)

## Definition of Sampling Frame
n.elem <- 1e2
frame1 <- rnorm(n.elem)
probs1 <- runif(n.elem)

## Definition of sampling regime
## Use replacement throughout
.replace <- TRUE
## Samplesize
n.samples1 <- 1e4

## Without probabilities
benchmark(r = sample(frame1, n.samples1, replace=.replace),        
          cpp = csample_num(frame1, n.samples1, replace=.replace),
          replications = 1e3,
          order = 'relative',
          columns = c("test", "replications", "relative", "elapsed")
          )