カーネル密度分布推定

  • 1次元、2次元プロットから密度分布推定をする
    • 分布(たとえば正規分布)の重なりとして推定する
library(KernSmooth)
library(spatgraphs)
# linbin2D関数オブジェクトの呼び出し失敗を回避するために次の処理をする
linbin2D <- get("linbin2D", envir=environment(bkde2D))

# 円周状のプロットを作る
  
Npt<-300
x<-y<-rep(0,Npt)
r<-10
for(i in 1:Npt){
	t<-runif(1)*2*pi/1
	r2<-r*(sin(t*200)+10)+rnorm(1)*0.1
	x[i]<-r2*cos(t)
	y[i]<-r2*sin(t)
}
# spatgraphsのオブジェクトを作る
	pp2d<-list(x=x,y=y,n=length(x),window=list(x=range(c(x,y)),y=range(c(x,y))))
	R<-0.2
	k<-1
   e1<-spatgraph(pp2d,"geometric",par=R)
   e2<-spatgraph(pp2d,"knn",par=k)
   e3<-spatgraph(pp2d,"MST")
      A<-spatcluster(e2)

   #par(mfrow=c(1,3))
   #plot(pp2d,main=paste("Geometric,R =",R))
   #plot(e1,pp2d)
   #plot(pp2d,main=paste("k-nn, k =",k))
   #plot(e2,pp2d)
   #plot(A,pp2d,pch=19)
   plot(pp2d, main="Minimum spanning tree")
   plot(e3,pp2d)


 #f1 <- bkde2D(cbind(x, y), bandwidth=c(width.SJ(x), width.SJ(y)),gridsize=c(101,101))
 f1 <- bkde2D(cbind(x, y) ,bandwidth=c(10,10),gridsize=c(101,101))
  contour(f1$x1, f1$x2, f1$fhat)
persp(f1$fhat)
library(rgl)
xy<-expand.grid(f1$x1,f1$x2)
plot3d(xy[,1],xy[,2],f1$fhat)

# bkde2d の中身を確認するために

x<-cbind(x,y)
gridsize = c(51L, 51L)
truncate = TRUE 
bandwidth=c(10,10)
    
    n <- nrow(x)
    M <- gridsize
    h <- bandwidth
    tau <- 3.4
    if (length(h) == 1L) 
        h <- c(h, h)
    #if (missing(range.x)) {
        range.x <- list(0, 0)
        for (id in (1L:2L)) range.x[[id]] <- c(min(x[, id]) - 
            1.5 * h[id], max(x[, id]) + 1.5 * h[id])
    #}
    a <- c(range.x[[1L]][1L], range.x[[2L]][1L])
    b <- c(range.x[[1L]][2L], range.x[[2L]][2L])
    gpoints1 <- seq(a[1L], b[1L], length = M[1L])
    gpoints2 <- seq(a[2L], b[2L], length = M[2L])
    gcounts <- linbin2D(x, gpoints1, gpoints2)
    L <- numeric(2L)
    kapid <- list(0, 0)
    for (id in 1L:2L) {
        L[id] <- min(floor(tau * h[id] * (M[id] - 1)/(b[id] - 
            a[id])), M[id] - 1L)
        lvecid <- 0:L[id]
        facid <- (b[id] - a[id])/(h[id] * (M[id] - 1L))
        z <- matrix(dnorm(lvecid * facid)/h[id])
        tot <- sum(c(z, rev(z[-1L]))) * facid * h[id]
        kapid[[id]] <- z/tot
    }
    kapp <- kapid[[1L]] %*% (t(kapid[[2L]]))/n
    if (min(L) == 0) 
        warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'")
    P <- 2^(ceiling(log(M + L)/log(2)))
    L1 <- L[1L]
    L2 <- L[2L]
    M1 <- M[1L]
    M2 <- M[2L]
    P1 <- P[1L]
    P2 <- P[2L]
    rp <- matrix(0, P1, P2)
    rp[1L:(L1 + 1), 1L:(L2 + 1)] <- kapp
    if (L1) 
        rp[(P1 - L1 + 1):P1, 1L:(L2 + 1)] <- kapp[(L1 + 1):2, 
            1L:(L2 + 1)]
    if (L2) 
        rp[, (P2 - L2 + 1):P2] <- rp[, (L2 + 1):2]
    sp <- matrix(0, P1, P2)
    sp[1L:M1, 1L:M2] <- gcounts
    rp2 <- fft(rp)
    sp2 <- fft(sp)
    rp3 <- Re(fft(rp2 * sp2, inverse = TRUE)/(P1 * P2))[1L:M1, 1L:M2]
    rp4 <- rp3 * matrix(as.numeric(rp3 > 0), nrow(rp3), ncol(rp3))
    list(x1 = gpoints1, x2 = gpoints2, fhat = rp4)

get()関数

  • 2次元プロットの分布を扱っている
  • KernSmoothパッケージ(今はデフォルトで入る?)のbkde2D()関数でカーネル平滑化ができる
  • その処理の中身をbkde2D()関数のソースに沿って確認しようと思ったら、中で使っているlinbin2D()という関数が「読めません」ととまった
  • このlinbin2D()は、2次元格子のセルごとに点の個数を数える関数なのだが、RでのFortranの関数を呼び出しているようだ
  • 中身はこう
function (X, gpoints1, gpoints2) 
{
    n <- nrow(X)
    X <- c(X[, 1L], X[, 2L])
    M1 <- length(gpoints1)
    M2 <- length(gpoints2)
    a1 <- gpoints1[1L]
    a2 <- gpoints2[1L]
    b1 <- gpoints1[M1]
    b2 <- gpoints2[M2]
    out <- .Fortran(F_lbtwod, as.double(X), as.integer(n), as.double(a1), 
        as.double(a2), as.double(b1), as.double(b2), as.integer(M1), 
        as.integer(M2), double(M1 * M2))
    matrix(out[[9L]], M1, M2)
}
<environment: namespace:KernSmooth>
  • linbin2Dオブジェクトに、この内容の関数を入れるために実行したコマンドがget()函数を用いた、こちら
linbin2D <- get("linbin2D", envir=environment(bkde2D))
  • この方法を見つけたネット上のサイトがこちら
  • get()関数はそのヘルプ記事を見るとオブジェクトの中身を取り出す関数らしい。これでも出る
get("linbin2D")
c  Part of R package KernSmooth
c  Copyright (C) 1995  M. P. Wand
c
c  Unlimited use and distribution (see LICENCE).

cccccccccc FORTRAN subroutine linbin.f cccccccccc

c Obtains bin counts for univariate data
c via the linear binning strategy. If "trun=0" then
c weight from end observations is given to corresponding
c end grid points. If "trun=1" then end observations
c are truncated.

c Last changed: 20 MAR 2009

      subroutine linbin(X,n,a,b,M,trun,gcnts)
      double precision X(*),a,b,gcnts(*),lxi,delta,rem
      integer n,M,i,li,trun

c     Initialize grid counts to zero

      do 10 i=1,M
         gcnts(i) = dble(0)
10    continue

      delta = (b-a)/(M-1)
      do 20 i=1,n
         lxi = ((X(i)-a)/delta) + 1

c        Find integer part of "lxi"

         li = int(lxi) 

         rem = lxi - li
         if (li.ge.1.and.li.lt.M) then
            gcnts(li) = gcnts(li) + (1-rem)
            gcnts(li+1) = gcnts(li+1) + rem
         endif

         if (li.lt.1.and.trun.eq.0) then
            gcnts(1) = gcnts(1) + 1
         endif

         if (li.ge.M.and.trun.eq.0) then
            gcnts(M) = gcnts(M) + 1
         endif

20    continue

      return
      end

cccccccccc End of linbin.f cccccccccc
c  Part of R package KernSmooth
c  Copyright (C) 1995  M. P. Wand
c
c  Unlimited use and distribution (see LICENCE).

cccccccccc FORTRAN subroutine linbin2D.f cccccccccc

c Obtains bin counts for bivariate data
c via the linear binning strategy. In this version
c observations outside the mesh are ignored.

      subroutine lbtwod(X,n,a1,a2,b1,b2,M1,M2,gcnts)
      integer n,M1,M2,i,li1,li2,ind1,ind2,ind3,ind4
      double precision X(*),a1,a2,b1,b2,gcnts(*)
      double precision lxi1,lxi2,delta1,delta2,rem1,rem2

c     Initialize grid cnts to zero

      do 10 i = 1,(M1*M2)
         gcnts(i) = dble(0)
10    continue

      delta1 = (b1 - a1)/(M1 - 1)
      delta2 = (b2 - a2)/(M2 - 1)
      do 20 i = 1,n
         lxi1 = ((X(i) - a1)/delta1) + 1
         lxi2 = ((X(n+i) - a2)/delta2) + 1

c        Find the integer part of "lxi1" and "lxi2"

         li1 = int(lxi1)
         li2 = int(lxi2)
         rem1 = lxi1 - li1
         rem2 = lxi2 - li2
         if (li1.ge.1) then
            if (li2.ge.1) then
               if (li1.lt.M1) then
                  if (li2.lt.M2) then
                     ind1 = M1*(li2-1) + li1
                     ind2 = M1*(li2-1) + li1 + 1
                     ind3 = M1*li2 + li1
                     ind4 = M1*li2 + li1 + 1
                     gcnts(ind1) = gcnts(ind1)+(1-rem1)*(1-rem2)
                     gcnts(ind2) = gcnts(ind2)+rem1*(1-rem2)
                     gcnts(ind3) = gcnts(ind3)+(1-rem1)*rem2
                     gcnts(ind4) = gcnts(ind4)+rem1*rem2
                  endif
               endif
            endif
         endif
20    continue

      return
      end

cccccccccc End of linbin2D.f cccccccccc