カーネル密度分布推定
- 1次元、2次元プロットから密度分布推定をする
- 分布(たとえば正規分布)の重なりとして推定する
library(KernSmooth) library(spatgraphs) # linbin2D関数オブジェクトの呼び出し失敗を回避するために次の処理をする linbin2D <- get("linbin2D", envir=environment(bkde2D)) # 円周状のプロットを作る Npt<-300 x<-y<-rep(0,Npt) r<-10 for(i in 1:Npt){ t<-runif(1)*2*pi/1 r2<-r*(sin(t*200)+10)+rnorm(1)*0.1 x[i]<-r2*cos(t) y[i]<-r2*sin(t) } # spatgraphsのオブジェクトを作る pp2d<-list(x=x,y=y,n=length(x),window=list(x=range(c(x,y)),y=range(c(x,y)))) R<-0.2 k<-1 e1<-spatgraph(pp2d,"geometric",par=R) e2<-spatgraph(pp2d,"knn",par=k) e3<-spatgraph(pp2d,"MST") A<-spatcluster(e2) #par(mfrow=c(1,3)) #plot(pp2d,main=paste("Geometric,R =",R)) #plot(e1,pp2d) #plot(pp2d,main=paste("k-nn, k =",k)) #plot(e2,pp2d) #plot(A,pp2d,pch=19) plot(pp2d, main="Minimum spanning tree") plot(e3,pp2d) #f1 <- bkde2D(cbind(x, y), bandwidth=c(width.SJ(x), width.SJ(y)),gridsize=c(101,101)) f1 <- bkde2D(cbind(x, y) ,bandwidth=c(10,10),gridsize=c(101,101)) contour(f1$x1, f1$x2, f1$fhat) persp(f1$fhat) library(rgl) xy<-expand.grid(f1$x1,f1$x2) plot3d(xy[,1],xy[,2],f1$fhat) # bkde2d の中身を確認するために x<-cbind(x,y) gridsize = c(51L, 51L) truncate = TRUE bandwidth=c(10,10) n <- nrow(x) M <- gridsize h <- bandwidth tau <- 3.4 if (length(h) == 1L) h <- c(h, h) #if (missing(range.x)) { range.x <- list(0, 0) for (id in (1L:2L)) range.x[[id]] <- c(min(x[, id]) - 1.5 * h[id], max(x[, id]) + 1.5 * h[id]) #} a <- c(range.x[[1L]][1L], range.x[[2L]][1L]) b <- c(range.x[[1L]][2L], range.x[[2L]][2L]) gpoints1 <- seq(a[1L], b[1L], length = M[1L]) gpoints2 <- seq(a[2L], b[2L], length = M[2L]) gcounts <- linbin2D(x, gpoints1, gpoints2) L <- numeric(2L) kapid <- list(0, 0) for (id in 1L:2L) { L[id] <- min(floor(tau * h[id] * (M[id] - 1)/(b[id] - a[id])), M[id] - 1L) lvecid <- 0:L[id] facid <- (b[id] - a[id])/(h[id] * (M[id] - 1L)) z <- matrix(dnorm(lvecid * facid)/h[id]) tot <- sum(c(z, rev(z[-1L]))) * facid * h[id] kapid[[id]] <- z/tot } kapp <- kapid[[1L]] %*% (t(kapid[[2L]]))/n if (min(L) == 0) warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'") P <- 2^(ceiling(log(M + L)/log(2))) L1 <- L[1L] L2 <- L[2L] M1 <- M[1L] M2 <- M[2L] P1 <- P[1L] P2 <- P[2L] rp <- matrix(0, P1, P2) rp[1L:(L1 + 1), 1L:(L2 + 1)] <- kapp if (L1) rp[(P1 - L1 + 1):P1, 1L:(L2 + 1)] <- kapp[(L1 + 1):2, 1L:(L2 + 1)] if (L2) rp[, (P2 - L2 + 1):P2] <- rp[, (L2 + 1):2] sp <- matrix(0, P1, P2) sp[1L:M1, 1L:M2] <- gcounts rp2 <- fft(rp) sp2 <- fft(sp) rp3 <- Re(fft(rp2 * sp2, inverse = TRUE)/(P1 * P2))[1L:M1, 1L:M2] rp4 <- rp3 * matrix(as.numeric(rp3 > 0), nrow(rp3), ncol(rp3)) list(x1 = gpoints1, x2 = gpoints2, fhat = rp4)
get()関数
- 2次元プロットの分布を扱っている
- KernSmoothパッケージ(今はデフォルトで入る?)のbkde2D()関数でカーネル平滑化ができる
- その処理の中身をbkde2D()関数のソースに沿って確認しようと思ったら、中で使っているlinbin2D()という関数が「読めません」ととまった
- このlinbin2D()は、2次元格子のセルごとに点の個数を数える関数なのだが、RでのFortranの関数を呼び出しているようだ
- 中身はこう
function (X, gpoints1, gpoints2) { n <- nrow(X) X <- c(X[, 1L], X[, 2L]) M1 <- length(gpoints1) M2 <- length(gpoints2) a1 <- gpoints1[1L] a2 <- gpoints2[1L] b1 <- gpoints1[M1] b2 <- gpoints2[M2] out <- .Fortran(F_lbtwod, as.double(X), as.integer(n), as.double(a1), as.double(a2), as.double(b1), as.double(b2), as.integer(M1), as.integer(M2), double(M1 * M2)) matrix(out[[9L]], M1, M2) } <environment: namespace:KernSmooth>
- linbin2Dオブジェクトに、この内容の関数を入れるために実行したコマンドがget()函数を用いた、こちら
linbin2D <- get("linbin2D", envir=environment(bkde2D))
- この方法を見つけたネット上のサイトがこちら
- get()関数はそのヘルプ記事を見るとオブジェクトの中身を取り出す関数らしい。これでも出る
get("linbin2D")
c Part of R package KernSmooth c Copyright (C) 1995 M. P. Wand c c Unlimited use and distribution (see LICENCE). cccccccccc FORTRAN subroutine linbin.f cccccccccc c Obtains bin counts for univariate data c via the linear binning strategy. If "trun=0" then c weight from end observations is given to corresponding c end grid points. If "trun=1" then end observations c are truncated. c Last changed: 20 MAR 2009 subroutine linbin(X,n,a,b,M,trun,gcnts) double precision X(*),a,b,gcnts(*),lxi,delta,rem integer n,M,i,li,trun c Initialize grid counts to zero do 10 i=1,M gcnts(i) = dble(0) 10 continue delta = (b-a)/(M-1) do 20 i=1,n lxi = ((X(i)-a)/delta) + 1 c Find integer part of "lxi" li = int(lxi) rem = lxi - li if (li.ge.1.and.li.lt.M) then gcnts(li) = gcnts(li) + (1-rem) gcnts(li+1) = gcnts(li+1) + rem endif if (li.lt.1.and.trun.eq.0) then gcnts(1) = gcnts(1) + 1 endif if (li.ge.M.and.trun.eq.0) then gcnts(M) = gcnts(M) + 1 endif 20 continue return end cccccccccc End of linbin.f cccccccccc
c Part of R package KernSmooth c Copyright (C) 1995 M. P. Wand c c Unlimited use and distribution (see LICENCE). cccccccccc FORTRAN subroutine linbin2D.f cccccccccc c Obtains bin counts for bivariate data c via the linear binning strategy. In this version c observations outside the mesh are ignored. subroutine lbtwod(X,n,a1,a2,b1,b2,M1,M2,gcnts) integer n,M1,M2,i,li1,li2,ind1,ind2,ind3,ind4 double precision X(*),a1,a2,b1,b2,gcnts(*) double precision lxi1,lxi2,delta1,delta2,rem1,rem2 c Initialize grid cnts to zero do 10 i = 1,(M1*M2) gcnts(i) = dble(0) 10 continue delta1 = (b1 - a1)/(M1 - 1) delta2 = (b2 - a2)/(M2 - 1) do 20 i = 1,n lxi1 = ((X(i) - a1)/delta1) + 1 lxi2 = ((X(n+i) - a2)/delta2) + 1 c Find the integer part of "lxi1" and "lxi2" li1 = int(lxi1) li2 = int(lxi2) rem1 = lxi1 - li1 rem2 = lxi2 - li2 if (li1.ge.1) then if (li2.ge.1) then if (li1.lt.M1) then if (li2.lt.M2) then ind1 = M1*(li2-1) + li1 ind2 = M1*(li2-1) + li1 + 1 ind3 = M1*li2 + li1 ind4 = M1*li2 + li1 + 1 gcnts(ind1) = gcnts(ind1)+(1-rem1)*(1-rem2) gcnts(ind2) = gcnts(ind2)+rem1*(1-rem2) gcnts(ind3) = gcnts(ind3)+(1-rem1)*rem2 gcnts(ind4) = gcnts(ind4)+rem1*rem2 endif endif endif endif 20 continue return end cccccccccc End of linbin2D.f cccccccccc