library(fda) # display the data files associated with the fda package data(package='fda') system.file('scripts', package='fda') dir( "/Users/mats-ksp/Library/R/3.4/library/fda/scripts") # smoothing methods data(growth) help(growth) BH=growth$hgtm GH=growth$hgtf A=growth$age plot(A,BH[,1],type="l",col=1,ylim=c(min(BH),max(BH))) #First Boy for(i in 2:dim(BH)[2]){ lines(A,BH[,i],type="l",col=i) } # Velocity for boys VBH=t(t(apply(BH,2,diff))%*%diag(diff(A))) AA=A[2:length(A)] plot(AA,VBH[,1],type="l",col=1,ylim=c(min(VBH),max(VBH))) #First Boy for(i in 2:dim(VBH)[2]){ lines(A,BH[,i],type="l",col=i) } # Acceleration for boys ABH=t(t(apply(VBH,2,diff))%*%diag(diff(AA))) AAA=AA[2:length(AA)] plot(AAA,ABH[,1],type="l",col=1,ylim=c(min(ABH),max(ABH))) #First Boy for(i in 2:dim(ABH)[2]){ lines(AAA,ABH[,i],type="l",col=i) } #Fitting the curve for the first boy fit=lm(ABH[,1]~bs(AAA,knots=c(5,8,10,15))) Al=range(AAA) Agrid=seq(from=Al[1],to=Al[2],length.out =100) pred=predict(fit,newdata=list(AAA=Agrid),se=T) plot(AAA,ABH[,1],col="gray") lines(Agrid,pred$fit,lwd=2) lines(Agrid,pred$fit+2*pred$se,lty="dashed") lines(Agrid,pred$fit-2*pred$se,lty="dashed") #Other features of splines dim(bs(AAA,knots=c(5,8,10,15))) dim(bs(AAA,df=7)) attr(bs(AAA,df=7),"knots") #These are knots returned by B-splines method BS=bs(AAA,knots=c(5,8,10,15),intercept=TRUE) dim(BS) plot(AAA,BS[,1], col=1, type='l',ylab="",lwd=2) for(i in 2:8) { lines(AAA,BS[,i], col=i,lwd=2) } fit2=lm(ABH[,1]~ns(AAA,df=7)) pred2=predict(fit2,newdata=list(AAA=Agrid)) lines(Agrid, pred2,col="red",lwd=2) #Smoothing splines title("Smoothing Spline") fit=smooth.spline(AAA,ABH[,1],spar=0.25) fit2=smooth.spline(AAA,ABH[,1], cv=T) fit2$df plot(AAA,ABH[,1],col="gray") lines(fit,col="red",lwd=2) lines(fit2,col="blue",lwd=2) legend("topright",legend=c("50 DF","2 DF"),col=c("red","blue"),lty=1,lwd=2,cex=.8) #Comment on the smoothing splines results. #Part Estimation of the mean and covariance for the functional data dayOfYearShifted = c(182:365, 1:181) logprecav = CanadianWeather$dailyAv[dayOfYearShifted, , 'log10precip'] size=dim(logprecav) plot(logprecav[,1],type="l", col=1, ylim=c(min(logprecav),max(logprecav))) for(i in 2: size[2]){ lines(logprecav[,i],type="l", col=i) } #Mean value mu=rowMeans(logprecav) plot(mu,type="l", col=1, ylim=c(min(logprecav),max(logprecav))) #Covariance plot tt=1:365 x=matrix(logprecav,ncol=size[2]) mu=rowMeans(x) KK=x%*%t(x)/size[2]-mu%*%t(mu) contour(tt,tt,KK) ## ## 5.3. Case Study: The Log Precipitation Data ## # organize data to have winter in the center of the plot dayOfYearShifted = c(182:365, 1:181) logprecav = CanadianWeather$dailyAv[ dayOfYearShifted, , 'log10precip'] # set up a saturated basis: as many basis functions as observations nbasis = 365 yearRng=c(0,365) daybasis = create.fourier.basis(yearRng, nbasis) # set up the harmonic acceleration operator for smoothing Lcoef = c(0,(2*pi/diff(yearRng))^2,0) harmaccelLfd = vec2Lfd(Lcoef, yearRng) # step through values of log(lambda), the parameter that controls smoothing loglam = seq(4,9,0.25) nlam = length(loglam) dfsave = rep(NA,nlam) # measure of the improvements names(dfsave) = loglam gcvsave = dfsave for (ilam in 1:nlam) { cat(paste('log10 lambda =',loglam[ilam],'\n')) lambda = 10^loglam[ilam] fdParobj = fdPar(daybasis, harmaccelLfd, lambda) smoothlist = smooth.basis(day.5, logprecav, fdParobj) dfsave[ilam] = smoothlist$df gcvsave[ilam] = sum(smoothlist$gcv) } # Figure 5.2. plot(loglam, gcvsave, type='b', lwd=2, ylab='GCV Criterion', xlab=expression(log[10](lambda)) ) # smooth data with minimizing value of lambda lambda = 1e6 fdParobj = fdPar(daybasis, harmaccelLfd, lambda) logprec.fit = smooth.basis(day.5, logprecav, fdParobj) logprec.fd = logprec.fit$fd fdnames = list("Day (July 1 to June 30)", "Weather Station" = CanadianWeather$place, "Log 10 Precipitation (mm)") logprec.fd$fdnames = fdnames # plot the functional data object plot(logprec.fd, lwd=2) # Getting to Section 6.1 # define the harmonic acceleration operator Lcoef = c(0,(2*pi/diff(yearRng))^2,0) harmaccelLfd = vec2Lfd(Lcoef, yearRng) # smooth data with lambda that minimizes GCV lambda = 1e6 fdParobj = fdPar(daybasis, harmaccelLfd, lambda) logprec.fd = smooth.basis(day.5, logprecav, fdParobj)$fd # elementary pointwise mean and standard deviation meanlogprec = mean(logprec.fd) stddevlogprec = std.fd(logprec.fd) plot(meanlogprec, ylim=c(min(logprecav),max(logprecav))) lines(mu) logprecvar.bifd = var.fd(logprec.fd) weektime = seq(0,365,length=53) logprecvar_mat = eval.bifd(weektime, weektime, logprecvar.bifd) # Figure 6.1 persp(weektime, weektime, logprecvar_mat, theta=-45, phi=25, r=3, expand = 0.5, ticktype='detailed', xlab="Day (July 1 to June 30)", ylab="Day (July 1 to June 30)", zlab="variance(log10 precip)") contour(weektime, weektime, logprecvar_mat, xlab="Day (July 1 to June 30)", ylab="Day (July 1 to June 30)") # PCA #Example of the principle component analysis logprecav = CanadianWeather$dailyAv[ dayOfYearShifted, , 'log10precip'] dayrange = c(0,365) daybasis = create.fourier.basis(dayrange, 365) Lcoef = c(0,(2*pi/diff(dayrange))^2,0) harmaccelLfd = vec2Lfd(Lcoef, dayrange) lambda = 1e6 fdParobj = fdPar(daybasis, harmaccelLfd, lambda) logprec.fit = smooth.basis(day.5, logprecav, fdParobj) logprec.fd = logprec.fit$fd plot(logprec.fd) plot(logprec.fd[1]) eval.fd(logprec.fd[1],100) # do principal component analysis with 2 components nharm = 2 logprec.pcalist = pca.fd(logprec.fd, nharm) print(logprec.pcalist$values[1:4]) plot(logprec.pcalist$values) plot(logprec.pcalist$harmonics) har=logprec.pcalist$harmonics plot(har[1]) inprod(har[1],har[1]) inprod(har[1],har[2]) # Figure 7.1 plot.pca.fd(logprec.pcalist) # The expansion supplied by the function is too large, # and here we supply a smaller value, 0.5 plot(logprec.pcalist, expand=.5) # Figure 7.2 logprec.rotpcalist = varmx.pca.fd(logprec.pcalist) plot.pca.fd(logprec.rotpcalist, expand=.5) daybasis65 = create.fourier.basis(c(0, 365), nbasis=65,period=365) harmaccelLfd = vec2Lfd(c(0,(2*pi/365)^2,0), c(0, 365)) harmfdPar = fdPar(daybasis65, harmaccelLfd, lambda=1e5) daytempfd = smooth.basis(day.5, CanadianWeather$dailyAv[,,"Temperature.C"], daybasis65, fdnames=list("Day", "Station", "Deg C"))$fd daytemppcaobj = pca.fd(daytempfd, nharm=4, harmfdPar) op = par(mfrow=c(2,2)) plot.pca.fd(daytemppcaobj, cex.main=0.9) par(op) plot(daytemppcaobj$harmonics) ##Extract the eigenvalues ev=daytemppcaobj$values dev.off() plot(ev)