This is an old revision of the document!


Knitr

Knitr is a package that can be used to generate dynamic reports or web pages from R code. The code is evaluated at the moment the report is generated.

Code can easily be written in RStudio using the Markdown language. View this page in Markdown language, and view the resulting web page.

Data Table

Data table is a very useful R package that simplifies certain operations and makes them more efficient. Data tables are just like data frames. You can even create them from data frames.

Introduction to Data table (PDF)

install.packages('data.table')
library(data.table)

Generate a very long data frame with one column of letters and one column of random numbers.

# Each of the 26 upper-case letters is repeated 1e5 times in column a;
# column b holds one standard-normal draw per row.
mydf <- data.frame(
  a = rep(LETTERS, each = 1e5),
  b = rnorm(length(LETTERS) * 1e5)
)

Convert the data frame to a data table format.

# Build a data.table from the data frame created above.
mydt <- data.table(mydf)

Each data table has to be assigned a key, which is one (or more) of the columns from the table. This key defines the basis for the organization and the sorting of the table.

# Key the table on column a (character-vector form of setkey(mydt, a)).
setkeyv(mydt, "a")

Once the key is set, we can return all rows with column a (the key) equal to F

# Look up the value "F" in the key column a.
mydt["F"]

Gives the mean value of column b for each letter in column a.

# Mean of b within each group of a.
mydt[, mean(b), by = a]

Let's compare the performance of Data table with other methods to achieve the same thing.

# Time the data.table grouped mean; the result is kept in t1.
system.time({
  t1 <- mydt[, mean(b), by = a]
})

With tapply()

# Time the base-R equivalent: tapply() of b over the groups in a.
system.time({
  t2 <- tapply(X = mydf$b, INDEX = mydf$a, FUN = mean)
})

With reshape2

library(reshape2)
# Name the id column explicitly instead of letting melt() guess it (guessing
# prints a "Using a as id variables" message and depends on column types).
meltdf <- melt(mydf, id.vars = "a")
# Time the cast back to one row per letter, aggregating values with mean().
system.time({
  t3 <- dcast(meltdf, a ~ variable, fun.aggregate = mean)
})

With plyr

library(plyr)
# Time ddply(): split mydf by a and summarize each piece with mean(b).
system.time({
  t4 <- ddply(mydf, .(a), summarize, mean(b))
})

With sqldf. This package allows one to write Structured Query Language commands to perform queries on a data frame.

library(sqldf)
# Time the same aggregation written as a SQL GROUP BY query against mydf.
system.time({
  t5 <- sqldf('SELECT a, avg(b) FROM mydf GROUP BY a')
})

With a basic FOR loop

# Baseline: compute the mean of b for each distinct value of a with an
# explicit loop, timing the whole computation with proc.time().
ti1 <- proc.time()
# Size the result from the data instead of hard-coding 26 groups.
t6 <- data.frame(letter = unique(mydf$a))
t6$mean <- numeric(nrow(t6))
for (i in seq_len(nrow(t6))) {
  # Address column b by name rather than by position.
  t6$mean[i] <- mean(mydf[mydf$a == t6$letter[i], "b"])
}
eltime <- proc.time() - ti1
eltime

With a parallelized FOR loop

library(foreach)
library(doMC)
registerDoMC(4) # Four-core processor
# Same per-letter mean as the plain loop, with the iterations dispatched
# through %dopar% and the per-letter results concatenated with c().
ti1 <- proc.time()
# Size the result from the data instead of hard-coding 26 groups.
t7 <- data.frame(letter = unique(mydf$a))
t7$mean <- foreach(i = t7$letter, .combine = "c") %dopar% {
  # Address column b by name rather than by position.
  mean(mydf[mydf$a == i, "b"])
}
eltime <- proc.time() - ti1
eltime

RgoogleMaps!

library(RgoogleMaps)
# Look up "Olympic stadium, Montreal", request a zoom-14 map centred on the
# result, and draw a red marker at its lat/lon.
myhome <- getGeoCode("Olympic stadium, Montreal")
mymap <- GetMap(center = myhome, zoom = 14)
PlotOnStaticMap(
  mymap,
  lat = myhome["lat"], lon = myhome["lon"],
  cex = 5, pch = 10, lwd = 3, col = "red"
)

Taxize

library(taxize)
# Species and family names for the common name "american beaver".
spp <- tax_name(query = "american beaver", get = "species")
fam <- tax_name(query = "american beaver", get = "family")
# NOTE(review): tnrs() presumably resolves the misspelled name to an
# accepted one -- confirm against the installed taxize version.
correctname <- tnrs("fraxinus americanus")
# Classification of "acer rubrum" looked up in the itis database.
cla <- classification("acer rubrum", db = "itis")

Spocc

library(spocc)
# Query gbif for "Acer nigrum" and pass the result to mapggplot().
occ_data <- occ(query = "Acer nigrum", from = "gbif")
mapggplot(occ_data)

Combine spocc and RgoogleMaps

# Query gbif for "Puma concolor" and convert the result to a data frame.
occ_data <- occ(query = "Puma concolor", from = "gbif")
occ_data_df <- occ2df(occ_data)
# Drop records without usable coordinates. A missing longitude would make the
# map centre NA, so test both columns; the latitude != 0 filter is kept.
occ_data_df <- subset(
  occ_data_df,
  !is.na(latitude) & !is.na(longitude) & latitude != 0
)
# Centre a zoom-2 map on the mean coordinates and plot every record in red.
mymap <- GetMap(
  center = c(mean(occ_data_df$latitude), mean(occ_data_df$longitude)),
  zoom = 2
)
PlotOnStaticMap(
  mymap,
  lat = occ_data_df$latitude, lon = occ_data_df$longitude,
  cex = 1, pch = 16, lwd = 3, col = "red"
)

geonames

library(geonames)
options(geonamesUsername = "glaroc")
# Search for "Mont Saint-Hilaire" and show two columns of the result.
res <- GNsearch(q = "Mont Saint-Hilaire")
res[, c("toponymName", "fclName")]
# Cities inside a bounding box. The edges are named explicitly because the
# original positional call passed 45.4 as north and 45.7 as south, which is
# an inverted box (north must be the larger latitude).
dc <- GNcities(
  north = 45.7, east = -73.55, south = 45.4, west = -73.6,
  lang = "en", maxRows = 10
)
dc[, "toponymName"]