Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | |||
r_programming_gl [2014/11/21 16:31] glaroc |
r_programming_gl [2014/11/21 16:31] glaroc old revision restored (2014/11/21 15:54) |
||
---|---|---|---|
Line 23: | Line 23: | ||
<file rsplus> | <file rsplus> | ||
mydt=data.table(mydf) | mydt=data.table(mydf) | ||
- | </file>%0 | + | </file> |
+ | |||
+ | Each data table has to be assigned a key, which is one (or more) of the columns from the table. This key defines the basis for the organization and the sorting of the table. | ||
+ | <file rsplus> | ||
+ | setkey(mydt,a) | ||
+ | </file> | ||
+ | |||
+ | Once the key is set, we can return all rows where column a (the key) equals 'F': | ||
+ | <file rsplus> | ||
+ | mydt['F'] | ||
+ | </file> | ||
+ | |||
+ | The following gives the mean value of column b for each letter in column a. | ||
+ | <file rsplus> | ||
+ | mydt[,mean(b),by=a] | ||
+ | </file> | ||
+ | |||
+ | Let's compare the performance of data.table with other methods that achieve the same result. | ||
+ | <file rsplus> | ||
+ | system.time(t1<-mydt[,mean(b),by=a]) | ||
+ | </file> | ||
+ | |||
+ | **With tapply()** | ||
+ | <file rsplus> | ||
+ | system.time(t2<-tapply(mydf$b,mydf$a,mean)) | ||
+ | </file> | ||
+ | |||
+ | **With reshape2** | ||
+ | <file rsplus> | ||
+ | library(reshape2) | ||
+ | meltdf=melt(mydf) | ||
+ | system.time(t3<-dcast(meltdf,a~variable,mean)) | ||
+ | </file> | ||
+ | |||
+ | **With plyr** | ||
+ | <file rsplus> | ||
+ | library(plyr) | ||
+ | system.time(t4<-ddply(mydf,.(a),summarize,mean(b))) | ||
+ | </file> | ||
+ | |||
+ | **With sqldf**. This package allows one to write Structured Query Language (SQL) commands to perform queries on a data frame. | ||
+ | <file rsplus> | ||
+ | library(sqldf) | ||
+ | system.time(t5<-sqldf('SELECT a, avg(b) FROM mydf GROUP BY a')) | ||
+ | </file> | ||
+ | |||
+ | **With a basic FOR loop** | ||
+ | <file rsplus> | ||
+ | ti1<-proc.time() | ||
+ | t6<-data.frame(letter=unique(mydf$a),mean=rep(0,26)) | ||
+ | for (i in t6$letter ){ | ||
+ | t6[t6$letter==i,2]=mean(mydf[mydf$a==i,2]) | ||
+ | } | ||
+ | eltime<-proc.time()-ti1 | ||
+ | eltime | ||
+ | </file> | ||
+ | |||
+ | **With a parallelized FOR loop** | ||
+ | <file rsplus> | ||
+ | library(foreach) | ||
+ | library(doMC) | ||
+ | registerDoMC(4) #Four-core processor | ||
+ | ti1<-proc.time() | ||
+ | t7<-data.frame(letter=unique(mydf$a),mean=rep(0,26)) | ||
+ | t7[,2] <- foreach(i=t7$letter, .combine='c') %dopar% { | ||
+ | mean(mydf[mydf$a==i,2]) | ||
+ | } | ||
+ | eltime<-proc.time()-ti1 | ||
+ | eltime | ||
+ | </file> | ||
+ | |||
+ | ====== RgoogleMaps! ====== | ||
+ | |||
+ | <file rsplus> | ||
+ | library(RgoogleMaps) | ||
+ | myhome=getGeoCode('Olympic stadium, Montreal'); | ||
+ | mymap<-GetMap(center=myhome, zoom=14) | ||
+ | PlotOnStaticMap(mymap,lat=myhome['lat'],lon=myhome['lon'],cex=5,pch=10,lwd=3,col=c('red')); | ||
+ | </file> | ||
+ | |||
+ | ====== Taxize ====== | ||
+ | <file rsplus> | ||
+ | library(taxize) | ||
+ | spp<-tax_name(query=c("american beaver"),get="species") | ||
+ | fam<-tax_name(query=c("american beaver"),get="family") | ||
+ | correctname <- tnrs(c("fraxinus americanus")) | ||
+ | cla<-classification("acer rubrum", db = 'itis') | ||
+ | </file> | ||
+ | |||
+ | ====== Spocc ====== | ||
+ | <file rsplus> | ||
+ | library(spocc) | ||
+ | occ_data <- occ(query = 'Acer nigrum', from = 'gbif') | ||
+ | mapggplot(occ_data) | ||
+ | </file> | ||
+ | |||
+ | Combine spocc and RgoogleMaps | ||
+ | <file rsplus> | ||
+ | occ_data <- occ(query = 'Puma concolor', from = 'gbif') | ||
+ | occ_data_df=occ2df(occ_data) | ||
+ | occ_data_df<-subset(occ_data_df,!is.na(latitude) & latitude!=0) | ||
+ | mymap<-GetMap(center=c(mean(occ_data_df$latitude),mean(occ_data_df$longitude)), zoom=2) | ||
+ | PlotOnStaticMap(mymap,lat=occ_data_df$latitude,lon=occ_data_df$longitude,cex=1,pch=16,lwd=3,col=c('red')); | ||
+ | </file> | ||
+ | |||
+ | ====== geonames ====== | ||
+ | |||
+ | <file rsplus> | ||
+ | library(geonames) | ||
+ | options(geonamesUsername="glaroc") | ||
+ | res<-GNsearch(q="Mont Saint-Hilaire") | ||
+ | res[,c('toponymName','fclName')] | ||
+ | dc<-GNcities(45.4, -73.55, 45.7, -73.6, lang = "en", maxRows = 10) | ||
+ | dc[,c('toponymName')] | ||
+ | </file> | ||
+ | |||
+ |