Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | Last revision Both sides next revision | ||
r_programming_gl [2014/11/21 15:54] glaroc |
r_programming_gl [2014/11/21 16:31] glaroc |
||
---|---|---|---|
Line 23: | Line 23: | ||
<file rsplus> | <file rsplus> | ||
mydt=data.table(mydf) | mydt=data.table(mydf) | ||
- | </file> | + | </file>%0 |
- | + | ||
- | Each data table has to be assigned a key, which is one (or more) of the columns from the table. This key defines the basis for the organization and the sorting of the table. | + | |
- | <file rsplus> | + | |
- | setkey(mydt,a) | + | |
- | </file> | + | |
- | + | ||
- | Once the key is set, we can return all rows where column a (the key) is equal to 'F': | + |
- | <file rsplus> | + | |
- | mydt['F'] | + | |
- | </file> | + | |
- | + | ||
- | Gives the mean value of column b for each letter in column a. | + | |
- | <file rsplus> | + | |
- | mydt[,mean(b),by=a] | + | |
- | </file> | + | |
- | + | ||
- | Let's compare the performance of data.table with other methods that achieve the same result. | + |
- | <file rsplus> | + | |
- | system.time(t1<-mydt[,mean(b),by=a]) | + | |
- | </file> | + | |
- | + | ||
- | **With tapply()** | + | |
- | <file rsplus> | + | |
- | system.time(t2<-tapply(mydf$b,mydf$a,mean)) | + | |
- | </file> | + | |
- | + | ||
- | **With reshape2** | + | |
- | <file rsplus> | + | |
- | library(reshape2) | + | |
- | meltdf=melt(mydf) | + | |
- | system.time(t3<-dcast(meltdf,a~variable,mean)) | + | |
- | </file> | + | |
- | + | ||
- | **With plyr** | + | |
- | <file rsplus> | + | |
- | library(plyr) | + | |
- | system.time(t4<-ddply(mydf,.(a),summarize,mean(b))) | + | |
- | </file> | + | |
- | + | ||
- | **With sqldf**. This package allows one to write Structured Query Language commands to perform queries on a data frame. | + |
- | <file rsplus> | + | |
- | library(sqldf) | + | |
- | system.time(t5<-sqldf('SELECT a, avg(b) FROM mydf GROUP BY a')) | + | |
- | </file> | + | |
- | + | ||
- | **With a basic FOR loop** | + | |
- | <file rsplus> | + | |
- | ti1<-proc.time() | + | |
- | t6<-data.frame(letter=unique(mydf$a),mean=rep(0,26)) | + | |
- | for (i in t6$letter ){ | + | |
- | t6[t6$letter==i,2]=mean(mydf[mydf$a==i,2]) | + | |
- | } | + | |
- | eltime<-proc.time()-ti1 | + | |
- | eltime | + | |
- | </file> | + | |
- | + | ||
- | **With a parallelized FOR loop** | + | |
- | <file rsplus> | + | |
- | library(foreach) | + | |
- | library(doMC) | + | |
- | registerDoMC(4) #Four-core processor | + | |
- | ti1<-proc.time() | + | |
- | t7<-data.frame(letter=unique(mydf$a),mean=rep(0,26)) | + | |
- | t7[,2] <- foreach(i=t7$letter, .combine='c') %dopar% { | + | |
- | mean(mydf[mydf$a==i,2]) | + | |
- | } | + | |
- | eltime<-proc.time()-ti1 | + | |
- | eltime | + | |
- | </file> | + | |
- | + | ||
- | ====== RgoogleMaps! ====== | + | |
- | + | ||
- | <file rsplus> | + | |
- | library(RgoogleMaps) | + | |
- | myhome=getGeoCode('Olympic stadium, Montreal'); | + | |
- | mymap<-GetMap(center=myhome, zoom=14) | + | |
- | PlotOnStaticMap(mymap,lat=myhome['lat'],lon=myhome['lon'],cex=5,pch=10,lwd=3,col=c('red')); | + | |
- | </file> | + | |
- | + | ||
- | ====== Taxize ====== | + | |
- | <file rsplus> | + | |
- | library(taxize) | + | |
- | spp<-tax_name(query=c("american beaver"),get="species") | + | |
- | fam<-tax_name(query=c("american beaver"),get="family") | + | |
- | correctname <- tnrs(c("fraxinus americanus")) | + | |
- | cla<-classification("acer rubrum", db = 'itis') | + | |
- | </file> | + | |
- | + | ||
- | ====== Spocc ====== | + | |
- | <file rsplus> | + | |
- | library(spocc) | + | |
- | occ_data <- occ(query = 'Acer nigrum', from = 'gbif') | + | |
- | mapggplot(occ_data) | + | |
- | </file> | + | |
- | + | ||
- | Combine spocc and RgoogleMaps | + | |
- | <file rsplus> | + | |
- | occ_data <- occ(query = 'Puma concolor', from = 'gbif') | + | |
- | occ_data_df=occ2df(occ_data) | + | |
- | occ_data_df<-subset(occ_data_df,!is.na(latitude) & latitude!=0) | + | |
- | mymap<-GetMap(center=c(mean(occ_data_df$latitude),mean(occ_data_df$longitude)), zoom=2) | + | |
- | PlotOnStaticMap(mymap,lat=occ_data_df$latitude,lon=occ_data_df$longitude,cex=1,pch=16,lwd=3,col=c('red')); | + | |
- | </file> | + | |
- | + | ||
- | ====== geonames ====== | + | |
- | + | ||
- | <file rsplus> | + | |
- | library(geonames) | + | |
- | options(geonamesUsername="glaroc") | + | |
- | res<-GNsearch(q="Mont Saint-Hilaire") | + | |
- | res[,c('toponymName','fclName')] | + | |
- | dc<-GNcities(45.4, -73.55, 45.7, -73.6, lang = "en", maxRows = 10) | + | |
- | dc[,c('toponymName')] | + | |
- | </file> | + | |
- | + | ||
- | + |