Differences

This shows you the differences between two versions of the page.

--- r_programming_gl [2014/11/21 16:31]
glaroc
+++ r_programming_gl [2014/11/21 16:31]
glaroc old revision restored (2014/11/21 15:54)
@@ Line 23: / Line 23: @@
 <file rsplus>
 mydt=data.table(mydf)
-</file>%0
+</file>
+Each data table has to be assigned a key, which is one (or more) of the columns from the table. This key defines the basis for the organization and the sorting of the table.
+<file rsplus>
+setkey(mydt,a)
+</file>
+Once the key is set, we can return all rows where column a (the key) is equal to 'F':
+<file rsplus>
+mydt['F']
+</file>
+The following command gives the mean value of column b for each letter in column a:
+<file rsplus>
+mydt[,mean(b),by=a]
+</file>
+Let's compare the performance of data.table with other methods that achieve the same result.
+<file rsplus>
+system.time(t1<-mydt[,mean(b),by=a])
+</file>
+**With tapply()**
+<file rsplus>
+system.time(t2<-tapply(mydf$b,mydf$a,mean))
+</file>
+**With reshape2**
+<file rsplus>
+library(reshape2)
+meltdf=melt(mydf)
+system.time(t3<-dcast(meltdf,a~variable,mean))
+</file>
+**With plyr**
+<file rsplus>
+library(plyr)
+system.time(t4<-ddply(mydf,.(a),summarize,mean(b)))
+</file>
+**With sqldf**. This package allows one to write Structured Query Language (SQL) commands to perform queries on a data frame.
+<file rsplus>
+library(sqldf)
+system.time(t5<-sqldf('SELECT a, avg(b) FROM mydf GROUP BY a'))
+</file>
+**With a basic FOR loop**
+<file rsplus>
+ti1<-proc.time()
+t6<-data.frame(letter=unique(mydf$a),mean=rep(0,26))
+for (i in t6$letter ){
+  t6[t6$letter==i,2]=mean(mydf[mydf$a==i,2])
+}
+eltime<-proc.time()-ti1
+eltime
+</file>
+**With a parallelized FOR loop**
+<file rsplus>
+library(foreach)
+library(doMC)
+registerDoMC(4) #Four-core processor
+ti1<-proc.time()
+t7<-data.frame(letter=unique(mydf$a),mean=rep(0,26))
+t7[,2] <- foreach(i=t7$letter, .combine='c') %dopar% {
+ mean(mydf[mydf$a==i,2])
+}
+eltime<-proc.time()-ti1
+eltime
+</file>
+====== RgoogleMaps! ======
+<file rsplus>
+library(RgoogleMaps)
+myhome=getGeoCode('Olympic stadium, Montreal');
+mymap<-GetMap(center=myhome, zoom=14)
+PlotOnStaticMap(mymap,lat=myhome['lat'],lon=myhome['lon'],cex=5,pch=10,lwd=3,col=c('red'));
+</file>
+====== Taxize ======
+<file rsplus>
+library(taxize)
+spp<-tax_name(query=c("american beaver"),get="species")
+fam<-tax_name(query=c("american beaver"),get="family")
+correctname <- tnrs(c("fraxinus americanus"))
+cla<-classification("acer rubrum", db = 'itis')
+</file>
+====== Spocc ======
+<file rsplus>
+library(spocc)
+occ_data <- occ(query = 'Acer nigrum', from = 'gbif')
+mapggplot(occ_data)
+</file>
+Combine spocc and RgoogleMaps to plot occurrence records on a static map:
+<file rsplus>
+occ_data <- occ(query = 'Puma concolor', from = 'gbif')
+occ_data_df=occ2df(occ_data)
+occ_data_df<-subset(occ_data_df,!is.na(latitude) & latitude!=0)
+mymap<-GetMap(center=c(mean(occ_data_df$latitude),mean(occ_data_df$longitude)), zoom=2)
+PlotOnStaticMap(mymap,lat=occ_data_df$latitude,lon=occ_data_df$longitude,cex=1,pch=16,lwd=3,col=c('red'));
+</file>
+====== geonames ======
+<file rsplus>
+library(geonames)
+options(geonamesUsername="glaroc")
+res<-GNsearch(q="Mont Saint-Hilaire")
+res[,c('toponymName','fclName')]
+dc<-GNcities(45.4, -73.55, 45.7, -73.6, lang = "en", maxRows = 10)
+dc[,c('toponymName')]
+</file>