Differences

This shows you the differences between two versions of the page.

--- r_programming_gl [2014/11/21 15:54]
glaroc
+++ r_programming_gl [2014/11/21 16:31]
glaroc
@@ Line 23: / Line 23: @@
 <file rsplus>
 mydt=data.table(mydf)
-</file>
+</file>%0
-Each data table has to be assigned a key, which is one (or more) of the columns from the table. This key defines the basis for the organization and the sorting of the table.
-<file rsplus>
-setkey(mydt,a)
-</file>
-Once the key is set, we can return all rows where column a (the key) is equal to 'F':
-<file rsplus>
-mydt['F']
-</file>
-The following gives the mean value of column b for each letter in column a.
-<file rsplus>
-mydt[,mean(b),by=a]
-</file>
-Let's compare the performance of data.table with that of other methods that achieve the same result.
-<file rsplus>
-system.time(t1<-mydt[,mean(b),by=a])
-</file>
-**With tapply()**
-<file rsplus>
-system.time(t2<-tapply(mydf$b,mydf$a,mean))
-</file>
-**With reshape2**
-<file rsplus>
-library(reshape2)
-meltdf=melt(mydf)
-system.time(t3<-dcast(meltdf,a~variable,mean))
-</file>
-**With plyr**
-<file rsplus>
-library(plyr)
-system.time(t4<-ddply(mydf,.(a),summarize,mean(b)))
-</file>
-**With sqldf**. This package allows one to write Structured Query Language (SQL) commands to perform queries on a data frame.
-<file rsplus>
-library(sqldf)
-system.time(t5<-sqldf('SELECT a, avg(b) FROM mydf GROUP BY a'))
-</file>
-**With a basic FOR loop**
-<file rsplus>
-ti1<-proc.time()
-t6<-data.frame(letter=unique(mydf$a),mean=rep(0,26))
-for (i in t6$letter ){
-  t6[t6$letter==i,2]=mean(mydf[mydf$a==i,2])
-}
-eltime<-proc.time()-ti1
-eltime
-</file>
-**With a parallelized FOR loop**
-<file rsplus>
-library(foreach)
-library(doMC)
-registerDoMC(4) #Four-core processor
-ti1<-proc.time()
-t7<-data.frame(letter=unique(mydf$a),mean=rep(0,26))
-t7[,2] <- foreach(i=t7$letter, .combine='c') %dopar% {
- mean(mydf[mydf$a==i,2])
-}
-eltime<-proc.time()-ti1
-eltime
-</file>
-====== RgoogleMaps! ======
-<file rsplus>
-library(RgoogleMaps)
-myhome=getGeoCode('Olympic stadium, Montreal');
-mymap<-GetMap(center=myhome, zoom=14)
-PlotOnStaticMap(mymap,lat=myhome['lat'],lon=myhome['lon'],cex=5,pch=10,lwd=3,col=c('red'));
-</file>
-====== Taxize ======
-<file rsplus>
-library(taxize)
-spp<-tax_name(query=c("american beaver"),get="species")
-fam<-tax_name(query=c("american beaver"),get="family")
-correctname <- tnrs(c("fraxinus americanus"))
-cla<-classification("acer rubrum", db = 'itis')
-</file>
-====== Spocc ======
-<file rsplus>
-library(spocc)
-occ_data <- occ(query = 'Acer nigrum', from = 'gbif')
-mapggplot(occ_data)
-</file>
-Combine spocc and RgoogleMaps
-<file rsplus>
-occ_data <- occ(query = 'Puma concolor', from = 'gbif')
-occ_data_df=occ2df(occ_data)
-occ_data_df<-subset(occ_data_df,!is.na(latitude) & latitude!=0)
-mymap<-GetMap(center=c(mean(occ_data_df$latitude),mean(occ_data_df$longitude)), zoom=2)
-PlotOnStaticMap(mymap,lat=occ_data_df$latitude,lon=occ_data_df$longitude,cex=1,pch=16,lwd=3,col=c('red'));
-</file>
-====== geonames ======
-<file rsplus>
-library(geonames)
-options(geonamesUsername="glaroc")
-res<-GNsearch(q="Mont Saint-Hilaire")
-res[,c('toponymName','fclName')]
-dc<-GNcities(45.4, -73.55, 45.7, -73.6, lang = "en", maxRows = 10)
-dc[,c('toponymName')]
-</file>