Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
onlinedb [2022/03/09 11:19] qcbs |
onlinedb [2025/04/08 14:34] (current) qcbs [Exercise 3 - SELECT statement] |
||
---|---|---|---|
Line 325: | Line 325: | ||
CREATE TABLE species (species_id INT NOT NULL, full_name varchar(255), short_name VARCHAR(100)); | CREATE TABLE species (species_id INT NOT NULL, full_name varchar(255), short_name VARCHAR(100)); | ||
COPY species FROM 'C:/bdlibre/species_acro.csv' WITH csv HEADER DELIMITER AS ','; | COPY species FROM 'C:/bdlibre/species_acro.csv' WITH csv HEADER DELIMITER AS ','; | ||
+ | </file> | ||
+ | |||
+ | ===== Exercise 3 - Importing data into SQLite in R ===== | ||
+ | |||
+ | First install the RSQLite package | ||
+ | <file rsplus> | ||
+ | install.packages('RSQLite') | ||
+ | </file> | ||
+ | |||
+ | To load it in R, you will need to load the DBI package | ||
+ | <file rsplus> | ||
+ | library(DBI) | ||
+ | </file> | ||
+ | |||
+ | Then, create the file containing the database | ||
+ | <file rsplus> | ||
+ | mydb <- dbConnect(RSQLite::SQLite(), "QCBS_Workshop.sqlite") | ||
+ | </file> | ||
+ | |||
+ | Set this to where you have downloaded the files | ||
+ | <file> | ||
+ | setwd('C:\\User\MyName\Workshop\') | ||
+ | </file> | ||
+ | |||
+ | You can now load the CSV files in R as data frames | ||
+ | <file> | ||
+ | lakes <- read.csv('lakes.csv') | ||
+ | lakes_species <- read.csv('lakes_species.csv') | ||
+ | species_acro <- read.csv('species_acro.csv') | ||
+ | </file> | ||
+ | |||
+ | And then load those dataframes into the database | ||
+ | <file rsplus> | ||
+ | dbWriteTable(mydb, "lakes", lakes) | ||
+ | dbWriteTable(mydb, "lakes_species", lakes_species) | ||
+ | dbWriteTable(mydb, "species_acro", species_acro) | ||
+ | dbListTables(mydb) | ||
+ | </file> | ||
+ | |||
+ | You can then run most queries below with dbGetQuery. For example: | ||
+ | <file rsplus> | ||
+ | lake_prairies<-dbGetQuery(mydb, "SELECT * FROM lakes WHERE ecozone='Prairies'") | ||
</file> | </file> | ||
===== REFERENCES ===== | ===== REFERENCES ===== | ||
Line 406: | Line 448: | ||
**Question 1** :?: - How many lakes in British Columbia receive more than 3000 mm of precipitation (totp_ann column)? | **Question 1** :?: - How many lakes in British Columbia receive more than 3000 mm of precipitation (totp_ann column)? | ||
- | \\ ++answer| 12 ++ \\ | + | \\ ++answer| SELECT count(*) FROM lakes WHERE province='BRITISH COLUMBIA' AND totp_ann>3000; 12 ++ \\ |
- | **Question 2** :?: - What is the average Elevation (mean_ele column) of all lakes in the Montane Cordillera Ecozone (use the avg() operator)? | + | **Question 2** :?: - What is the average Elevation (mean_elev column) of all lakes in the Montane Cordillera Ecozones (use the avg() operator)? |
- | \\ ++answer| 1364.36 ++ \\ | + | \\ ++answer| SELECT avg(mean_elev) FROM lakes WHERE ecoprov LIKE '%Montane Cordillera'; 1364.36 ++ \\ |
\\ | \\ | ||
- | Note: the % operator is a wildcard used to replace the beginning or the end of a string. | + | Note: the % operator is a wildcard used to replace the beginning or the end of a string in a query using LIKE. |
\\ | \\ | ||
Select all lake names that contain the word 'Small' | Select all lake names that contain the word 'Small' | ||
Line 429: | Line 471: | ||
**Question 3** :?: - What is the maximum latitude of all lakes in Ecozones with names that start with 'Taiga' | **Question 3** :?: - What is the maximum latitude of all lakes in Ecozones with names that start with 'Taiga' | ||
- | \\ ++answer| 68.8 ++ \\ | + | \\ ++answer| SELECT max(latitude) FROM lakes WHERE ecozone like 'Taiga%'; 68.8 ++ \\ |
**Question 4** :?: - How many species of Daphnia are there in the species table? | **Question 4** :?: - How many species of Daphnia are there in the species table? | ||
- | \\ ++answer| 17 (21 have the word 'Daphnia' somewhere in their name) ++ \\ | + | \\ ++answer| SELECT count(*) FROM species_acro WHERE full_name like 'Daphnia%'; 17 (21 have the word 'Daphnia' somewhere in their name) ++ \\ |
===== Exercise 4 - GROUPING ===== | ===== Exercise 4 - GROUPING ===== | ||
Line 451: | Line 493: | ||
**Question 5** :?: - Which province has the lake with the highest maximum temperature (column tmax_ann)? | **Question 5** :?: - Which province has the lake with the highest maximum temperature (column tmax_ann)? | ||
- | \\ ++answer| BRITISH COLUMBIA 14.60 ++ \\ | + | \\ ++answer| SELECT province, max(tmax_ann) FROM lakes GROUP BY province ORDER BY max DESC LIMIT 1; |
+ | BRITISH COLUMBIA 14.60 ++ \\ | ||
Update statement: | Update statement: | ||
Line 671: | Line 714: | ||
lakes <- tbl(src, "lakes") # Define lakes table | lakes <- tbl(src, "lakes") # Define lakes table | ||
lakes_qc<-filter(lakes, province %=% 'QUEBEC') # Select lakes in Quebec | lakes_qc<-filter(lakes, province %=% 'QUEBEC') # Select lakes in Quebec | ||
- | prov_tmean<-summarise(group_by(lakes, province), mean(tmean_an)) # Mean annual temperature per province | + | prov_tmean<- lakes |> group_by(province) |> summarise(mean(tmean_an)) # Mean annual temperature per province |
- | prov_tmean=collect(prov_tmean) # Transfer result to standard R data frame | + | prov_tmean <- collect(prov_tmean) # Transfer result to standard R data frame |
- | lakes_qc2<-tbl(src, sql("SELECT * FROM lakes WHERE province='QUEBEC'")) #Perform any SQL statement | + | lakes_qc2 <- tbl(src, sql("SELECT * FROM lakes WHERE province='QUEBEC'")) #Perform any SQL statement |
</file> | </file> | ||