# Lab settings - please ignore
# Make plots a reasonable size
# options(repr.plot.width = 7, repr.plot.height = 3.5)
# options(repr.plot.width = 15, repr.plot.height = 10)
options(repr.plot.height = 8, repr.plot.width = 12)


# Read the survey records from the CSV file into a data frame
surveys <- read.csv(file = "surveys_clean.csv")


# Show the first rows
head(surveys)


# Column names of the data frame
names(surveys)


# Class of the object returned by names()
class(names(surveys))


# Compact display of the structure: one line per column
str(object = surveys)

'data.frame':	30463 obs. of  13 variables:
 $ record_id      : int  845 1164 1261 1756 1818 1882 2133 2184 2406 3000 ...
 $ month          : int  5 8 9 4 5 7 10 11 1 5 ...
 $ day            : int  6 5 4 29 30 4 25 17 16 18 ...
 $ year           : int  1978 1978 1978 1979 1979 1979 1979 1979 1980 1980 ...
 $ plot_id        : int  2 2 2 2 2 2 2 2 2 2 ...
 $ species_id     : chr  "NL" "NL" "NL" "NL" ...
 $ sex            : chr  "M" "M" "M" "M" ...
 $ hindfoot_length: int  32 34 32 33 32 32 33 30 33 31 ...
 $ weight         : int  204 199 197 166 184 206 274 186 184 87 ...
 $ genus          : chr  "Neotoma" "Neotoma" "Neotoma" "Neotoma" ...
 $ species        : chr  "albigula" "albigula" "albigula" "albigula" ...
 $ taxa           : chr  "Rodent" "Rodent" "Rodent" "Rodent" ...
 $ plot_type      : chr  "Control" "Control" "Control" "Control" ...


# Dimensions of the data frame
dim(surveys)


# Number of rows only
nrow(surveys)


# Per-column summary
summary(object = surveys)

   record_id         month             day             year     
 Min.   :   63   Min.   : 1.000   Min.   : 1.00   Min.   :1977  
 1st Qu.: 9834   1st Qu.: 4.000   1st Qu.:10.00   1st Qu.:1985  
 Median :18611   Median : 7.000   Median :16.00   Median :1991  
 Mean   :18458   Mean   : 6.549   Mean   :16.18   Mean   :1991  
 3rd Qu.:27157   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1997  
 Max.   :35548   Max.   :12.000   Max.   :31.00   Max.   :2002  
    plot_id       species_id            sex            hindfoot_length
 Min.   : 1.00   Length:30463       Length:30463       Min.   : 2.00  
 1st Qu.: 5.00   Class :character   Class :character   1st Qu.:21.00  
 Median :11.00   Mode  :character   Mode  :character   Median :32.00  
 Mean   :11.22                                         Mean   :29.27  
 3rd Qu.:17.00                                         3rd Qu.:36.00  
 Max.   :24.00                                         Max.   :64.00  
     weight          genus             species              taxa          
 Min.   :  4.00   Length:30463       Length:30463       Length:30463      
 1st Qu.: 20.00   Class :character   Class :character   Class :character  
 Median : 36.00   Mode  :character   Mode  :character   Mode  :character  
 Mean   : 41.86                                                           
 3rd Qu.: 47.00                                                           
 Max.   :280.00                                                           
  plot_type        
 Length:30463      
 Class :character  
 Mode  :character


# Example histogram of the weight column
hist(surveys$weight)


# Histogram with red bars and better labels
hist(surveys$weight, main = "Survey data", xlab = "Weight (gr)", breaks = 25, col = "red")


# The same call, written with one argument per line
hist(
  surveys$weight,
  main = "Survey data",
  xlab = "Weight (gr)",
  breaks = 25,
  col = "red"
)


# Histogram with breaks = 50
hist(surveys$weight, breaks = 50)


# Histogram of genus Dipodomys
# (the data expression is kept as-is: hist() builds its default x label from it)
hist(
  surveys$weight[surveys$genus == "Dipodomys"],
  main = "Dipodomys",
  xlim = c(0, 250),
  breaks = 20
)


# Histogram of genus Chaetodipus, same breaks and x limits as above
hist(
  surveys$weight[surveys$genus == "Chaetodipus"],
  main = "Chaetodipus",
  xlim = c(0, 250),
  breaks = 20
)


# Box plot example: weight grouped by species_id
boxplot(surveys$weight ~ surveys$species_id)


# Keep only the rows whose species_id is DO, DM or PP
subset_surveys <- subset(surveys, species_id %in% c("DO", "DM", "PP"))

# Box plot of the subset
boxplot(subset_surveys$weight ~ subset_surveys$species_id)


# Open the help page for boxplot
help("boxplot")


# Example scatterplot
plot(surveys$weight, surveys$hindfoot_length)


# Scatterplot with descriptive axis labels
plot(surveys$weight, surveys$hindfoot_length,
     xlab = "Weight (gr)", ylab = "Foot length (mm)")

# Add green points (species PP)
points(surveys$weight[surveys$species_id == "PP"],
       surveys$hindfoot_length[surveys$species_id == "PP"],
       col = "green", pch = "o")

# Add red points (species DO)
points(surveys$weight[surveys$species_id == "DO"],
       surveys$hindfoot_length[surveys$species_id == "DO"],
       col = "red", pch = "o")

# Add blue points (species DM)
points(surveys$weight[surveys$species_id == "DM"],
       surveys$hindfoot_length[surveys$species_id == "DM"],
       col = "blue", pch = "o")

# Add legend.
# The labels are listed in the same order as the colours used above
# (PP = green, DO = red, DM = blue).  A keyword position is used instead of
# x/y coordinates so the legend always lands inside the plotting region,
# whatever the axis ranges are.  Only point markers are drawn, so no lty.
legend("bottomright",
       legend = c("PP", "DO", "DM"),
       col = c("green", "red", "blue"),
       pch = "o")


# Read the survey data again
# NOTE(review): presumably repeated so this section can be run on its own - confirm
surveys <- read.csv("surveys_clean.csv")


# Load the volcano dataset
data(volcano)

# Make contour plot.
# The palette argument is spelled out in full (color.palette) rather than
# relying on partial matching of the abbreviated name "color".
filled.contour(volcano, color.palette = terrain.colors, asp = 1)


library(ggplot2)


# Example ggplot: weight against hindfoot length, drawn as points
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point()


# Step 1: bind the plot to a data frame only
ggplot(data = surveys)


# Step 2: add the aesthetic mapping
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length))


# Step 3: add a geom layer
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point()


# Assign plot to a variable
surveys_plot <- ggplot(
  data = surveys,
  mapping = aes(x = weight, y = hindfoot_length)
)

# Draw the plot
surveys_plot + geom_point()


# Class of the stored plot object
class(surveys_plot)


# Try 1: plain points
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point()


# Try 2: transparent points
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(alpha = 0.1)


# Try 3: transparent blue points
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(color = "blue", alpha = 0.1)


# Try 4: colour mapped to species_id
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(mapping = aes(color = species_id), alpha = 0.2)


# Plot using RColorBrewer's Dark2 colormap
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(mapping = aes(color = species_id), alpha = 0.2) +
  scale_color_brewer(palette = "Dark2")

Warning message in RColorBrewer::brewer.pal(n, pal):
"n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors
"
Warning message:
"Removed 7891 rows containing missing values (geom_point)."


# Adding labels - Method 1: one function per label
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(mapping = aes(color = species_id), alpha = 0.2) +
  ggtitle("Surveys Scatterplot - 1st method to do labeling") +
  xlab("Weight (gr)") +
  ylab("Hindfoot length (mm)")


# Adding labels - Method 2: a single labs() call
ggplot(data = surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(mapping = aes(color = species_id), alpha = 0.2) +
  labs(
    title = "Surveys Scatterplot - 2nd method to do labeling",
    x = "Weight (gr)",
    y = "Hindfoot length (mm)"
  )


# Install hexbin only when it is not already available, so re-running this
# cell does not reinstall the package every time
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin")
}


library(hexbin)


# Example geom_hex: hexagonal bins on top of the stored base plot
surveys_plot +
  geom_hex(bins = 50)


# Keep only the rows with species_id "SH"
SH_surveys <- subset(surveys, species_id == "SH")

# Make scatterplot with smooth line + confidence intervals
ggplot(data = SH_surveys, mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point(color = "red", alpha = 0.5) +
  geom_smooth()

`geom_smooth()` using method = 'loess' and formula 'y ~ x'


# Example geom_boxplot: weight per species
ggplot(data = surveys, mapping = aes(x = species_id, y = weight)) +
  geom_boxplot()


# Boxplot of subset
ggplot(data = subset_surveys, mapping = aes(x = species_id, y = weight)) +
  geom_boxplot()


# Example geom_jitter: boxplot layer first, jittered points on top
ggplot(data = surveys, mapping = aes(x = species_id, y = weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(color = "tomato", alpha = 0.1)


# Jitter + boxplot: jittered points first, boxplot layer on top
ggplot(data = surveys, mapping = aes(x = species_id, y = weight)) +
  geom_jitter(color = "tomato", alpha = 0.1) +
  geom_boxplot(alpha = 0)


# Violin plot of weight per species
ggplot(data = surveys, mapping = aes(x = species_id, y = weight)) +
  geom_violin()


# Red violins of subset data
ggplot(data = subset_surveys, mapping = aes(x = species_id, y = weight)) +
  geom_violin(color = "darkred", fill = "red", trim = FALSE)


# Red violins + boxplots
ggplot(data = subset_surveys, mapping = aes(x = species_id, y = weight)) +
  geom_violin(color = "darkred", fill = "red", trim = FALSE) +
  geom_boxplot(width = 0.1)


# Install dplyr only when it is not already available, so re-running this
# cell does not reinstall the package every time
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}


library(dplyr)

# Count entries per year for every genus
yearly_counts <- surveys %>%
  count(year, genus)


# Time-series - rough: no grouping
ggplot(data = yearly_counts, mapping = aes(x = year, y = n)) +
  geom_line()


# Time-series - better: one line per genus
ggplot(data = yearly_counts, mapping = aes(x = year, y = n, group = genus)) +
  geom_line()


# Time-series with color
ggplot(data = yearly_counts, mapping = aes(x = year, y = n, color = genus)) +
  geom_line()


# Time-series - longdash
ggplot(data = yearly_counts, mapping = aes(x = year, y = n, color = genus)) +
  geom_line(linetype = "longdash")


# Faceting: one panel per genus
ggplot(data = yearly_counts, mapping = aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(facets = vars(genus))


# Count entries every year for genus and sex
yearly_sex_counts <- count(surveys, year, genus, sex)


# Faceting - Genus and sex
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus))


# Facet_grid: sex in rows, genus in columns
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_grid(rows = vars(sex), cols = vars(genus))


# One column, facet by rows
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_grid(rows = vars(genus))


# Theme_bw
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  theme_bw()


# Theme_dark
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  theme_dark()


# Customized plot - theme_bw, axis labels and title
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()


# Same as before + increased font size
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))


# Same as before + italics, color change and font rotation
ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16)
  )


# Make our theme: store the theme settings in a variable for reuse
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)

# Apply our theme to a boxplot of hindfoot length per species
ggplot(data = surveys, mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_boxplot() +
  grey_theme


# Install patchwork only when it is not already available, so re-running this
# cell does not reinstall the package every time
if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}


library(patchwork)

# Boxplot of weight per species, drawn on a log10 y scale
plot1 <- ggplot(data = surveys, aes(x = species_id, y = weight)) +
  geom_boxplot() +
  labs(x = "Species", y = expression(log[10](Weight))) +
  scale_y_log10()

# Time-series of yearly counts, one coloured line per genus
plot2 <- ggplot(data = yearly_counts, aes(x = year, y = n, color = genus)) +
  geom_line() +
  labs(x = "Year", y = "Abundance")

# Layout: plot1 stacked above plot2 with relative heights 3:2
plot1 / plot2 + plot_layout(heights = c(3, 2))


# Save plot to variable
my_plot <- ggplot(
  data = yearly_sex_counts,
  mapping = aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    text = element_text(size = 16)
  )

# Display plot
my_plot


# Save plot to a PNG file
ggsave(
  filename = "name_of_file.png",
  plot = my_plot,
  width = 10,
  height = 7,
  dpi = 700
)


# Read the whole custom stylesheet file as one string and hand it to the
# notebook front end as HTML
cssFile <- '../css/custom.css'
IRdisplay::display_html(readChar(cssFile, file.info(cssFile)$size))

# Inline CSS counters that prefix elements of class Q with "QUESTION <n>: "
# and elements of class T with "Task <n>: ".
# The .T::before rule is now closed with "}" before </style>; it was
# previously left unterminated.
IRdisplay::display_html("<style>.Q::before {counter-increment: question_num;
    content: 'QUESTION ' counter(question_num) ': '; white-space: pre; }.T::before {counter-increment: task_num;
    content: 'Task ' counter(task_num) ': '; }</style>")

Parameter	Value	Description
din,fin,pin	=c(width,height)	Dimensions (width and height) of the device, figure and plotting regions (in inches)
fig	=c(left,right,bottom,top)	Coordinates of the figure region within the device. Coordinates expressed as a fraction of the device region.
mai,mar	=c(bottom,left,top,right)	Size of each of the four figure margins in inches and lines of text (relative to current font size).
mfg	=c(row,column)	Position of the currently active figure within a grid of figures defined by either mfcol or mfrow.
mfcol,mfrow	=c(rows,columns)	Number of rows and columns in a multi-figure grid.
new	=TRUE or =FALSE	Indicates whether to treat the current figure region as a new frame and thus begin a new plot over the top of the previous plot (TRUE), or to allow a new high-level plotting function to clear the figure region first (FALSE).
oma,omd,omi	=c(bottom,left,top,right)	Size of each of the four outer margins in lines of text (relative to current font size), inches and as a fraction of the device region dimensions
plt	=c(left,right,bottom,top)	Coordinates of the plotting region expressed as a fraction of the device region.
pty	="s" or "m"	Type of plotting region within the figure region. Is the plotting region a square (="s") or is it maximized (="m") to fit within the shape of the figure region.
usr	=c(left,right,bottom,top)	Coordinates of the plotting region corresponding to the axes limits of the plot.

Setup of workspace¶

Basic data exploration¶

Reading the data into R¶

View data contents¶

Head and tail functions¶

RStudio's spreadsheet-style data viewer¶

Explore data size and structure¶

Basic statistics¶

Introduction to plotting¶

Base R plotting¶

Histograms¶

Boxplot¶

Scatterplots¶

Heatmaps and contour plots¶

Plotting with ggplot2¶

The basics¶

Aesthetics¶

Geoms¶

Building your plots iteratively (scatterplot)¶

Transparency¶

Color¶

Labeling¶

Hexagon 2-D histogram¶

Smooth trend line¶

Boxplot¶

Jitter¶

Violin plot¶

Plotting time-series data¶

Faceting¶

ggplot2 themes¶

Customization¶

Arranging plots¶

Figure dimensions¶

Exporting plots¶

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight	genus	species	taxa	plot_type
	<int>	<int>	<int>	<int>	<int>	<chr>	<chr>	<int>	<int>	<chr>	<chr>	<chr>	<chr>
1	845	5	6	1978	2	NL	M	32	204	Neotoma	albigula	Rodent	Control
2	1164	8	5	1978	2	NL	M	34	199	Neotoma	albigula	Rodent	Control
3	1261	9	4	1978	2	NL	M	32	197	Neotoma	albigula	Rodent	Control
4	1756	4	29	1979	2	NL	M	33	166	Neotoma	albigula	Rodent	Control
5	1818	5	30	1979	2	NL	M	32	184	Neotoma	albigula	Rodent	Control
6	1882	7	4	1979	2	NL	M	32	206	Neotoma	albigula	Rodent	Control