This is the last multivariate code post from the quantitative methods class. Next week will be the final set of code (on non-normal data) from that class, which I co-taught with Dr. Nicola Koper.
Get the data import code and data here.
######################################
####Principle Componenents Analysis###
######################################
#R has two methods for performing PCA.
#Generally they will give similar results and the differences involve the computation and a few summary functions.
pca.morphology<-prcomp(titmouse.na.omit[,1:7],
#specifying columns by number can be dangerous if you change your
data format.
scale=TRUE, #correlation
matrix used instead of covariance matrix, which is only appropriate if
everything in same units.
retx=TRUE) #required to get PC scores for each individual.
summary(pca.morphology)
(pca.eigenvalues<-pca.morphology$sdev^2) #Get eigenvalues.
screeplot(pca.morphology) #Plot eigenvalues.
biplot(pca.morphology)
pca.morphology$rotation #eigenvectors. Again the signs are arbitrary so don't worry
#if they differ but absolute values are the same between different programs or versions of R.
(loadings.pca<-cor(pca.morphology$x, titmouse.na.omit[,1:7], method="pearson"))
#Pearson's correlation of components with original variables. Easier to interpret.
#Eigenvectors are how you get PCs, so also a sort of weight, just harder to think about.
#If not obvious, probably not interpretable. Interpret loadings above |0.33|.
#PC1 has shorter wing and tail, smaller mass, shorter and shallower bills (size) and longer crest.
#PC1 explains 32.4% of variance.
#PC2 is longer wing and tail, shorter and shallower bills (shape) and explains 20.5% of variance.
#plotting PCA
defaultpalette<-palette() #save the default palette before we mess with it.
gray<-gray.colors(7, start=1, end=0)
palette(gray)
#creates standard hybrid index gray palette that has white (1) through black (7)
#for colors. Can be used to label figures with indexsum+1.
palette() #shows palette hex codes
pscores<-data.frame(pca.morphology$x) #puts PCA scores in a data frame
pscores$indexsum<-titmouse.na.omit$indexsum # adds indexsum values to PC scores for individual birds
pscores$indexcolor<-as.factor(pscores$indexsum+1)
pscores$zone<-as.factor(titmouse.na.omit$zone) # adds zone age
pscores$zonenum<-as.factor(titmouse.na.omit$zone) # makes zone age number column
pscores$zonenum<- sub("old",21,pscores$zonenum)
pscores$zonenum<- sub("young",23,pscores$zonenum)
pscores$zonenum<-as.numeric(pscores$zonenum)
str(pscores)
#plotting principal components
plot(x=pscores$PC1,y=pscores$PC2,
asp=1,
mar=c(2,2,2,2)+0.1,
bg=pscores$indexcolor,
col=7,
pch=pscores$zonenum,
cex=2,
lwd=2,
xlab="PC1 Increasing values: shorter wing and tail, shorter and shallower bill, less weight",
ylab="PC2 Increasing values: longer wing and tail, shorter and shallower bill",
cex.axis=1,
cex.lab=0.6)
legend(title="Hybrid index values",
title.adj=0,
x=-5.5,
y=3.76,
ncol=1,
xjust=0,
bty="n",
x.intersp=0.75,
y.intersp=1,
text.width=0.5,
legend = c(0,1,2,3,4,5,6),
pt.lwd=2,
pt.bg=gray,
pch=c(21,21,21,21,21,21),
cex=0.8,
pt.cex=1.25)
# specifies colors, labels for legend, and symbols, and location for legend and size (cex)
legend(title="Zone age",
title.adj=0,
x=-5.5,
y=0,
ncol=1,
bty="n",
x.intersp=1,
y.intersp=1,
text.width=0.5,
legend=c("Old","Young"),
pt.bg="gray",
pch=c(21,23),
cex=0.8,
pt.cex=1.25,
pt.lwd=2)
### add ellipses for old/young
#make some logical objects to keep your selections cleaner in the code below.
tuftedmo<-pscores$indexsum<1 & pscores$zone=='old'
bcmo<-pscores$indexsum>5 & pscores$zone=='old'
tuftedmy<-pscores$indexsum<1 & pscores$zone=='young'
bcmy<-pscores$indexsum>5 & pscores$zone=='young'
library(car) #required for dataEllipse although we've called it earlier so it's unnecessary here.
dataEllipse(pscores[tuftedmo, ]$PC1,
pscores[tuftedmo, ]$PC2,
levels=0.68,
lty=1,
plot.points=FALSE,
add=TRUE,col="gray",
center.pch=FALSE)
dataEllipse(pscores[bcmo, ]$PC1,
pscores[bcmo, ]$PC2,
levels=0.68,lty=1,
plot.points=FALSE,
add=TRUE,
col="black",
center.pch=FALSE)
dataEllipse(pscores[tuftedmy, ]$PC1,
pscores[tuftedmy, ]$PC2,
levels=0.68,
lty=3,
plot.points=FALSE,
add=TRUE,
col="gray",
center.pch=FALSE)
dataEllipse(pscores[bcmy, ]$PC1,
pscores[bcmy, ]$PC2,
levels=0.68,lty=3,
plot.points=FALSE,
add=TRUE,
col="black",
center.pch=FALSE)
#These give black lines for Black-crested Titmouse and gray lines for Tufted Titmouse.
#Birds from the young zone have diamond symbols and dotted lines,
#while birds from the older zone have solid lines and round symbols.
#Return palette() to default after using the gray palette for this plot.
palette(defaultpalette)
palette()
No comments:
Post a Comment
Comments and suggestions welcome.