They also tend to have a lot of dirt on their beaks. Kinda messy critters. Savannah Sparrow, near Brooks, Newell County, Alberta. 18 May 2015. |
Thursday, May 28, 2015
Savannah Sparrows are so easy to catch
Tuesday, May 26, 2015
Friday, May 22, 2015
Field work in full swing!
Tuesday, May 05, 2015
Beginning R: a very brief survey of methods for non-normal data
This was the last lab code for the quantitative methods class I co-taught with Dr. Nicola Koper this winter semester (i.e., the semester I used to know as "spring").
#Start by setting your workspace. Even though we don't import a file this time,
#it's good practice so any figures you generate will go to the correct folder.
#Otherwise they tend to disappear into that totally obvious folder that you then forget.
######################################################################################
setwd("C:\\Users\\YourUsername\\")
#Your directory here.
#Or use the menu...
#Session>Set Working Directory>To Source File Location [or you can choose the directory]
#Today we'll use a sample dataset that's already in R,
#plus generate some random data of our own.
#No need for text files or importing!
#Generate some proportions
#No proportion data, so we'll just add it.
#I generate more data than we need, then cut off <0 and >1.
inventedproportions.prelim<-rnorm(mean=0.5,
sd=0.5,
n=300)
inventedproportions.keep.logical<-inventedproportions.prelim<=1&inventedproportions.prelim>=0
inventedproportions<-inventedproportions.prelim[inventedproportions.keep.logical]
airquality$inventedproportions<-inventedproportions[1:153]
#Generate zero-inflated data (download this package if necessary; I had never used it before).
library(gamlss.dist)
airquality$somanyzeros<-rZIP(n=153,
mu=5,
sigma=0.3)
str(airquality)
airquality<-na.omit(airquality)
str(airquality)
###########################################
#Brief review of graphical data exploration
###########################################
par(mfrow=c(2,3))
#Histograms for normality.
hist(airquality$Ozone)
hist(airquality$Solar.R)
hist(airquality$Wind)
hist(airquality$Temp)
hist(airquality$inventedproportions)
hist(airquality$somanyzeros)
#Residuals are what count for our models.
model.Solar.R<-lm(Ozone~Solar.R,
data=airquality)
par(mfrow=c(2,2))
plot(model.Solar.R)
resid.model.Solar.R<-resid(model.Solar.R)
par(mfrow=c(1,1))
hist(resid.model.Solar.R)
model.somanyzeros<-lm(Ozone~somanyzeros,
data=airquality)
par(mfrow=c(2,2))
plot(model.somanyzeros)
resid.model.somanyzeros<-resid(model.somanyzeros)
hist(resid.model.somanyzeros)
###########################################
#Transforming your data
###########################################
airquality$ln.Solar.R<-log(airquality$Solar.R)
#add a constant if there are zeros.
#pick a constant that transforms your variable best,
#see pp. 65-66 in Quinn and Keough for explanation.
airquality$ln.Solar.R.min<-log(airquality$Solar.R+min(airquality$Solar.R, na.rm=TRUE))
airquality$ln.Solar.R.001<-log(airquality$Solar.R+0.001)
airquality$ln.Solar.R.1<-log(airquality$Solar.R+1)
#Some other transformations.
airquality$Solar.R.inverted<-1/(airquality$Solar.R)
airquality$sqrt.Solar.R<-sqrt(airquality$Solar.R) #square root
airquality$cube.root.Solar.R<-airquality$Solar.R^(1/3) #cube root
airquality$fourth.root.Solar.R<-airquality$Solar.R^(1/4) #fourth root
#Box-Cox transformation. R only lets you actually run this on a model.
#It's for transforming your response variable ONLY.
library(MASS)
par(mfrow=c(1,2))
Solar.R.boxcox<-boxcox(model.Solar.R)
#Use the graph to zoom in around the mostly likely lambda value.
Solar.R.boxcox.zoom<-boxcox(model.Solar.R,
lambda=seq(-0.05,0.35,0.1))
#From the plot, lambda should be about 0.15 (1/6 or 1/7, so a 6th or 7th root transformation.)
airquality$sixth.root.Ozone<-airquality$Ozone^(1/6)
airquality$arcsinsqrt.inventedproportions<-asin(sqrt(airquality$inventedproportions)) #arcsin square root transformation for proportions
#reflecting your data before transforming (pg. 66 in Quinn and Keough)
reflection.constant<-(max(airquality$somanyzeros)+1)
airquality$lnreflected.somanyzeros<-log((reflection.constant-airquality$somanyzeros)+0.001)
#Re-evaluating your data after transformation: run the exploratory analyses again.
par(mfrow=c(1,2))
hist(airquality$Ozone)
hist(airquality$sixth.root.Ozone)
par(mfrow=c(2,4))
hist(airquality$Solar.R)
hist(airquality$ln.Solar.R.min)
hist(airquality$ln.Solar.R.001)
hist(airquality$ln.Solar.R.1)
#Some other transformations.
hist(airquality$Solar.R.inverted)
hist(airquality$sqrt.Solar.R)
hist(airquality$cube.root.Solar.R)
hist(airquality$fourth.root.Solar.R)
par(mfrow=c(1,2))
hist(airquality$inventedproportions)
hist(airquality$arcsinsqrt.inventedproportions)
hist(airquality$somanyzeros)
hist(airquality$lnreflected.somanyzeros)
#Residuals are what count for our models.
model.sqrt.Solar.R<-lm(Ozone~sqrt.Solar.R,
data=airquality)
par(mfrow=c(2,2))
plot(model.sqrt.Solar.R)
resid.model.sqrt.Solar.R<-resid(model.sqrt.Solar.R)
par(mfrow=c(1,2))
hist(resid.model.sqrt.Solar.R)
#previous
hist(resid.model.Solar.R)
model.somanyzeros<-lm(Ozone~somanyzeros,
data=airquality)
par(mfrow=c(2,2))
plot(model.somanyzeros)
resid.model.somanyzeros<-resid(model.somanyzeros)
model.lnreflected.somanyzeros<-lm(Ozone~lnreflected.somanyzeros,
data=airquality)
par(mfrow=c(2,2))
plot(model.lnreflected.somanyzeros)
resid.model.lnreflected.somanyzeros<-resid(model.lnreflected.somanyzeros)
#compare
hist(resid.model.lnreflected.somanyzeros)
hist(resid.model.somanyzeros)
#The original is actually better... so remember to look at residuals!
###########################################
#Heteroscedasticity: inequality of variance
###########################################
#code from last lab, but with the new sample data.
pairs(~Ozone+Solar.R+Wind+Temp,
data=airquality)
#density plots with our scatterplots
library(car)
scatterplotMatrix(~Ozone+Solar.R+Wind+Temp, #weird formula here, nothing as "response"/Y, and |indicates grouping variable.
data=airquality,
smoother=FALSE, #default is TRUE, draws a nonparametric smoother.
reg.line=FALSE, #default is TRUE, draws a regression line.
diagonal="density", #puts a density kernel line on the diagonal for each group.
# ellipse=TRUE,
# levels=c(0.5,0.95), #dataEllipse quartiles
pch=c(2,3,1), #rearrange the default points a bit
col=c(1,1,1)) #changed the colors to all black.
#tests for homogeneity of variance
#adapted from: http://www.cookbook-r.com/Statistical_analysis/Homogeneity_of_variance/
#These only work on groups, so group your Solar.R first.
airquality$Solar.R.cat<-NA
high<-airquality$Solar.R<=max(airquality$Solar.R, na.rm=TRUE)&airquality$Solar.R>=median(airquality$Solar.R, na.rm=TRUE)
airquality[high,"Solar.R.cat"]="high"
low<-airquality$Solar.R>=min(airquality$Solar.R, na.rm=TRUE)&airquality$Solar.R<median(airquality$Solar.R, na.rm=TRUE)
airquality[low,"Solar.R.cat"]="low"
airquality$Solar.R.cat<-as.factor(airquality$Solar.R.cat)
bartlett.test(Ozone~Solar.R.cat, #variable as a function of groups
data=airquality )
?bartlett.test #gives options for other tests.
library(car)
leveneTest(Ozone~Solar.R.cat,
data=airquality)
#Less sensitive than Bartlett's to non-normality.
###########################################
#Variations on regression
###########################################
#nonlinear relationships (adding a polynomial term), adapted from here.
#The I() function is necessary for R to read your quadratic or cubed term correctly.
quadratic.model<-lm(Ozone~Temp+I(Temp^2),
data=airquality)
summary(quadratic.model)
#cubic model
cubic.model<-lm(Ozone~Temp+I(Temp^2)+I(Temp^3),
data=airquality)
summary(cubic.model)
par(mfrow=c(1,1))
plot(airquality$Ozone~airquality$Temp)
ndQ<- data.frame("Temp"=seq(min(airquality$Temp),
max(airquality$Temp),
length.out=length(airquality$Temp)))
lines(ndQ$Temp,
predict(quadratic.model,
newdata=ndQ),
lty="solid",
col="black")
lines(ndQ$Temp,
predict(cubic.model,
newdata=ndQ),
lty="solid",
col="red")
anova(quadratic.model,cubic.model)
#weighted OLS regression
#Additional info.
#Group your data, or use existing groups. (Here I just use the two groups from before for the airquality data.)
(var.solar<-tapply(airquality$Ozone, #the variable we're interested in
airquality$Solar.R.cat, #index (i.e., the grouping variable)
FUN=var, na.rm=TRUE))
solar.wts<-1/var.solar
airquality$wts<-NA
airquality[airquality$Solar.R.cat=="high","wts"]=solar.wts[1]
airquality[airquality$Solar.R.cat=="low","wts"]=solar.wts[2]
wt.lm<-lm(Ozone~Solar.R,
weights=wts,
data=airquality)
summary(wt.lm)
par(mfrow=c(2,2))
plot(wt.lm)
#compare with original
plot(model.Solar.R)
summary(model.Solar.R)
#Or run as ANOVA.
wt.anova<-lm(Ozone~Solar.R.cat,
weights=wts,
data=airquality)
summary(wt.anova)
plot(wt.anova)
#compare with unweighted.
solar.anova<-lm(Ozone~Solar.R.cat,
data=airquality)
summary(solar.anova)
plot(solar.anova)
#For more details on how to do this (including normalizing your weights, which is apparently needed
#if you plan to get confidence intervals), check the top answer here.
###########################################
#Generalized linear models
###########################################
#functions, how to select family and link
glm.wolf<-glm(number~latitude,
data=Depredations,
family=poisson)
#uses default link for family.
summary(glm.wolf)
plot(glm.wolf)
par(mfrow=c(1,1))
plot(Depredations$number~Depredations$latitude)
ndW<- data.frame("latitude"=seq(min(Depredations$latitude),
max(Depredations$latitude),
length.out=length(Depredations$latitude)))
#plot the prediction with the new data (otherwise it uses rownumber and stretches the line out uselessly).
lines(ndW$latitude,
predict.glm(glm.wolf,
newdata=ndW,
type="response"),
lty="solid",lwd=2)
#GLM with a factor.
glm.snails<-glm(Deaths~Rel.Hum*Species,
data=snails,
family=poisson(link="log"))
#select the link manually, although still leaving it at default.
summary(glm.snails)
#quasipoisson suggested because very high deviance to df ratio.
glm.snails.q<-glm(Deaths~Rel.Hum*Species,
data=snails,
family=quasipoisson)
summary(glm.snails.q)
#plot the two lines.
plot(snails$Deaths[snails$Species=="A"]~jitter(snails$Rel.Hum[snails$Species=="A"]),
pch="A",
cex=0.5,
xlim=c(60,80),
ylim=c(0,20))
points(snails$Deaths[snails$Species=="B"]~jitter(snails$Rel.Hum[snails$Species=="B"]),
pch="B",
cex=0.5)
ndA<- data.frame("Rel.Hum"=seq(min(snails$Rel.Hum),
max(snails$Rel.Hum),
length.out=length(snails$Rel.Hum)),
"Species"="A")
lines(ndA$Rel.Hum,
predict.glm(glm.snails.q,
newdata=ndA,
type="response"), #type="response" is necessary for these interesting links.
lty="solid")
ndB<- data.frame("Rel.Hum"=seq(min(snails$Rel.Hum),
max(snails$Rel.Hum),
length.out=length(snails$Rel.Hum)),
"Species"="B")
lines(ndB$Rel.Hum,
exp(predict.glm(glm.snails.q,
newdata=ndB)),
lty="dotted")
#Just to show that predict.glm is going the same thing as exp(prediction):
lines(ndB$Rel.Hum,
predict.glm(glm.snails.q,
newdata=ndB,
type="response"),
lty="solid",
col="blue")
(predictions.compared<-data.frame(snails$Species,
snails$Deaths,
fitted(glm.snails.q),
predict(glm.snails.q, type="response"),
predict(glm.snails.q))) #note that without type="response" you get the linear predictors only.
#Run as an ANCOVA (analysis of covariance)
Anova(glm.snails.q, type=3)
#More info on Poisson regression specifically.
#evaluation of distribution fit using deviance (see lecture).
#Look at summary(yourmodel)
summary(glm.snails.q)
#Ratio of residual deviance to degrees of freedom as described in lecture.
#You can also look for packages that expand on this topic.
###########################################
#Nonparametric tests
###########################################
#Mann-Whitney/Wilcoxon tests.
wilcox.test(Ozone~Solar.R.cat,
data=airquality)
#pairwise.wilcox.test() if you have paired measurements (i.e. repeated measurements), also known as the Signed-Ranks test.
#For two or MORE samples, use the Kruskal-Wallis rank sum test.
kruskal.test(Ozone~Month,
data=airquality) #for two or more samples.
boxplot(Ozone~Month,
data=airquality,
xlab="Month",
ylab="Ozone in some units")
#Opinions on post-hoc tests vary, but a few packages are available and easily findable via google.
#Start by setting your workspace. Even though we don't import a file this time,
#it's good practice so any figures you generate will go to the correct folder.
#Otherwise they tend to disappear into that totally obvious folder that you then forget.
######################################################################################
setwd("C:\\Users\\YourUsername\\")
#Your directory here.
#Or use the menu...
#Session>Set Working Directory>To Source File Location [or you can choose the directory]
#Today we'll use a sample dataset that's already in R,
#plus generate some random data of our own.
#No need for text files or importing!
#Generate some proportions
#No proportion data, so we'll just add it.
#I generate more data than we need, then cut off <0 and >1.
inventedproportions.prelim<-rnorm(mean=0.5,
sd=0.5,
n=300)
inventedproportions.keep.logical<-inventedproportions.prelim<=1&inventedproportions.prelim>=0
inventedproportions<-inventedproportions.prelim[inventedproportions.keep.logical]
airquality$inventedproportions<-inventedproportions[1:153]
#Generate zero-inflated data (download this package if necessary; I had never used it before).
library(gamlss.dist)
airquality$somanyzeros<-rZIP(n=153,
mu=5,
sigma=0.3)
str(airquality)
airquality<-na.omit(airquality)
str(airquality)
###########################################
#Brief review of graphical data exploration
###########################################
par(mfrow=c(2,3))
#Histograms for normality.
hist(airquality$Ozone)
hist(airquality$Solar.R)
hist(airquality$Wind)
hist(airquality$Temp)
hist(airquality$inventedproportions)
hist(airquality$somanyzeros)
#Residuals are what count for our models.
model.Solar.R<-lm(Ozone~Solar.R,
data=airquality)
par(mfrow=c(2,2))
plot(model.Solar.R)
resid.model.Solar.R<-resid(model.Solar.R)
par(mfrow=c(1,1))
hist(resid.model.Solar.R)
model.somanyzeros<-lm(Ozone~somanyzeros,
data=airquality)
par(mfrow=c(2,2))
plot(model.somanyzeros)
resid.model.somanyzeros<-resid(model.somanyzeros)
hist(resid.model.somanyzeros)
###########################################
#Transforming your data
###########################################
airquality$ln.Solar.R<-log(airquality$Solar.R)
#add a constant if there are zeros.
#pick a constant that transforms your variable best,
#see pp. 65-66 in Quinn and Keough for explanation.
airquality$ln.Solar.R.min<-log(airquality$Solar.R+min(airquality$Solar.R, na.rm=TRUE))
airquality$ln.Solar.R.001<-log(airquality$Solar.R+0.001)
airquality$ln.Solar.R.1<-log(airquality$Solar.R+1)
#Some other transformations.
airquality$Solar.R.inverted<-1/(airquality$Solar.R)
airquality$sqrt.Solar.R<-sqrt(airquality$Solar.R) #square root
airquality$cube.root.Solar.R<-airquality$Solar.R^(1/3) #cube root
airquality$fourth.root.Solar.R<-airquality$Solar.R^(1/4) #fourth root
#Box-Cox transformation. R only lets you actually run this on a model.
#It's for transforming your response variable ONLY.
library(MASS)
par(mfrow=c(1,2))
Solar.R.boxcox<-boxcox(model.Solar.R)
#Use the graph to zoom in around the mostly likely lambda value.
Solar.R.boxcox.zoom<-boxcox(model.Solar.R,
lambda=seq(-0.05,0.35,0.1))
#From the plot, lambda should be about 0.15 (1/6 or 1/7, so a 6th or 7th root transformation.)
airquality$sixth.root.Ozone<-airquality$Ozone^(1/6)
airquality$arcsinsqrt.inventedproportions<-asin(sqrt(airquality$inventedproportions)) #arcsin square root transformation for proportions
#reflecting your data before transforming (pg. 66 in Quinn and Keough)
reflection.constant<-(max(airquality$somanyzeros)+1)
airquality$lnreflected.somanyzeros<-log((reflection.constant-airquality$somanyzeros)+0.001)
#Re-evaluating your data after transformation: run the exploratory analyses again.
par(mfrow=c(1,2))
hist(airquality$Ozone)
hist(airquality$sixth.root.Ozone)
par(mfrow=c(2,4))
hist(airquality$Solar.R)
hist(airquality$ln.Solar.R.min)
hist(airquality$ln.Solar.R.001)
hist(airquality$ln.Solar.R.1)
#Some other transformations.
hist(airquality$Solar.R.inverted)
hist(airquality$sqrt.Solar.R)
hist(airquality$cube.root.Solar.R)
hist(airquality$fourth.root.Solar.R)
par(mfrow=c(1,2))
hist(airquality$inventedproportions)
hist(airquality$arcsinsqrt.inventedproportions)
hist(airquality$somanyzeros)
hist(airquality$lnreflected.somanyzeros)
#Residuals are what count for our models.
model.sqrt.Solar.R<-lm(Ozone~sqrt.Solar.R,
data=airquality)
par(mfrow=c(2,2))
plot(model.sqrt.Solar.R)
resid.model.sqrt.Solar.R<-resid(model.sqrt.Solar.R)
par(mfrow=c(1,2))
hist(resid.model.sqrt.Solar.R)
#previous
hist(resid.model.Solar.R)
model.somanyzeros<-lm(Ozone~somanyzeros,
data=airquality)
par(mfrow=c(2,2))
plot(model.somanyzeros)
resid.model.somanyzeros<-resid(model.somanyzeros)
model.lnreflected.somanyzeros<-lm(Ozone~lnreflected.somanyzeros,
data=airquality)
par(mfrow=c(2,2))
plot(model.lnreflected.somanyzeros)
resid.model.lnreflected.somanyzeros<-resid(model.lnreflected.somanyzeros)
#compare
hist(resid.model.lnreflected.somanyzeros)
hist(resid.model.somanyzeros)
#The original is actually better... so remember to look at residuals!
###########################################
#Heteroscedasticity: inequality of variance
###########################################
#code from last lab, but with the new sample data.
pairs(~Ozone+Solar.R+Wind+Temp,
data=airquality)
#density plots with our scatterplots
library(car)
scatterplotMatrix(~Ozone+Solar.R+Wind+Temp, #weird formula here, nothing as "response"/Y, and |indicates grouping variable.
data=airquality,
smoother=FALSE, #default is TRUE, draws a nonparametric smoother.
reg.line=FALSE, #default is TRUE, draws a regression line.
diagonal="density", #puts a density kernel line on the diagonal for each group.
# ellipse=TRUE,
# levels=c(0.5,0.95), #dataEllipse quartiles
pch=c(2,3,1), #rearrange the default points a bit
col=c(1,1,1)) #changed the colors to all black.
#tests for homogeneity of variance
#adapted from: http://www.cookbook-r.com/Statistical_analysis/Homogeneity_of_variance/
#These only work on groups, so group your Solar.R first.
airquality$Solar.R.cat<-NA
high<-airquality$Solar.R<=max(airquality$Solar.R, na.rm=TRUE)&airquality$Solar.R>=median(airquality$Solar.R, na.rm=TRUE)
airquality[high,"Solar.R.cat"]="high"
low<-airquality$Solar.R>=min(airquality$Solar.R, na.rm=TRUE)&airquality$Solar.R<median(airquality$Solar.R, na.rm=TRUE)
airquality[low,"Solar.R.cat"]="low"
airquality$Solar.R.cat<-as.factor(airquality$Solar.R.cat)
bartlett.test(Ozone~Solar.R.cat, #variable as a function of groups
data=airquality )
?bartlett.test #gives options for other tests.
library(car)
leveneTest(Ozone~Solar.R.cat,
data=airquality)
#Less sensitive than Bartlett's to non-normality.
###########################################
#Variations on regression
###########################################
#nonlinear relationships (adding a polynomial term), adapted from here.
#The I() function is necessary for R to read your quadratic or cubed term correctly.
quadratic.model<-lm(Ozone~Temp+I(Temp^2),
data=airquality)
summary(quadratic.model)
#cubic model
cubic.model<-lm(Ozone~Temp+I(Temp^2)+I(Temp^3),
data=airquality)
summary(cubic.model)
par(mfrow=c(1,1))
plot(airquality$Ozone~airquality$Temp)
ndQ<- data.frame("Temp"=seq(min(airquality$Temp),
max(airquality$Temp),
length.out=length(airquality$Temp)))
lines(ndQ$Temp,
predict(quadratic.model,
newdata=ndQ),
lty="solid",
col="black")
lines(ndQ$Temp,
predict(cubic.model,
newdata=ndQ),
lty="solid",
col="red")
anova(quadratic.model,cubic.model)
#weighted OLS regression
#Additional info.
#Group your data, or use existing groups. (Here I just use the two groups from before for the airquality data.)
(var.solar<-tapply(airquality$Ozone, #the variable we're interested in
airquality$Solar.R.cat, #index (i.e., the grouping variable)
FUN=var, na.rm=TRUE))
solar.wts<-1/var.solar
airquality$wts<-NA
airquality[airquality$Solar.R.cat=="high","wts"]=solar.wts[1]
airquality[airquality$Solar.R.cat=="low","wts"]=solar.wts[2]
wt.lm<-lm(Ozone~Solar.R,
weights=wts,
data=airquality)
summary(wt.lm)
par(mfrow=c(2,2))
plot(wt.lm)
#compare with original
plot(model.Solar.R)
summary(model.Solar.R)
#Or run as ANOVA.
wt.anova<-lm(Ozone~Solar.R.cat,
weights=wts,
data=airquality)
summary(wt.anova)
plot(wt.anova)
#compare with unweighted.
solar.anova<-lm(Ozone~Solar.R.cat,
data=airquality)
summary(solar.anova)
plot(solar.anova)
#For more details on how to do this (including normalizing your weights, which is apparently needed
#if you plan to get confidence intervals), check the top answer here.
###########################################
#Generalized linear models
###########################################
#functions, how to select family and link
glm.wolf<-glm(number~latitude,
data=Depredations,
family=poisson)
#uses default link for family.
summary(glm.wolf)
plot(glm.wolf)
par(mfrow=c(1,1))
plot(Depredations$number~Depredations$latitude)
ndW<- data.frame("latitude"=seq(min(Depredations$latitude),
max(Depredations$latitude),
length.out=length(Depredations$latitude)))
#plot the prediction with the new data (otherwise it uses rownumber and stretches the line out uselessly).
lines(ndW$latitude,
predict.glm(glm.wolf,
newdata=ndW,
type="response"),
lty="solid",lwd=2)
#GLM with a factor.
glm.snails<-glm(Deaths~Rel.Hum*Species,
data=snails,
family=poisson(link="log"))
#select the link manually, although still leaving it at default.
summary(glm.snails)
#quasipoisson suggested because very high deviance to df ratio.
glm.snails.q<-glm(Deaths~Rel.Hum*Species,
data=snails,
family=quasipoisson)
summary(glm.snails.q)
#plot the two lines.
plot(snails$Deaths[snails$Species=="A"]~jitter(snails$Rel.Hum[snails$Species=="A"]),
pch="A",
cex=0.5,
xlim=c(60,80),
ylim=c(0,20))
points(snails$Deaths[snails$Species=="B"]~jitter(snails$Rel.Hum[snails$Species=="B"]),
pch="B",
cex=0.5)
ndA<- data.frame("Rel.Hum"=seq(min(snails$Rel.Hum),
max(snails$Rel.Hum),
length.out=length(snails$Rel.Hum)),
"Species"="A")
lines(ndA$Rel.Hum,
predict.glm(glm.snails.q,
newdata=ndA,
type="response"), #type="response" is necessary for these interesting links.
lty="solid")
ndB<- data.frame("Rel.Hum"=seq(min(snails$Rel.Hum),
max(snails$Rel.Hum),
length.out=length(snails$Rel.Hum)),
"Species"="B")
lines(ndB$Rel.Hum,
exp(predict.glm(glm.snails.q,
newdata=ndB)),
lty="dotted")
#Just to show that predict.glm is going the same thing as exp(prediction):
lines(ndB$Rel.Hum,
predict.glm(glm.snails.q,
newdata=ndB,
type="response"),
lty="solid",
col="blue")
(predictions.compared<-data.frame(snails$Species,
snails$Deaths,
fitted(glm.snails.q),
predict(glm.snails.q, type="response"),
predict(glm.snails.q))) #note that without type="response" you get the linear predictors only.
#Run as an ANCOVA (analysis of covariance)
Anova(glm.snails.q, type=3)
#More info on Poisson regression specifically.
#evaluation of distribution fit using deviance (see lecture).
#Look at summary(yourmodel)
summary(glm.snails.q)
#Ratio of residual deviance to degrees of freedom as described in lecture.
#You can also look for packages that expand on this topic.
###########################################
#Nonparametric tests
###########################################
#Mann-Whitney/Wilcoxon tests.
wilcox.test(Ozone~Solar.R.cat,
data=airquality)
#pairwise.wilcox.test() if you have paired measurements (i.e. repeated measurements), also known as the Signed-Ranks test.
#For two or MORE samples, use the Kruskal-Wallis rank sum test.
kruskal.test(Ozone~Month,
data=airquality) #for two or more samples.
boxplot(Ozone~Month,
data=airquality,
xlab="Month",
ylab="Ozone in some units")
#Opinions on post-hoc tests vary, but a few packages are available and easily findable via google.
Subscribe to:
Posts (Atom)