#Section 6.3 A Model for the Normal Distribution
#This section describes what produces a normal distribution and
#a heuristic showing how it is related to the binomial distribution.
#Section 6.4 Applications of the Normal Distribution
#This section describes how to do what we did to get Table 6.1,
#but with a different set of data, the one from Section 4.2.
#Mean and standard deviation of the birth weights (in oz).
mean.bw <- 109.9
sd.bw <- 13.593
#Standard deviate of a 151 oz birth weight; the outer parentheses print it.
(sdeviate.bw <- (151 - mean.bw) / sd.bw)
#Cumulative probability at the unrounded deviate (the more precise value) ...
pnorm(sdeviate.bw)
#... and at 3.02, the rounded deviate the book uses, so we use it from here on.
pnorm(3.02)
#Some of the calculations the book does on pp. 103-104 follow.
#Upper tail: only a very few individuals are heavier than 151 oz.
1 - pnorm(3.02)
#Both directions: the distribution is symmetrical, so double the one-tailed value.
2 * (1 - pnorm(3.02))
#pnorm() uses lower.tail=TRUE by default; lower.tail=FALSE gives the upper tail,
pnorm(3.02, lower.tail = FALSE)
#which is the same as our earlier value of
1 - pnorm(3.02)
#The default setting, written out for comparison.
pnorm(3.02, lower.tail = TRUE)
#lower.tail simply tells us which direction to look from our standard deviate:
#the upper or the lower tail of the distribution.
#Section 6.5 Fitting a Normal Distribution to Observed Data
#Class marks of the birth weight classes: 59.5, 67.5, ..., 171.5 (class width 8).
classmark <- seq(from = 59.5, to = 171.5, by = 8)
#Observed frequency of each class, in the same order as the class marks.
frequencies <- c(
  2, 6, 39, 385, 888, 1729, 2240, 2007,
  1233, 641, 201, 74, 14, 5, 1
)
#The total is our sample size; it also serves as a check on the data entry.
samplesize <- sum(frequencies)
#Sum for each class: class mark times frequency.
classsums <- classmark * frequencies
#Put the three vectors side by side as the columns of one dataset.
birthweights <- data.frame(classmark, frequencies, classsums)
#Append the next class up (179.5), which contains 0 individuals, and print the table.
(birthweights <- rbind(birthweights, c(179.5, 0, 0)))
#On page 104, equation 6.2 is like equation 6.1 above, but multiplied by the
#sample size (n) and the class interval (i) to give frequencies instead of densities.
#Plot the normal curve scaled to frequencies: the normal density, written out
#by hand, multiplied by the sample size and the class interval.
#
#Arguments:
#  mean  mean of the normal distribution.
#  sd    standard deviation of the normal distribution (must be positive).
#  n     sample size.
#  i     class interval.
#
#Draws a new plot (add = FALSE) and returns, invisibly, what curve() returns:
#a list with the x and y values that were drawn.
normal.manual.applied <- function(mean, sd, n, i) {
  #A non-positive sd has no normal curve and would collapse the x-range below.
  stopifnot(sd > 0)
  curve(
    ((1 / (sd * sqrt(2 * pi))) * exp((-((x - mean) / sd)^2) / 2)) * n * i,
    #Go from -4 to +4 standard deviations around the mean. A fixed -4..4 range
    #is only correct for mean=0 and sd=1; this form is identical in that case
    #and also correct for any other mean and sd.
    from = mean - 4 * sd,
    to = mean + 4 * sd,
    add = FALSE,
    ylab = "freq",
    xlab = "Y",
    type = "l"
  )
}
#Draw the curve that Table 6.1 also has: the same class intervals of 0.5,
#1000 samples, and mean=0 with sd=1.
normal.manual.applied(mean = 0, sd = 1, n = 1000, i = 0.5)
#Let's do this with the birthweights data.
#Mean and standard deviation of the birth weight data.
birthweights.mean <- 109.8996
birthweights.sd <- 13.5942
#Like above, we need the lower boundary of each class: the class mark minus 4.
birthweights[["boundaries"]] <- birthweights[["classmark"]] - 4
#Cumulative normal probability at each lower boundary, using the mean and sd
#of our dataset.
birthweights[["pnorm.results"]] <- pnorm(
  birthweights[["boundaries"]],
  mean = birthweights.mean,
  sd = birthweights.sd
)
#The difference between each row and the next is the expected proportion of
#that class. The last row has no next row to subtract, so diff() returns one
#value fewer than the table has rows; that is why the extra class with a
#frequency of zero was added, and why a zero is appended for the last
#difference, as they do in Table 6.2.
birthweights[["expected.freqs"]] <- c(
  abs(diff(birthweights[["pnorm.results"]])),
  0
)
#Multiply the proportions by the sample size to get the expected frequencies
#for a sample of this size, rounded to one decimal as in the table.
birthweights[["expected.freqs.values"]] <- round(
  birthweights[["expected.freqs"]] * samplesize,
  1
)
#Add plus and minus signs showing the direction of each departure:
#"-" where observed is below expected, "+" otherwise (including no difference).
birthweights[["departure.signs"]] <- ifelse(
  birthweights[["frequencies"]] - birthweights[["expected.freqs.values"]] < 0,
  "-",
  "+"
)
#View the table to confirm it has the same data as Table 6.2.
birthweights