# (1) Each member of a group of 10 subjects is asked to read two sentences, each containing a particular vowel segment, and the pitch level of the vowel is measured in each reading. The results (in arbitrary units) are as follows; you want to test whether the average pitch levels in sentence 1 and sentence 2 are significantly different.
# SUBJECT SENTENCE1 SENTENCE2
# 1 30 27
# 2 41 36
# 3 34 35
# 4 28 30
# 5 35 38
# 6 39 44
# 7 40 46
# 8 29 31
# 9 27 33
# 10 33 37
# Exercise (1): paired comparison of vowel pitch in two sentences read by the same 10 subjects.
rm(list=ls(all.names=TRUE)) # clear memory (argument name spelled out instead of relying on partial matching)
SENTENCE1 <- c(30, 41, 34, 28, 35, 39, 40, 29, 27, 33) # pitch values for sentence 1, one per subject
SENTENCE2 <- c(27, 36, 35, 30, 38, 44, 46, 31, 33, 37) # pitch values for sentence 2, same subject order
differences <- SENTENCE2-SENTENCE1 # pairwise differences (positive = higher pitch in sentence 2)

# visual exploration: differences on the left, per-subject change on the right
par(mfrow=c(1, 2)) # define two plotting panels
plot(differences, type="h", ylim=c(-7, 7)) # one vertical line per subject
plot(1, 1, type="n", axes=FALSE, # set up an empty plot (type="n") without axes ...
   xlim=c(1, 2), ylim=c(20, 50), # ... with these axis limits ...
   xlab="Sentence", ylab="Pitch measurement") # ... and these labels
axis(1, at=1:2); axis(2); grid() # add axes and a grid
arrows(rep(1, length(SENTENCE1)), SENTENCE1, # one arrow per subject from x=1, y=value in sentence 1
   rep(2, length(SENTENCE2)), SENTENCE2, # to x=2, y=value in sentence 2
   col=ifelse(differences>0, "blue", "red")) # blue if the pitch went up, red otherwise
arrows(1, mean(SENTENCE1), # bold arrow connecting the two means:
   2, mean(SENTENCE2), # from the mean of sentence 1 to the mean of sentence 2
   lwd=3)
par(mfrow=c(1, 1)) # back to one plotting panel

# descriptive statistics
mean(SENTENCE1); mean(SENTENCE2) # means of both vectors
sd(SENTENCE1); sd(SENTENCE2) # standard deviations of both vectors

# test for normality of the differences (the assumption of the paired t-test)
# NOTE(review): ?ks.test states that the distribution parameters should be pre-specified
# rather than estimated from the sample, so treat this p-value as approximate.
ks.test(differences, # K-S test on the differences
   "pnorm", # against a normal distribution ...
   mean=mean(differences), # ... with the same mean as the differences
   sd=sd(differences)) # ... and the same sd as the differences

# significance tests: both calls below test the same H0 (mean difference = 0)
t.test(SENTENCE1, SENTENCE2, paired=TRUE) # t-test for dependent samples
t.test(differences) # one-sample t-test on the differences

# alternative way of reading in the data:
# the columns are copied out explicitly instead of attach()ing the data frame, because
# attach() leaves qwe on the search path (rm(list=ls()) does not detach it) and its
# columns would be masked by any global vectors of the same name anyway
qwe <- read.table(text="
SUBJECT SENTENCE1 SENTENCE2
1 30 27
2 41 36
3 34 35
4 28 30
5 35 38
6 39 44
7 40 46
8 29 31
9 27 33
10 33 37
", header=TRUE)
summary(qwe)
SENTENCE1 <- qwe$SENTENCE1
SENTENCE2 <- qwe$SENTENCE2
# (2) An experiment is performed to test the effect of certain linguistic features on the politeness of two sentences in a particular social context. 15 informants are asked to rate the two sentences on a scale from 1 (very impolite) to 5 (very polite), with the following results. You want to test the hypothesis that sentence 2 is rated as more polite than sentence 1.
# SUBJECT SENTENCE1 SENTENCE2
# 1 1 3
# 2 2 2
# 3 1 4
# 4 2 3
# 5 3 1
# 6 2 4
# 7 1 1
# 8 2 3
# 9 3 5
# 10 1 3
# 11 2 3
# 12 1 4
# 13 2 1
# 14 2 4
# 15 1 3
# Exercise (2): paired ordinal ratings (1-5) of two sentences by 15 informants;
# directional hypothesis: sentence 2 is rated as more polite than sentence 1.
rm(list=ls(all.names=TRUE)) # clear memory (argument name spelled out instead of relying on partial matching)
SENTENCE1 <- c(1,2,1,2,3,2,1,2,3,1,2,1,2,2,1) # ratings of sentence 1, one per informant
SENTENCE2 <- c(3,2,4,3,1,4,1,3,5,3,3,4,1,4,3) # ratings of sentence 2, same informant order
differences <- SENTENCE2-SENTENCE1 # pairwise differences (positive = sentence 2 rated as more polite)

# visual exploration: differences on the left, per-informant change on the right
par(mfrow=c(1, 2)) # define two plotting panels
plot(differences, type="h", ylim=c(-7, 7)) # one vertical line per informant
plot(1, 1, type="n", axes=FALSE, # set up an empty plot (type="n") without axes ...
   xlim=c(1, 2), ylim=c(1, 5), # ... with these axis limits ...
   xlab="Sentence", ylab="Politeness rating") # ... and these labels
axis(1, at=1:2); axis(2); grid() # add axes and a grid
arrows(rep(1, length(SENTENCE1)), SENTENCE1, # one arrow per informant from x=1, y=rating of sentence 1
   rep(2, length(SENTENCE2)), SENTENCE2, # to x=2, y=rating of sentence 2
   col=ifelse(differences>0, "blue", "red")) # blue if the rating went up, red otherwise (incl. no change)
arrows(1, mean(SENTENCE1), # bold arrow connecting the two means:
   2, mean(SENTENCE2), # from the mean of sentence 1 to the mean of sentence 2
   lwd=3)
par(mfrow=c(1, 1)) # back to one plotting panel

# significance test: the ratings are ordinal, hence a Wilcoxon test for dependent samples
wilcox.test(SENTENCE1, SENTENCE2, paired=TRUE, # compare the paired ratings
   correct=FALSE, # no continuity correction
   alternative="less") # H1: the ratings of sentence 1 are lower than those of sentence 2

# alternative way of reading in the data:
# the columns are copied out explicitly instead of attach()ing the data frame, because
# attach() leaves qwe on the search path (rm(list=ls()) does not detach it) and its
# columns would be masked by any global vectors of the same name anyway
qwe <- read.table(text="
SUBJECT SENTENCE1 SENTENCE2
1 1 3
2 2 2
3 1 4
4 2 3
5 3 1
6 2 4
7 1 1
8 2 3
9 3 5
10 1 3
11 2 3
12 1 4
13 2 1
14 2 4
15 1 3
", header=TRUE)
summary(qwe)
SENTENCE1 <- qwe$SENTENCE1
SENTENCE2 <- qwe$SENTENCE2
# (3) You want to test whether the numbers of IUs transcribers identify in a recording change depending on whether the transcribers work on the recording once or twice. Ten transcribers annotated a recording for IUs on Monday and then again on Wednesday. Do the numbers of IUs differ on average?
# 1st pass: 16 18 15 18 10 12 16 14 16 11
# 2nd pass: 15 16 13 15 11 11 14 11 15 15
# Exercise (3): paired comparison of IU counts from two annotation passes by the same 10 transcribers.
rm(list=ls(all.names=TRUE)) # clear memory (argument name spelled out instead of relying on partial matching)
FIRST <- c(16,18,15,18,10,12,16,14,16,11) # IU counts of the 1st pass, one per transcriber
SECOND <- c(15,16,13,15,11,11,14,11,15,15) # IU counts of the 2nd pass, same transcriber order
differences <- SECOND-FIRST # pairwise differences (positive = more IUs in the 2nd pass)
n.pairs <- length(FIRST) # number of transcribers; used below so the plot cannot get out of sync with the data

# visual exploration: differences on the left, per-transcriber change on the right
par(mfrow=c(1, 2)) # define two plotting panels
plot(differences, type="h", ylim=c(-5, 5)) # one vertical line per transcriber
plot(1, 1, type="n", axes=FALSE, # set up an empty plot (type="n") without axes ...
   xlim=c(1, 2), ylim=c(10, 20), # ... with these axis limits ...
   xlab="Transcription", ylab="IU numbers") # ... and these labels
axis(1, at=1:2); axis(2); grid() # add axes and a grid
# exactly one arrow per transcriber: a hard-coded count of 15 here would make arrows()
# recycle the 10 data points and draw the first five arrows twice
arrows(rep(1, n.pairs), FIRST, # from x=1, y=count in the 1st pass
   rep(2, n.pairs), SECOND, # to x=2, y=count in the 2nd pass
   col=ifelse(differences>0, "blue", "red")) # blue if the count went up, red otherwise
arrows(1, mean(FIRST), # bold arrow connecting the two means:
   2, mean(SECOND), # from the mean of the 1st pass to the mean of the 2nd pass
   lwd=3)
par(mfrow=c(1, 1)) # back to one plotting panel

# test for normality of the differences
shapiro.test(differences) # Shapiro-Wilk test on the differences
ks.test(differences, # K-S test on the differences
   "pnorm", # against a normal distribution ...
   mean=mean(differences), # ... with the same mean as the differences
   sd=sd(differences)) # ... and the same sd as the differences

# significance tests (non-parametric first, then the parametric counterparts)
wilcox.test(FIRST, SECOND, paired=TRUE, # Wilcoxon test of FIRST vs. SECOND
   correct=FALSE) # without continuity correction
t.test(FIRST, SECOND, paired=TRUE) # t-test for dependent samples of FIRST vs. SECOND
t.test(differences) # one-sample t-test of whether the mean difference is 0

# alternative way of reading in the data:
# FIRST <- scan() # and then paste in the above numbers for the 1st pass with spaces between them
# SECOND <- scan() # and then paste in the above numbers for the 2nd pass with spaces between them
# (4) The following scores are obtained by two groups of subjects on a language proficiency test. You want to test whether the group means differ significantly.
# group A: 41 58 62 51 48 34 63 50 53 60 44
# group B: 38 40 64 47 51 49 32 44 61
# Exercise (4): two-tailed comparison of proficiency scores from two independent groups.
rm(list=ls(all.names=TRUE)) # clear memory (argument name spelled out instead of relying on partial matching)
# the 7th score of group A is 63 as in the task statement above (the vector previously had 64,
# so typing the vector and pasting the listed numbers via scan() gave different data)
# NOTE(review): confirm against the original data source which of the two values is correct
grp.A <- c(41,58,62,51,48,34,63,50,53,60,44) # scores of group A (n=11)
grp.B <- c(38,40,64,47,51,49,32,44,61) # scores of group B (n=9)

# visual exploration
boxplot(grp.A, grp.B, # one boxplot per group
   notch=TRUE) # with notches

# test for normality, separately for each group
shapiro.test(grp.A)
ks.test(grp.A, # K-S test on the values of group A
   "pnorm", # against a normal distribution ...
   mean=mean(grp.A), # ... with the same mean as group A
   sd=sd(grp.A)) # ... and the same sd as group A
shapiro.test(grp.B)
ks.test(grp.B, # K-S test on the values of group B
   "pnorm", # against a normal distribution ...
   mean=mean(grp.B), # ... with the same mean as group B
   sd=sd(grp.B)) # ... and the same sd as group B

# test for variance homogeneity
var.test(grp.A, grp.B)

# descriptive statistics
summary(grp.A); summary(grp.B) # summary statistics for both groups
sd(grp.A); sd(grp.B) # standard deviations for both groups

# significance test
t.test(grp.A, grp.B) # t-test for independent samples (two-tailed)

# alternative way of reading in the data:
# grp.A <- scan() # and then paste in the above numbers for grp.A with spaces between them
# grp.B <- scan() # and then paste in the above numbers for grp.B with spaces between them
# (5) 20 adult learners of Swahili are divided into two groups of 10 at random. Group A is taught by a grammar-translation method, group B by an audio-lingual method. At the end of the course, the two groups obtain the following scores on a proficiency test. You want to test whether group B performs significantly better on average.
# group A: 45 58 60 51 53 59 54 40 56 56
# group B: 48 58 71 56 59 62 64 62 52 69
# Exercise (5): do learners taught with method B score higher than those taught with method A?
rm(list = ls(all = TRUE)) # start from an empty workspace

grp.A <- c(45, 58, 60, 51, 53, 59, 54, 40, 56, 56) # grammar-translation group
grp.B <- c(48, 58, 71, 56, 59, 62, 64, 62, 52, 69) # audio-lingual group

# side-by-side notched boxplots of the two groups
boxplot(grp.A, grp.B, notch = TRUE)

# normality checks: group A
shapiro.test(grp.A)
ks.test(
   grp.A, "pnorm",       # compare group A to a normal distribution
   mean = mean(grp.A),   # whose mean ...
   sd = sd(grp.A)        # ... and sd are those of group A
)

# normality checks: group B
shapiro.test(grp.B)
ks.test(
   grp.B, "pnorm",       # compare group B to a normal distribution
   mean = mean(grp.B),   # whose mean ...
   sd = sd(grp.B)        # ... and sd are those of group B
)

# F-test of whether the two variances are equal
var.test(grp.A, grp.B)

# descriptives: five-number summaries plus means, then standard deviations
summary(grp.A)
summary(grp.B)
sd(grp.A)
sd(grp.B)

# t-test for independent samples, one-tailed:
# H1 is that the scores in grp.A are lower than those in grp.B
t.test(grp.A, grp.B, alternative = "less")

# the data can also be entered interactively:
# grp.A <- scan() # then paste the group A scores from above, separated by spaces
# grp.B <- scan() # then paste the group B scores from above, separated by spaces
# (6) A study counted the frequencies of inversion of subject and verb after an introductory adverbial in declarative affirmative clauses in two time periods. You want to test for each time period whether the texts differ with regard to their frequencies of inversions.
# Inversion Inversion
# Early ME no yes
# Text 1 27 11
# Text 2 34 16
# Text 3 34 14
# Inversion Inversion
# Late ME no yes
# Text 1 109 27
# Text 2 61 11
# Text 3 49 29
# Exercise (6): for each period, do the three texts differ in how often they use inversion?
rm(list = ls(all = TRUE)) # start from an empty workspace

# --- Early ME ---
# 3x2 frequency table (rows = texts, columns = inversion no/yes), filled row by row
early <- matrix(
   c(27, 11,
     34, 16,
     34, 14),
   byrow = TRUE, ncol = 2,
   dimnames = list(TEXT = 1:3, INVERSION = c("no", "yes"))
)
addmargins(early) # show the table with row and column totals

# chi-squared test of independence; keep the result for later inspection
test.early <- chisq.test(early, correct = FALSE)
test.early
test.early$expected  # frequencies expected under H0
test.early$residuals # Pearson residuals

# effect size: chi-squared divided by (n times (smaller table dimension - 1)), square-rooted
sqrt(test.early$statistic / (sum(early) * (min(dim(early)) - 1)))

assocplot(t(early)) # association plot of the transposed table

# --- Late ME ---
# same layout as above for the later period
late <- matrix(
   c(109, 27,
     61, 11,
     49, 29),
   byrow = TRUE, ncol = 2,
   dimnames = list(TEXT = 1:3, INVERSION = c("no", "yes"))
)
addmargins(late) # show the table with row and column totals

# chi-squared test of independence; keep the result for later inspection
test.late <- chisq.test(late, correct = FALSE)
test.late
test.late$expected  # frequencies expected under H0
test.late$residuals # Pearson residuals

# effect size, computed as for the early period
sqrt(test.late$statistic / (sum(late) * (min(dim(late)) - 1)))

assocplot(t(late)) # association plot of the transposed table
# (7) A panel of teachers is asked to grade the reading and writing abilities of 15 children on a scale from 1 (very poor) to 7 (excellent), with the results given below. You want to calculate an appropriate correlation coefficient and test it for significance.
# CHILD READING WRITING
# 1 3 6
# 2 4 4
# 3 5 3
# 4 6 7
# 5 3 4
# 6 4 2
# 7 3 5
# 8 5 6
# 9 2 3
# 10 4 5
# 11 7 3
# 12 5 4
# 13 6 4
# 14 2 3
# 15 3 2
# Exercise (7): correlation between ordinal reading and writing grades (1-7) of 15 children.
rm(list=ls(all.names=TRUE)) # clear memory (argument name spelled out instead of relying on partial matching)
READING <- c(3, 4, 5, 6, 3, 4, 3, 5, 2, 4, 7, 5, 6, 2, 3) # reading grades, one per child
WRITING <- c(6, 4, 3, 7, 4, 2, 5, 6, 3, 5, 3, 4, 4, 3, 2) # writing grades, same child order

# visual exploration
plot(WRITING ~ READING) # WRITING (y-axis) as a function of READING (x-axis)
lines(lowess(WRITING ~ READING)) # add a locally-weighted smoother

# correlation and significance test: the grades are ordinal, hence a rank-based coefficient
cor.test(READING, WRITING, # correlation test for these data
   method="kendall") # using Kendall's tau
# it might be useful to adopt a directional H1 here, actually

# alternative way of reading in the data:
# the columns are copied out explicitly instead of attach()ing the data frame, because
# attach() leaves qwe on the search path (rm(list=ls()) does not detach it) and its
# columns would be masked by any global vectors of the same name anyway
qwe <- read.table(text="
CHILD READING WRITING
1 3 6
2 4 4
3 5 3
4 6 7
5 3 4
6 4 2
7 3 5
8 5 6
9 2 3
10 4 5
11 7 3
12 5 4
13 6 4
14 2 3
15 3 2
", header=TRUE)
summary(qwe)
READING <- qwe$READING
WRITING <- qwe$WRITING