rm(list=ls(all=TRUE)); options(warn=1)
library(effects); library(lme4); library(MuMIn)
load("2018_STG_OverUnderUse&Multifactoriality_JOf2ndLgStud.RData")



# preparing data sets #############################################################################
## data frame x: all in 1 data set
## Pool the two sets of input vectors (icle.* and locn.*) into one data frame x.
## NOTE(review): these vectors are presumably provided by the .RData file loaded above,
## with icle.* holding the learner data and locn.* the native-speaker data -- confirm.
all.quites <- c(icle.quites, locn.quites)
all.quites <- ifelse(all.quites == 0, "no", "yes")   # recode: 0 -> "no", anything else -> "yes"
## Strip a literal ".txt" extension. The dot is escaped so that it cannot match an
## arbitrary character (an unescaped ".txt$" would also turn e.g. "abctxt" into "ab").
all.files <- sub("\\.txt$", "", c(icle.files, locn.files), perl=TRUE)
all.l1s <- c(icle.l1s, locn.l1s)

## one row per element of the pooled vectors; all three columns are factors
x <- data.frame(
   QUITE=factor(all.quites),
   FILE=factor(all.files),
   L1=factor(all.l1s))
summary(x)
##  QUITE                FILE          L1
##  no :1197086   BRSUR1_01:   3704   EN:323898
##  yes:    451   BRSUR1_05:   3545   FR:227361
##                SPAL1005 :   3395   GE:232773
##                BRSUR1_15:   3161   NO:213716
##                BRSUR1_04:   3071   SP:199789
##                BRSUR1_09:   2995
##                (Other)  :1177666
## orthogonal contrasts for L1
## Fix the level order of L1 (EN first) and attach four planned contrasts to it.
## The four contrast vectors stay in the workspace because the subset data frames
## created further down re-attach the same contrast matrix.
x$L1 <- factor(x$L1, levels=c("EN", "SP", "FR", "NO", "GE"))
##                        EN    SP    FR    NO    GE
native_vs_nonnative <- c(0.8, -0.2, -0.2, -0.2, -0.2)   # EN vs. the four other L1s
rom_vs_germ         <- c(0,    0.5,  0.5, -0.5, -0.5)   # SP and FR vs. NO and GE
span_vs_fren        <- c(0,    0.5, -0.5,  0,    0)     # SP vs. FR
norw_vs_germ        <- c(0,    0,    0,    0.5, -0.5)   # NO vs. GE
contrasts(x$L1) <- cbind(
   native_vs_nonnative=native_vs_nonnative,
   rom_vs_germ=rom_vs_germ,
   span_vs_fren=span_vs_fren,
   norw_vs_germ=norw_vs_germ)

## data frame min1: data with only the files that contain at least 1 quite
## FILE-by-QUITE table of counts; also reused further down for the per-file proportions
qwe <- table(x$FILE, x$QUITE)
## keep only the rows of x whose file has at least one "yes", then drop unused factor levels
min1 <- x[x$FILE %in% names(which(qwe[, "yes"] > 0)), ]
min1 <- droplevels(min1)
## re-attach the planned contrasts to the L1 column of the subset
contrasts(min1$L1) <- cbind(
   native_vs_nonnative=native_vs_nonnative,
   rom_vs_germ=rom_vs_germ,
   span_vs_fren=span_vs_fren,
   norw_vs_germ=norw_vs_germ)

## data frame min1lrn:  with all EN files and only the learner files that contain at least 1 quite
## All EN rows of x plus only those learner (non-EN) rows whose file contains at least
## one quite. The learner rows are taken from min1, which already holds only files with
## at least one "yes"; taking them from x (as before) merely reproduced x in a different
## row order, so no learner file was ever filtered out.
min1lrn <- droplevels(rbind(
   x[x$L1 == "EN", ],
   min1[min1$L1 != "EN", ]))
## re-attach the planned contrasts to the L1 column of the subset
   contrasts(min1lrn$L1) <- cbind(native_vs_nonnative, rom_vs_germ, span_vs_fren, norw_vs_germ)

## data frame mingermoutl: min1 data without the one highest German outlier
## per-file proportions of "no"/"yes" (row-wise proportions of qwe), summarised per column
asd <- prop.table(qwe, 1)
apply(asd, 2, summary)
##
##                  no          yes
##   Min.    0.9897377 0.0000000000
##   1st Qu. 1.0000000 0.0000000000
##   Median  1.0000000 0.0000000000
##   Mean    0.9996182 0.0003818391
##   3rd Qu. 1.0000000 0.0000000000
##   Max.    1.0000000 0.0102622577
## Name of the single German (L1 == "GE") file in min1 with the highest proportion of
## "yes". The search is restricted to GE files and which.max() returns exactly one file
## (the first one in case of ties), so "the one highest German outlier" is what gets
## removed even if a file of another L1 had a higher proportion.
germ.outl <- local({
   ge.files <- unique(as.character(min1$FILE[min1$L1 == "GE"]))
   names(which.max(asd[, "yes"][ge.files]))
})
## %in% (rather than !=) keeps all rows if there is no GE file at all
mingermoutl <- droplevels(min1[!(min1$FILE %in% germ.outl), ])
   contrasts(mingermoutl$L1) <- cbind(native_vs_nonnative, rom_vs_germ, span_vs_fren, norw_vs_germ)

## proportions of files without quite in them, per L1
## FILE-by-QUITE counts and, per file, the proportion in the second column of that table
zxc1 <- table(x$FILE, x$QUITE)
zxc2 <- prop.table(zxc1, 1)[,2]
## per L1: number of files whose proportion is exactly 0, divided by the number of files
percs.of.0 <- local({
   l1.of.file <- x$L1[match(names(zxc2), x$FILE)]   # L1 of the first row found for each file
   n.without <- tapply(zxc2, l1.of.file, function(props) sum(props == 0))
   n.files <- tapply(zxc2, l1.of.file, length)
   n.without / n.files
})
percs.of.0
##        EN        SP        FR        NO        GE
## 0.8665049 0.8127490 0.8357349 0.7854890 0.7848970
# glms ############################################################################################
## data frame x: all in 1 data set
## binomial GLM of QUITE on L1 (no FILE term), fitted to the full data frame x
model.l1s.glm.x <- glm(QUITE ~ L1, family=binomial, data=x)
summary(model.l1s.glm.x)
##
## Call:
## glm(formula = QUITE ~ L1, family = binomial, data = x)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -0.0354  -0.0293  -0.0269  -0.0203   4.1191
##
## Coefficients:
##                       Estimate Std. Error  z value Pr(>|z|)
## (Intercept)           -7.91602    0.04919 -160.935  < 2e-16 ***
## L1native_vs_nonnative -0.70909    0.13333   -5.318 1.05e-07 ***
## L1rom_vs_germ         -0.42463    0.10672   -3.979 6.92e-05 ***
## L1span_vs_fren        -0.11860    0.16682   -0.711  0.47713
## L1norw_vs_germ        -0.37660    0.13315   -2.828  0.00468 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 8013.5  on 1197536  degrees of freedom
## Residual deviance: 7948.0  on 1197532  degrees of freedom
## AIC: 7958
##
## Number of Fisher Scoring iterations: 11
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glm.x, test="Chisq") # overall G-squared
## Single term deletions
##
## Model:
## QUITE ~ L1
##        Df Deviance    AIC    LRT  Pr(>Chi)
## <none>      7948.0 7958.0
## L1      4   8013.5 8015.5 65.475 2.044e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## confidence intervals for the coefficients (2.5 % and 97.5 % bounds in the recorded output)
confint(model.l1s.glm.x)             # takes a long time
##                            2.5 %     97.5 %
## (Intercept)           -8.0141108 -7.8212336
## L1native_vs_nonnative -0.9785627 -0.4550681
## L1rom_vs_germ         -0.6356134 -0.2168553
## L1span_vs_fren        -0.4485036  0.2068961
## L1norw_vs_germ        -0.6405195 -0.1178410
## effect of L1 in the GLM on x: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glm.x)
data.frame(l1eff)
##   L1          fit         se        lower        upper
## 1 EN 0.0002068552 0.12218161 0.0001628113 0.0002628109
## 2 SP 0.0003203380 0.12502002 0.0002507387 0.0004092484
## 3 FR 0.0003606599 0.11045144 0.0002904771 0.0004477921
## 4 NO 0.0004304778 0.10427965 0.0003509315 0.0005280456
## 5 GE 0.0006272205 0.08278656 0.0005333263 0.0007376330
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")
## observed proportions of QUITE within each L1 (columns sum to 1), for comparison with the fitted values
prop.table(table(QUITE=x$QUITE, L1=x$L1), 2)
##      L1
## QUITE           EN           SP           FR           NO           GE
##   no  0.9997931448 0.9996796620 0.9996393401 0.9995695222 0.9993727795
##   yes 0.0002068552 0.0003203380 0.0003606599 0.0004304778 0.0006272205
## data frame min1: data with only the files that contain at least 1 quite
## binomial GLM of QUITE on L1 (no FILE term), fitted to min1
model.l1s.glm.min1 <- glm(QUITE ~ L1, family=binomial, data=min1)
summary(model.l1s.glm.min1)
##
## Call:
## glm(formula = QUITE ~ L1, family = binomial, data = min1)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -0.0676  -0.0676  -0.0632  -0.0533   3.6760
##
## Coefficients:
##                       Estimate Std. Error  z value Pr(>|z|)
## (Intercept)           -6.34698    0.04922 -128.950  < 2e-16 ***
## L1native_vs_nonnative -0.51027    0.13339   -3.825 0.000131 ***
## L1rom_vs_germ         -0.19649    0.10680   -1.840 0.065798 .
## L1span_vs_fren        -0.42771    0.16694   -2.562 0.010406 *
## L1norw_vs_germ        -0.13490    0.13325   -1.012 0.311358
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 6601.3  on 250420  degrees of freedom
## Residual deviance: 6571.1  on 250416  degrees of freedom
## AIC: 6581.1
##
## Number of Fisher Scoring iterations: 9
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glm.min1, test="Chisq") # overall G-squared
## Single term deletions
##
## Model:
## QUITE ~ L1
##        Df Deviance    AIC    LRT  Pr(>Chi)
## <none>      6571.1 6581.1
## L1      4   6601.3 6603.3 30.212 4.432e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## confidence intervals for the coefficients (2.5 % and 97.5 % bounds in the recorded output)
confint(model.l1s.glm.min1)             # takes a long time
##                            2.5 %      97.5 %
## (Intercept)           -6.4451359 -6.25212781
## L1native_vs_nonnative -0.7798769 -0.25611035
## L1rom_vs_germ         -0.4076272  0.01144116
## L1span_vs_fren        -0.7578374 -0.10197863
## L1norw_vs_germ        -0.3990268  0.12406874
## effect of L1 in the GLM on min1: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glm.min1)
data.frame(l1eff)
##   L1         fit         se        lower       upper
## 1 EN 0.001163457 0.12223662 0.0009158198 0.001477956
## 2 SP 0.001418094 0.12508785 0.0011101048 0.001811377
## 3 FR 0.002173337 0.11055171 0.0017506897 0.002697743
## 4 NO 0.001996961 0.10436143 0.0016281614 0.002449094
## 5 GE 0.002284713 0.08285529 0.0019429151 0.002686478
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")
## observed proportions of QUITE within each L1 in min1 (columns sum to 1)
prop.table(table(QUITE=min1$QUITE, L1=min1$L1), 2)
##      L1
## QUITE          EN          SP          FR          NO          GE
##   no  0.998836543 0.998581906 0.997826663 0.998003039 0.997715287
##   yes 0.001163457 0.001418094 0.002173337 0.001996961 0.002284713
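## data frame min1lrn: with all EN files and only the learner files that contain at least 1 quite
## NOTE(review): the output recorded below is identical to the output recorded for data frame x
## (same 1197536 null degrees of freedom, same deviances and estimates), so it was evidently not
## produced from a learner-filtered subset; re-run this section and replace the recorded output.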
## binomial GLM of QUITE on L1 (no FILE term), fitted to min1lrn
model.l1s.glm.min1lrn <- glm(QUITE ~ L1, family=binomial, data=min1lrn)
summary(model.l1s.glm.min1lrn)
##
## Call:
## glm(formula = QUITE ~ L1, family = binomial, data = min1lrn)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -0.0354  -0.0293  -0.0269  -0.0203   4.1191
##
## Coefficients:
##                       Estimate Std. Error  z value Pr(>|z|)
## (Intercept)           -7.91602    0.04919 -160.935  < 2e-16 ***
## L1native_vs_nonnative -0.70909    0.13333   -5.318 1.05e-07 ***
## L1rom_vs_germ         -0.42463    0.10672   -3.979 6.92e-05 ***
## L1span_vs_fren        -0.11860    0.16682   -0.711  0.47713
## L1norw_vs_germ        -0.37660    0.13315   -2.828  0.00468 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 8013.5  on 1197536  degrees of freedom
## Residual deviance: 7948.0  on 1197532  degrees of freedom
## AIC: 7958
##
## Number of Fisher Scoring iterations: 11
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glm.min1lrn, test="Chisq")
## Single term deletions
##
## Model:
## QUITE ~ L1
##        Df Deviance    AIC    LRT  Pr(>Chi)
## <none>      7948.0 7958.0
## L1      4   8013.5 8015.5 65.475 2.044e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## confidence intervals for the coefficients (2.5 % and 97.5 % bounds in the recorded output)
confint(model.l1s.glm.min1lrn)
##                            2.5 %     97.5 %
## (Intercept)           -8.0141108 -7.8212336
## L1native_vs_nonnative -0.9785627 -0.4550681
## L1rom_vs_germ         -0.6356134 -0.2168553
## L1span_vs_fren        -0.4485036  0.2068961
## L1norw_vs_germ        -0.6405195 -0.1178410
## effect of L1 in the GLM on min1lrn: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glm.min1lrn)
data.frame(l1eff)
##   L1          fit         se        lower        upper
## 1 EN 0.0002068552 0.12218161 0.0001628113 0.0002628109
## 2 SP 0.0003203380 0.12502002 0.0002507387 0.0004092484
## 3 FR 0.0003606599 0.11045144 0.0002904771 0.0004477921
## 4 NO 0.0004304778 0.10427965 0.0003509315 0.0005280456
## 5 GE 0.0006272205 0.08278656 0.0005333263 0.0007376330
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")
## observed proportions of QUITE within each L1 in min1lrn (columns sum to 1)
prop.table(table(QUITE=min1lrn$QUITE, L1=min1lrn$L1), 2)
##      L1
## QUITE           EN           SP           FR           NO           GE
##   no  0.9997931448 0.9996796620 0.9996393401 0.9995695222 0.9993727795
##   yes 0.0002068552 0.0003203380 0.0003606599 0.0004304778 0.0006272205
## data frame mingermoutl: min1 data without the one highest German outlier
## binomial GLM of QUITE on L1 (no FILE term), fitted to mingermoutl
model.l1s.glm.mingermoutl <- glm(QUITE ~ L1, family=binomial, data=mingermoutl)
summary(model.l1s.glm.mingermoutl)
##
## Call:
## glm(formula = QUITE ~ L1, family = binomial, data = mingermoutl)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -0.0660  -0.0660  -0.0632  -0.0533   3.6760
##
## Coefficients:
##                       Estimate Std. Error  z value Pr(>|z|)
## (Intercept)           -6.35696    0.04940 -128.675  < 2e-16 ***
## L1native_vs_nonnative -0.49779    0.13350   -3.729 0.000192 ***
## L1rom_vs_germ         -0.17153    0.10733   -1.598 0.109991
## L1span_vs_fren        -0.42771    0.16694   -2.562 0.010406 *
## L1norw_vs_germ        -0.08498    0.13493   -0.630 0.528805
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 6484.3  on 249543  degrees of freedom
## Residual deviance: 6457.3  on 249539  degrees of freedom
## AIC: 6467.3
##
## Number of Fisher Scoring iterations: 9
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glm.mingermoutl, test="Chisq")
## Single term deletions
##
## Model:
## QUITE ~ L1
##        Df Deviance    AIC    LRT  Pr(>Chi)
## <none>      6457.3 6467.3
## L1      4   6484.3 6486.3 26.994 1.994e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## confidence intervals for the coefficients (2.5 % and 97.5 % bounds in the recorded output)
confint(model.l1s.glm.mingermoutl)
##                            2.5 %      97.5 %
## (Intercept)           -6.4554782 -6.26175502
## L1native_vs_nonnative -0.7675885 -0.24340835
## L1rom_vs_germ         -0.3836392  0.03749079
## L1span_vs_fren        -0.7578374 -0.10197863
## L1norw_vs_germ        -0.3521234  0.17757775
## effect of L1 in the GLM on mingermoutl: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glm.mingermoutl)
data.frame(l1eff)
##   L1         fit         se        lower       upper
## 1 EN 0.001163457 0.12223662 0.0009158198 0.001477956
## 2 SP 0.001418094 0.12508785 0.0011101048 0.001811377
## 3 FR 0.002173337 0.11055171 0.0017506897 0.002697743
## 4 NO 0.001996961 0.10436143 0.0016281614 0.002449094
## 5 GE 0.002173706 0.08552876 0.0018388416 0.002569394
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")
## observed proportions of QUITE within each L1 in mingermoutl (columns sum to 1)
prop.table(table(QUITE=mingermoutl$QUITE, L1=mingermoutl$L1), 2)
##      L1
## QUITE          EN          SP          FR          NO          GE
##   no  0.998836543 0.998581906 0.997826663 0.998003039 0.997826294
##   yes 0.001163457 0.001418094 0.002173337 0.001996961 0.002173706
# glmers ##########################################################################################
## data frame x: all in 1 data set
## binomial mixed model: QUITE on L1 with a random intercept per FILE, fitted to x
model.l1s.glmer.x <- glmer(QUITE~L1+(1|FILE), family=binomial, data=x)
summary(model.l1s.glmer.x)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: QUITE~L1+(1 | FILE)
##    Data: x
##
##       AIC       BIC    logLik  deviance  df.resid
##    7811.7    7883.7   -3899.9    7799.7   1197531
##
## Scaled residuals:
##    Min     1Q Median     3Q    Max
## -0.090 -0.015 -0.012 -0.010 70.262
##
## Random effects:
##  Groups Name        Variance Std.Dev.
##  FILE   (Intercept) 1.819    1.349
## Number of obs: 1197537, groups:  FILE, 1764
##
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -8.7274     0.1137 -76.790  < 2e-16 ***
## L1native_vs_nonnative  -0.6513     0.1667  -3.907 9.36e-05 ***
## L1rom_vs_germ          -0.3940     0.1442  -2.732   0.0063 **
## L1span_vs_fren         -0.1004     0.2219  -0.453   0.6508
## L1norw_vs_germ         -0.2807     0.1849  -1.518   0.1289
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
##             (Intr) L1nt__ L1rm__ L1sp__
## L1ntv_vs_nn  0.016
## L1rm_vs_grm  0.101 -0.077
## L1spn_vs_fr  0.040 -0.051  0.108
## L1nrw_vs_gr  0.050 -0.045 -0.100  0.001
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glmer.x, test="Chisq")
## Single term deletions
##
## Model:
## QUITE~L1+(1 | FILE)
##        Df    AIC    LRT   Pr(Chi)
## <none>    7811.7
## L1      4 7831.6 27.847 1.339e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## likelihood-ratio based R-squared; the adjusted value is returned as attribute "adj.r.squared"
r.squaredLR(model.l1s.glmer.x)
## [1] 0.0001784826
## attr(,"adj.r.squared")
## [1] 0.02676184
## effect of L1 in the mixed model on x: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glmer.x)
data.frame(l1eff)
##   L1          fit         se        lower        upper
## 1 EN 9.625203e-05 0.02800289 9.111209e-05 0.0001016819
## 2 SP 1.441697e-04 0.03536275 1.345171e-04 0.0001545149
## 3 FR 1.593995e-04 0.03305301 1.494021e-04 0.0001700659
## 4 NO 1.953535e-04 0.03392135 1.827902e-04 0.0002087802
## 5 GE 2.586420e-04 0.03205177 2.428977e-04 0.0002754065
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")

### excursus: what about random slopes though?
### fitting a model with random slopes is not that useful here given that >95% of the files have 0 or 1 quite in them ...
## same mixed model on x, but with by-FILE random slopes for L1 in addition to the random
## intercept; the summary is printed without the correlation matrix of the fixed effects
## NOTE(review): L1 appears to be constant within each FILE (the per-file L1 lookup via
## match() further up relies on that), so by-FILE slopes for L1 are presumably not
## estimable -- consistent with the convergence failure in the recorded output; confirm.
model.l1s.glmer.x.rslopes <- glmer(QUITE~L1+(1+L1|FILE), family=binomial, data=x)
summary(model.l1s.glmer.x.rslopes, correlation=FALSE)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: QUITE~L1+(1 + L1 | FILE)
##    Data: x
##
##       AIC       BIC    logLik  deviance  df.resid
##    7836.7    8076.6   -3898.4    7796.7   1197517
##
## Scaled residuals:
##    Min     1Q Median     3Q    Max
## -0.088 -0.016 -0.012 -0.009 70.083
##
## Random effects:
##  Groups Name                  Variance Std.Dev. Corr
##  FILE   (Intercept)           1.1498   1.0723
##         L1native_vs_nonnative 1.3212   1.1494    0.01
##         L1rom_vs_germ         1.5473   1.2439    0.22  0.36
##         L1span_vs_fren        2.1636   1.4709   -0.46 -0.32 -0.06
##         L1norw_vs_germ        0.8328   0.9126    0.21  0.09  0.12 -0.05
## Number of obs: 1197537, groups:  FILE, 1764
##
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -8.7748     0.1236 -70.969   <2e-16 ***
## L1native_vs_nonnative  -0.6827     0.3348  -2.039   0.0415 *
## L1rom_vs_germ          -0.6437     0.2650  -2.429   0.0151 *
## L1span_vs_fren          0.3594     0.4330   0.830   0.4065
## L1norw_vs_germ         -0.3708     0.3104  -1.195   0.2322
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glmer.x.rslopes, test="Chisq")
## Single term deletions
##
## Model:
## QUITE~L1+(1 + L1 | FILE)
##        Df    AIC    LRT  Pr(Chi)
## <none>    7836.7
## L1      4 7843.1 14.382 0.006172 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## likelihood-ratio based R-squared; the adjusted value is returned as attribute "adj.r.squared"
r.squaredLR(model.l1s.glmer.x.rslopes)
## [1] 0.0001809752
## attr(,"adj.r.squared")
## [1] 0.02713559
## effect of L1 in the random-slopes model on x: fitted value, se and lower/upper bound per L1
l1eff <- effect("L1", model.l1s.glmer.x.rslopes)
data.frame(l1eff)
##   L1          fit         se        lower        upper
## 1 EN 8.952063e-05 0.02800289 8.474013e-05 9.457079e-05
## 2 SP 1.536900e-04 0.03536275 1.434000e-04 1.647182e-04
## 3 FR 1.072949e-04 0.03305301 1.005651e-04 1.144750e-04
## 4 NO 2.030534e-04 0.03392135 1.899949e-04 2.170092e-04
## 5 GE 2.941794e-04 0.03205177 2.762724e-04 3.132467e-04
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")

## data frame min1: data with only the files that contain at least 1 quite
## binomial mixed model: QUITE on L1 with a random intercept per FILE, fitted to min1;
## the summary is printed without the correlation matrix of the fixed effects
## (the recorded output shows a FILE variance of 0 and the same estimates as the plain GLM on min1)
model.l1s.glmer.min1 <- glmer(QUITE~L1+(1|FILE), family=binomial, data=min1)
summary(model.l1s.glmer.min1, correlation=FALSE)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: QUITE~L1+(1 | FILE)
##    Data: min1
##
##      AIC      BIC   logLik deviance df.resid
##   6583.1   6645.7  -3285.6   6571.1   250415
##
## Scaled residuals:
##     Min      1Q  Median      3Q     Max
## -0.0479 -0.0479 -0.0447 -0.0377 29.3003
##
## Random effects:
##  Groups Name        Variance Std.Dev.
##  FILE   (Intercept) 0        0
## Number of obs: 250421, groups:  FILE, 321
##
## Fixed effects:
##                       Estimate Std. Error  z value Pr(>|z|)
## (Intercept)           -6.34698    0.04892 -129.730  < 2e-16 ***
## L1native_vs_nonnative -0.51027    0.12912   -3.952 7.76e-05 ***
## L1rom_vs_germ         -0.19649    0.10628   -1.849  0.06448 .
## L1span_vs_fren        -0.42771    0.16567   -2.582  0.00983 **
## L1norw_vs_germ        -0.13490    0.13309   -1.014  0.31077
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glmer.min1, test="Chisq")
## Single term deletions
##
## Model:
## QUITE~L1+(1 | FILE)
##        Df    AIC    LRT   Pr(Chi)
## <none>    6583.1
## L1      4 6605.3 30.212 4.432e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## likelihood-ratio based R-squared; the adjusted value is returned as attribute "adj.r.squared"
r.squaredLR(model.l1s.glmer.min1)
## [1] 0.000120637
## attr(,"adj.r.squared")
## [1] 0.004636952
## effect of L1 in the mixed model on min1: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glmer.min1)
data.frame(l1eff)
##   L1         fit         se       lower       upper
## 1 EN 0.001163457 0.02469337 0.001108550 0.001221081
## 2 SP 0.001418094 0.02783433 0.001342904 0.001497487
## 3 FR 0.002173337 0.03025125 0.002048479 0.002305788
## 4 NO 0.001996961 0.02741664 0.001892683 0.002106972
## 5 GE 0.002284713 0.02322334 0.002183273 0.002390854
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")

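## data frame min1lrn: with all EN files and only the learner files that contain at least 1 quite
## NOTE(review): the output recorded below matches the output recorded for data frame x
## (same 1197537 observations, 1764 FILE groups, AIC and LRT), so it was evidently not
## produced from a learner-filtered subset; re-run this section and replace the recorded output.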
## binomial mixed model: QUITE on L1 with a random intercept per FILE, fitted to min1lrn;
## the summary is printed without the correlation matrix of the fixed effects
model.l1s.glmer.min1lrn <- glmer(QUITE~L1+(1|FILE), family=binomial, data=min1lrn)
summary(model.l1s.glmer.min1lrn, correlation=FALSE)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: QUITE~L1+(1 | FILE)
##    Data: min1lrn
##
##       AIC       BIC    logLik  deviance  df.resid
##    7811.7    7883.7   -3899.9    7799.7   1197531
##
## Scaled residuals:
##    Min     1Q Median     3Q    Max
## -0.090 -0.015 -0.012 -0.010 70.262
##
## Random effects:
##  Groups Name        Variance Std.Dev.
##  FILE   (Intercept) 1.819    1.349
## Number of obs: 1197537, groups:  FILE, 1764
##
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -8.7274     0.1150 -75.880  < 2e-16 ***
## L1native_vs_nonnative  -0.6513     0.1701  -3.829 0.000129 ***
## L1rom_vs_germ          -0.3940     0.1441  -2.735 0.006235 **
## L1span_vs_fren         -0.1005     0.2193  -0.458 0.646941
## L1norw_vs_germ         -0.2807     0.1858  -1.510 0.130943
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glmer.min1lrn, test="Chisq")
## Single term deletions
##
## Model:
## QUITE~L1+(1 | FILE)
##        Df    AIC    LRT   Pr(Chi)
## <none>    7811.7
## L1      4 7831.6 27.847 1.339e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## likelihood-ratio based R-squared; the adjusted value is returned as attribute "adj.r.squared"
r.squaredLR(model.l1s.glmer.min1lrn)
## [1] 0.0001784826
## attr(,"adj.r.squared")
## [1] 0.02676184
## effect of L1 in the mixed model on min1lrn: fitted value, se and lower/upper bound per L1, as a data frame
l1eff <- effect("L1", model.l1s.glmer.min1lrn)
data.frame(l1eff)
##   L1          fit         se        lower        upper
## 1 EN 9.625089e-05 0.02800289 9.111102e-05 0.0001016807
## 2 SP 1.441674e-04 0.03536275 1.345149e-04 0.0001545125
## 3 FR 1.594000e-04 0.03305301 1.494025e-04 0.0001700664
## 4 NO 1.953528e-04 0.03392135 1.827895e-04 0.0002087794
## 5 GE 2.586426e-04 0.03205177 2.428982e-04 0.0002754072
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")

## data frame mingermoutl: min1 data without the one highest German outlier
## binomial mixed model: QUITE on L1 with a random intercept per FILE, fitted to mingermoutl;
## the summary is printed without the correlation matrix of the fixed effects
model.l1s.glmer.mingermoutl <- glmer(QUITE~L1+(1|FILE), family=binomial, data=mingermoutl)
summary(model.l1s.glmer.mingermoutl, correlation=FALSE)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: QUITE~L1+(1 | FILE)
##    Data: mingermoutl
##
##      AIC      BIC   logLik deviance df.resid
##   6469.3   6531.9  -3228.7   6457.3   249538
##
## Scaled residuals:
##     Min      1Q  Median      3Q     Max
## -0.0467 -0.0467 -0.0447 -0.0377 29.3003
##
## Random effects:
##  Groups Name        Variance Std.Dev.
##  FILE   (Intercept) 0        0
## Number of obs: 249544, groups:  FILE, 320
##
## Fixed effects:
##                       Estimate Std. Error  z value Pr(>|z|)
## (Intercept)           -6.35696    0.04913 -129.391  < 2e-16 ***
## L1native_vs_nonnative -0.49779    0.12757   -3.902 9.53e-05 ***
## L1rom_vs_germ         -0.17153    0.10653   -1.610  0.10735
## L1span_vs_fren        -0.42771    0.16574   -2.581  0.00986 **
## L1norw_vs_germ        -0.08498    0.13417   -0.633  0.52646
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## single-term deletion: likelihood-ratio (chi-squared) test of the model with vs. without L1
drop1(model.l1s.glmer.mingermoutl, test="Chisq")
## Single term deletions
##
## Model:
## QUITE~L1+(1 | FILE)
##        Df    AIC    LRT   Pr(Chi)
## <none>    6469.3
## L1      4 6488.3 26.994 1.994e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## likelihood-ratio based R-squared; the adjusted value is returned as attribute "adj.r.squared"
r.squaredLR(model.l1s.glmer.mingermoutl)
## [1] 0.0001081666
## attr(,"adj.r.squared")
## [1] 0.004217032
## effect of L1 in the mixed model on mingermoutl: fitted value, se and lower/upper bound per L1
l1eff <- effect("L1", model.l1s.glmer.mingermoutl)
data.frame(l1eff)
##   L1         fit         se       lower       upper
## 1 EN 0.001163457 0.02469337 0.001108550 0.001221081
## 2 SP 0.001418094 0.02783433 0.001342904 0.001497487
## 3 FR 0.002173337 0.03025125 0.002048479 0.002305788
## 4 NO 0.001996961 0.02741664 0.001892683 0.002106972
## 5 GE 0.002173706 0.02340591 0.002076443 0.002275515
# plot(l1eff, type="response", ylim=c(0, 0.003), grid=TRUE, ylab="Predicted probability of 'quite'")