-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidationStudies.R
More file actions
93 lines (77 loc) · 4.29 KB
/
validationStudies.R
File metadata and controls
93 lines (77 loc) · 4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
require(ConfIntVariance)
# Confirm that the variance of the sample variance is unbiased even on small samples, for instance tossing a dice with n = 5, known to have:
# true central fourth moment mu_4 is 707/48=14.72917, or empirically mean(((1:6) - 3.5)^4)
mu4 <- 707/48
variance <- 35/12
n <- 20
# Cramér's formula for the true variance of the sample variance
# We could of course also estimate that value empirically
print(trueVarianceOfSampleVariance <- mu4/n - variance^2*(n-3)/n/(n-1))
# Confirms that the estimator is unbiased: t-test confidence interval contains the true value
print(t.test(sapply(1:1e3, function(i) attr(varwci(sample(6, n, replace=TRUE)), "var.SampleVariance"))))
# the same for the normal
# mu4 = 3
mu4 <- 3
variance <- 1
n <- 20
print(trueVarianceOfSampleVariance <- mu4/n - variance^2*(n-3)/n/(n-1))
# confirms that the estimator is unbiased: t-test confidence interval contains the true value
print(t.test(sapply(1:1e5, function(i) attr(varwci(rnorm(n)), "var.SampleVariance"))))
# and so on
N <- 1e4
trueValueCovered <- sapply(
1:N,
function(i) {
# throw a dice 100 times
x <- sample(6, 100, replace=TRUE)
# compute our confidence interval
ci <- varwci(x)
# We know that the true variance of the dice is 35/12 = 2.916666...
# Record the boolean whether the confidence interval contains the correct value
trueValueCovered[i] <- (35/12 > ci[1] && 35/12 < ci[2])
}
)
# Result of simulation study: Close to 0.95, works, confirms validity
print(paste("Simulated coverage probability:", mean(trueValueCovered)))
# compare with the competitors
# competitor one: function varTest in package EnvStats
# competiror two: parameters of the structual equation model in package lavaan
require(EnvStats)
require(lavaan)
lavaanCI <- function(x, conf.level) {
df <- as.data.frame(x)
names(df) <- "x"
model <- 'x ~~ x'
fit <- sem(model, data=df, likelihood = "wishart" )
pe <- parameterEstimates(fit, level=conf.level)
c(pe$ci.lower, pe$ci.upper)
}
N <- 1e4
trueValueCovered <- sapply(
1:N,
function(i) {
x <- sample(6, 100, replace=TRUE)
proposedMethod <- varwci(x, conf.level=0.95)
# competitor one: ci from function varTest
literatureMethodCS <- varTest(x, conf.level=0.95)$conf.int
# competitor two: ci from structual equation model
literatureMethodLH <- lavaanCI(x, conf.level=0.95)
covered1 <- (35/12 > proposedMethod[1] && 35/12 < proposedMethod[2])
covered2 <- (35/12 > literatureMethodCS[1] && 35/12 < literatureMethodCS[2])
covered3 <- (35/12 > literatureMethodLH[1] && 35/12 < literatureMethodLH[2])
c(
covered1, covered2, covered3,
proposedMethod[2] - proposedMethod[1], literatureMethodCS[2] - literatureMethodCS[1], literatureMethodLH[2] - literatureMethodLH[1]
)
}
)
res <- rowMeans(trueValueCovered)
# close to the nominal 95%.
print(paste("Coverage probability of proposed method", res[1]))
# close to 99%, i.e., on the conservative side. Confidence interval too big.
print(paste("Coverage probability of varTest", res[2]))
# close to 99%, i.e., on the conservative side. Confidence interval too big.
print(paste("Coverage probability of lavaan", res[3]))
print(paste("Average length of proposed confidence interval", res[4]))
print(paste("Average length of chi-square confidence interval", res[5]))
print(paste("Average length of structual equation confidence interval", res[6]))