目的:simulate data with R using random number generators of different kinds of mixture variables we control.
set.seed(198911)
vecpoisson=rpois(100,5)
mean(vecpoisson)
generate 5 samples from an exponential with a rate parameter 0.1 and sum them together.
reps <- 50000
nexps <- 5
rate <- 0.1
set.seed(0)
system.time( x1 <- replicate(reps, sum(rexp(n=nexps, rate=rate))) ) # replicate
make a histogram or a qqplot of the simulation and compare it to what we know is the truth.
require(ggplot2)
ggplot(data.frame(x1), aes(x1)) + geom_histogram(aes(y=..density..)) +
stat_function(fun=function(x)dgamma(x, shape=nexps, scale=1/rate),
color="red", size=2)
set.seed(0)
system.time(x1 <- sapply(1:reps, function(i){sum(rexp(n=nexps, rate=rate))})) # simple apply
set.seed(0)
system.time(x1 <- lapply(1:reps, function(i){sum(rexp(n=nexps, rate=rate))})) # list apply
set.seed(0)
system.time(x1 <- apply(matrix(rexp(n=nexps*reps, rate=rate), nrow=nexps),2,sum)) # apply on a matrix
set.seed(0)
system.time(x1 <- colSums(matrix(rexp(n=nexps*reps, rate=rate), nrow=nexps))) # using colSums
require(parallel)
set.seed(0)
system.time(x1 <- mclapply(1:reps, function(i){sum(rexp(n=nexps, rate=rate))})) # multi-cluster apply
require(ggplot2)
sampa=rnorm(1000000,0,1)
sampb=rnorm(1500000,3,1)
combined = c(sampa, sampb)
plt = ggplot(data.frame(combined), aes(x=combined)) + stat_bin(binwidth=0.25, position="identity")
plt
require(MASS)
Sigma=matrix(c(5,3,3,2),2,2)
ex1=mvrnorm(100000,rep(0,2),Sigma)
Sigma=matrix(c(9,-5,-1,5),2,2)
ex2=mvrnorm(n=100000, rep(3, 2), Sigma)
Monte carlo simulation
how to compute the probability of simple events using simulation.
rolled two fair dice. What is the probability that their sum is at least 7?
We will approach this by simulating many throws of two fair dice, and then computing the fraction of those trials whose sum is at least 7
isEvent = function(numDice, numSides, targetValue, numTrials){
apply(matrix(sample(1:numSides, numDice*numTrials, replace=TRUE), nrow=numDice), 2, sum) >= targetValue}
set.seed(0)#try 5 trials
outcomes = isEvent(2, 6, 7, 5)
mean(outcomes)
set.seed(0)
outcomes = isEvent(2, 6, 7, 10000)
mean(outcomes)
require(parallel)
isEventPar = function(numDice, numSides, targetValue, trialIndices){
sapply(1:length(trialIndices), function(x) sum(sample(1:numSides, numDice, replace=TRUE)) >= targetValue)}
set.seed(0)
outcomes = pvec(1:10000, function(x) isEventPar(2, 6, 7, x))
mean(outcomes)
Power calculation
The power of a statistical test is the probability that the test rejects the null hypothesis
if the alternative is true.
There is rarely a closed form for the power, so we resort to simulation.
An important question in many clinical trials is how many subjects (samples)
do we need to achieve a certain amount of power?
compute_power = function(n, sigma, numTrials){ sampa = matrix(rnorm(n*numTrials, 1, sigma), ncol=numTrials)
sampb= matrix(rnorm(n*numTrials, 2, sigma), ncol=numTrials)
statistics = (apply(sampa, 2, mean) - apply(sampb, 2, mean))/sqrt(2*sigma^2/n)
return (mean(abs(statistics) >= qnorm(0.975)))}
set.seed(0)
compute_power(3, 0.5, 10000)
本文通过R语言演示了如何使用不同的随机数生成器来模拟各种混合变量的数据,并进行了直方图和QQ图的绘制。同时,文章还介绍了如何通过蒙特卡洛模拟计算简单事件的概率,并探讨了统计检验功效的计算方法。
8784

被折叠的 条评论
为什么被折叠?



