Simulations in R_simulation in r-优快云博客

本文通过R语言演示了如何使用不同的随机数生成器来模拟各种混合变量的数据，并进行了直方图和QQ图的绘制。同时，文章还介绍了如何通过蒙特卡洛模拟计算简单事件的概率，并探讨了统计检验功效的计算方法。

目的：simulate data with R using random number generators of different kinds of mixture variables we control.

set.seed(198911)

vecpoisson=rpois(100,5)

mean(vecpoisson)

generate 5 samples from an exponential with a rate parameter 0.1 and sum them together.

reps <- 50000

nexps <- 5

rate <- 0.1

set.seed(0)

system.time( x1 <- replicate(reps, sum(rexp(n=nexps, rate=rate))) ) # replicate

make a histogram or a qqplot of the simulation and compare it to what we know is the truth.

require(ggplot2)

ggplot(data.frame(x1), aes(x1)) + geom_histogram(aes(y=..density..)) +

stat_function(fun=function(x)dgamma(x, shape=nexps, scale=1/rate),

color="red", size=2)

set.seed(0)

system.time(x1 <- sapply(1:reps, function(i){sum(rexp(n=nexps, rate=rate))})) # simple apply

set.seed(0)

system.time(x1 <- lapply(1:reps, function(i){sum(rexp(n=nexps, rate=rate))})) # list apply

set.seed(0)

system.time(x1 <- apply(matrix(rexp(n=nexps*reps, rate=rate), nrow=nexps),2,sum)) # apply on a matrix

set.seed(0)

system.time(x1 <- colSums(matrix(rexp(n=nexps*reps, rate=rate), nrow=nexps))) # using colSums

require(parallel)

set.seed(0)

system.time(x1 <- mclapply(1:reps, function(i){sum(rexp(n=nexps, rate=rate))})) # multi-cluster apply

require(ggplot2)

sampa=rnorm(1000000,0,1)

sampb=rnorm(1500000,3,1)

combined = c(sampa, sampb)

plt = ggplot(data.frame(combined), aes(x=combined)) + stat_bin(binwidth=0.25, position="identity")

plt

require(MASS)

Sigma=matrix(c(5,3,3,2),2,2)

ex1=mvrnorm(100000,rep(0,2),Sigma)

Sigma=matrix(c(9,-5,-1,5),2,2)

ex2=mvrnorm(n=100000, rep(3, 2), Sigma)

Monte carlo simulation

how to compute the probability of simple events using simulation.

rolled two fair dice. What is the probability that their sum is at least 7?

We will approach this by simulating many throws of two fair dice, and then computing the fraction of those trials whose sum is at least 7

isEvent = function(numDice, numSides, targetValue, numTrials){

apply(matrix(sample(1:numSides, numDice*numTrials, replace=TRUE), nrow=numDice), 2, sum) >= targetValue}

set.seed(0)#try 5 trials

outcomes = isEvent(2, 6, 7, 5)

mean(outcomes)

set.seed(0)

outcomes = isEvent(2, 6, 7, 10000)

mean(outcomes)

require(parallel)

isEventPar = function(numDice, numSides, targetValue, trialIndices){

sapply(1:length(trialIndices), function(x) sum(sample(1:numSides, numDice, replace=TRUE)) >= targetValue)}

set.seed(0)

outcomes = pvec(1:10000, function(x) isEventPar(2, 6, 7, x))

mean(outcomes)

Power calculation

The power of a statistical test is the probability that the test rejects the null hypothesis

if the alternative is true.

There is rarely a closed form for the power, so we resort to simulation.

An important question in many clinical trials is how many subjects (samples)

do we need to achieve a certain amount of power?

compute_power = function(n, sigma, numTrials){ sampa = matrix(rnorm(n*numTrials, 1, sigma), ncol=numTrials)

sampb= matrix(rnorm(n*numTrials, 2, sigma), ncol=numTrials)

statistics = (apply(sampa, 2, mean) - apply(sampb, 2, mean))/sqrt(2*sigma^2/n)

return (mean(abs(statistics) >= qnorm(0.975)))}

set.seed(0)

compute_power(3, 0.5, 10000)