모집단에서 표본을 추출하는 대표적인 방법인 단순임의추출, 층화추출, 군집추출, 다단계추출 4가지의 개념을 시각적으로 확인해보자.
1 Shiny 앱
#| label: shinylive-sampling
#| viewerWidth: 800
#| viewerHeight: 700
#| standalone: true
# shiny provides the UI/server framework used by the app below.
library(shiny)
# openintro supplies the COL colour palette that every plotting helper reads.
library(openintro)
data(COL)
# showtext takes over text rendering in plots; presumably enabled so the
# Korean labels drawn with text() display correctly -- TODO confirm.
library(showtext)
showtext_auto()
# _____ Simple Random _____ #
# Draw a simple-random-sampling illustration.
#
# Scatters `N` population units uniformly over the plotting region and
# highlights `n` of them, chosen without replacement, in the sample colour.
#
# Args:
#   n: Number of units to sample. Values above `N` are capped at `N`, because
#      sampling without replacement cannot return more units than exist.
#   N: Population size (number of points drawn).
#
# Returns: NULL, invisibly; the function is called for its plotting side
# effect. Reads the global `COL` palette.
build_srs <- function(n, N) {
  colSamp <- COL[4]  # highlight colour for sampled units
  colPop <- COL[1]   # colour for the whole population

  # Empty canvas with a frame but no axes.
  plot(0, xlim = c(0, 2), ylim = 0:1, type = "n", axes = FALSE,
       xlab = "", ylab = "")
  box()

  # Population: N points uniform on [0, 2] x [0, 1].
  x <- runif(N, 0, 2)
  y <- runif(N)
  points(x, y, col = colPop, pch = 20)

  # Sample: a filled dot plus a surrounding ring on each selected unit.
  these <- sample.int(N, min(n, N))
  points(x[these], y[these], pch = 20, cex = 0.8, col = colSamp)
  points(x[these], y[these], cex = 1.4, col = colSamp)
  invisible(NULL)
}
# _____ Stratified _____ #
# Draw a stratified-sampling illustration.
#
# Splits a population of roughly `N` units into `numStrata` strata of random
# size, draws each stratum as a circle filled with its units, and highlights a
# random sample taken inside every stratum.
#
# Args:
#   N: Target total population size (stratum sizes are rounded, so the actual
#      total may differ slightly).
#   numStrata: Number of strata to draw.
#   sampleSizePerStratum: Units to sample in each stratum; capped at the
#      stratum size.
#
# Returns: NULL, invisibly; the function is called for its plotting side
# effect. Reads the global `COL` palette.
build_stratified <- function(N, numStrata, sampleSizePerStratum) {
  colSamp <- COL[4]  # highlight colour for sampled units
  colPop <- COL[1]   # colour for the whole population

  plot(0, xlim = c(0, 2), ylim = 0:1 + 0.01,
       type = "n", axes = FALSE, xlab = "", ylab = "")
  box()

  # seq(..., length.out = 1) returns only the left endpoint, which would push
  # a lone (large) stratum off the left edge of the plot; centre it instead.
  X <- if (numStrata == 1) 1 else seq(0.1, 1.9, length.out = numStrata)
  Y <- rep(0.5, numStrata)

  # Generate the size of each stratum at random: every stratum is either a
  # quarter or three quarters of the even share, then all are rescaled so the
  # total is approximately N.
  evenShare <- ceiling(N / numStrata)
  strataSizes <- sample(evenShare * c(0.25, 0.75), numStrata, replace = TRUE)
  strataSizes <- round(strataSizes / sum(strataSizes) * N)

  # Circle area is proportional to stratum size.
  R <- sqrt(strataSizes / 500)

  outline <- seq(0, 2 * pi, length.out = 99)
  for (i in seq_len(numStrata)) {
    # Stratum boundary, drawn slightly outside the region holding the points.
    polygon(X[i] + (R[i] + 0.01) * cos(outline),
            Y[i] + (R[i] + 0.01) * sin(outline),
            border = COL[5, 4])

    size <- strataSizes[i]
    if (size == 0) {
      next
    }

    # Uniform points inside the disc: the square root on the radius keeps the
    # density uniform over area (same distribution as rejection sampling).
    radius <- R[i] * sqrt(runif(size))
    angle <- runif(size, 0, 2 * pi)
    x <- X[i] + radius * cos(angle)
    y <- Y[i] + radius * sin(angle)
    points(x, y, pch = 20, col = colPop)

    # Sample within this stratum: filled dot plus ring on each selected unit.
    these <- sample.int(size, min(sampleSizePerStratum, size))
    points(x[these], y[these], pch = 20, cex = 0.8, col = colSamp)
    points(x[these], y[these], cex = 1.4, col = colSamp)
  }

  # Label every stratum just above its circle.
  text(X, Y + R,
       paste("계층", seq_len(numStrata)),
       pos = 3,
       cex = 1.3)
}
# _____ Cluster _____ #
# Draw a cluster-sampling illustration.
#
# Draws `numClusters` clusters as circles at random heights, fills each with
# its units, and highlights every unit of the clusters listed in
# `selectedClusters` (in cluster sampling whole clusters are observed).
#
# Args:
#   numClusters: Number of clusters to draw.
#   clusterSizes: Numeric vector with the number of units in each cluster;
#      indexed 1..numClusters.
#   selectedClusters: Indices of the clusters chosen for the sample.
#
# Returns: NULL, invisibly; the function is called for its plotting side
# effect. Reads the global `COL` palette.
build_cluster <- function(numClusters, clusterSizes, selectedClusters) {
  colSamp <- COL[4]  # highlight colour for sampled units / selected clusters
  colPop <- COL[1]   # colour for the whole population

  plot(0, xlim = c(0, 2), ylim = c(0.01, 1.04), type = "n", axes = FALSE,
       xlab = "", ylab = "")
  box()

  # seq(..., length.out = 1) returns only the left endpoint, which would push
  # a lone cluster off the left edge of the plot; centre it instead.
  X <- if (numClusters == 1) 1 else seq(0.1, 1.9, length.out = numClusters)
  Y <- runif(numClusters, 0.2, 0.8)
  # Circle area is proportional to cluster size.
  R <- sqrt(clusterSizes / 500)
  # Labels go above circles in the upper half and below those in the lower.
  above <- ifelse(Y > 0.5, 1, -1)

  outline <- seq(0, 2 * pi, length.out = 99)
  for (i in seq_len(numClusters)) {
    isSelected <- i %in% selectedClusters

    # Cluster boundary; selected clusters get an extra dashed outline.
    outlineX <- X[i] + (R[i] + 0.02) * cos(outline)
    outlineY <- Y[i] + (R[i] + 0.02) * sin(outline)
    polygon(outlineX, outlineY, border = COL[5, 4])
    if (isSelected) {
      polygon(outlineX, outlineY, border = colSamp, lty = 2, lwd = 1.5)
    }

    size <- clusterSizes[i]
    if (size == 0) {
      next
    }

    # Uniform points inside the disc: the square root on the radius keeps the
    # density uniform over area (same distribution as rejection sampling).
    radius <- R[i] * sqrt(runif(size))
    angle <- runif(size, 0, 2 * pi)
    x <- X[i] + radius * cos(angle)
    y <- Y[i] + radius * sin(angle)
    points(x, y, pch = 20, col = colPop)

    # Every unit of a selected cluster belongs to the sample.
    if (isSelected) {
      points(x, y, pch = 20, cex = 0.8, col = colSamp)
      points(x, y, cex = 1.4, col = colSamp)
    }
  }

  text(X, Y + above * (R + 0.01),
       paste("군집", seq_len(numClusters)),
       pos = 2 + above,
       cex = 1.3)
}
# _____ Multistage Sampling _____ #
# Draw a multistage-sampling illustration.
#
# Stage 1 selects a subset of clusters; stage 2 draws a random sample of units
# inside each selected cluster. Sampling inside *every* cluster would be
# indistinguishable from stratified sampling, so unselected clusters are
# drawn but left unsampled.
#
# Args:
#   numClusters: Number of clusters to draw.
#   sampleSizePerCluster: Units to sample inside each selected cluster; capped
#      at the cluster size.
#   clusterSizes: Numeric vector with the number of units in each cluster;
#      indexed 1..numClusters.
#   selectedClusters: Optional indices of the stage-1 clusters. When NULL
#      (the default), about one third of the clusters (at least one) are
#      chosen at random, mirroring the fraction used for cluster sampling.
#
# Returns: NULL, invisibly; the function is called for its plotting side
# effect. Reads the global `COL` palette.
build_multistage <- function(numClusters, sampleSizePerCluster, clusterSizes,
                             selectedClusters = NULL) {
  colSamp <- COL[4]  # highlight colour for sampled units / selected clusters
  colPop <- COL[1]   # colour for the whole population

  plot(0, xlim = c(0, 2), ylim = c(0.01, 1.04), type = "n", axes = FALSE,
       xlab = "", ylab = "")
  box()

  # Stage 1: choose which clusters take part in the sample.
  if (is.null(selectedClusters)) {
    selectedClusters <- sample.int(numClusters,
                                   max(1, round(numClusters / 3)))
  }

  # seq(..., length.out = 1) returns only the left endpoint, which would push
  # a lone cluster off the left edge of the plot; centre it instead.
  X <- if (numClusters == 1) 1 else seq(0.1, 1.9, length.out = numClusters)
  Y <- runif(numClusters, 0.2, 0.8)
  # Circle area is proportional to cluster size.
  R <- sqrt(clusterSizes / 500)
  # Labels go above circles in the upper half and below those in the lower.
  above <- ifelse(Y > 0.5, 1, -1)

  outline <- seq(0, 2 * pi, length.out = 99)
  for (i in seq_len(numClusters)) {
    isSelected <- i %in% selectedClusters

    # Cluster boundary; selected clusters get an extra dashed outline.
    outlineX <- X[i] + (R[i] + 0.02) * cos(outline)
    outlineY <- Y[i] + (R[i] + 0.02) * sin(outline)
    polygon(outlineX, outlineY, border = COL[5, 4])
    if (isSelected) {
      polygon(outlineX, outlineY, border = colSamp, lty = 2, lwd = 1.5)
    }

    size <- clusterSizes[i]
    if (size == 0) {
      next
    }

    # Uniform points inside the disc: the square root on the radius keeps the
    # density uniform over area (same distribution as rejection sampling).
    radius <- R[i] * sqrt(runif(size))
    angle <- runif(size, 0, 2 * pi)
    x <- X[i] + radius * cos(angle)
    y <- Y[i] + radius * sin(angle)
    points(x, y, pch = 20, col = colPop)

    # Stage 2: sample units only inside the clusters picked in stage 1.
    if (isSelected) {
      these <- sample.int(size, min(sampleSizePerCluster, size))
      points(x[these], y[these], pch = 20, cex = 0.8, col = colSamp)
      points(x[these], y[these], cex = 1.4, col = colSamp)
    }
  }

  text(X, Y + above * (R + 0.01),
       paste("군집", seq_len(numClusters)),
       pos = 2 + above, cex = 1.3)
}
# ui -----
# Page layout: a sidebar with the method selector plus the controls that
# belong to the chosen method, and a main panel holding the plot. The pieces
# are assembled inside local() so no helper names leak into the global scope.
ui <- local({
  # Radio-button labels (shown to the user) mapped to internal method keys.
  methodChoices <- c(
    "단순 무작위 표본 추출" = "srs",
    "층화 표본 추출" = "stratified",
    "군집 표본 추출" = "cluster",
    "다단계 표본 추출" = "multistage"
  )
  methodSelector <- radioButtons(
    "method", "표본 추출 방법 선택:", methodChoices,
    inline = TRUE
  )

  # Each panel below is only displayed while its method is selected.
  srsControls <- conditionalPanel(
    condition = "input.method == 'srs'",
    sliderInput("sampleSize", "표본 크기:", min = 1, max = 100, value = 10)
  )
  stratifiedControls <- conditionalPanel(
    condition = "input.method == 'stratified'",
    sliderInput("numStrata", "층(Stratum) 수:", min = 1, max = 5, value = 3),
    sliderInput("sampleSizePerStratum", "층 내부 표본 크기:",
                min = 1, max = 10, value = 3)
  )
  clusterControls <- conditionalPanel(
    condition = "input.method == 'cluster'",
    sliderInput("numClusters", "군집 수:", min = 1, max = 9, value = 3)
  )
  multistageControls <- conditionalPanel(
    condition = "input.method == 'multistage'",
    sliderInput("numClustersMultistage", "군집 수:",
                min = 1, max = 9, value = 3),
    sliderInput("sampleSizePerClusterMultistage", "군집 내부 표본 크기:",
                min = 1, max = 10, value = 3)
  )

  controls <- sidebarPanel(
    methodSelector,
    srsControls,
    stratifiedControls,
    clusterControls,
    multistageControls
  )
  plotArea <- mainPanel(plotOutput("samplingPlot"))

  fluidPage(
    titlePanel("표본 추출 방법 시각화"),
    sidebarLayout(controls, plotArea)
  )
})
# Server logic: redraws the sampling illustration whenever the selected
# method or one of its sliders changes.
#
# Args:
#   input: Shiny input values (`method` plus the per-method slider values).
#   output: Shiny output list; `samplingPlot` is assigned here.
server <- function(input, output) {
  # Draw random sizes for `k` clusters: pick a lower bound in 5..30, an upper
  # bound at least 5 above it (at most 50), then sample sizes in that range.
  draw_cluster_sizes <- function(k) {
    sizeMin <- sample(5:30, 1)
    sizeMax <- sample((sizeMin + 5):50, 1)
    sample(sizeMin:sizeMax, k, replace = TRUE)
  }

  output$samplingPlot <- renderPlot({
    if (input$method == "srs") {
      build_srs(n = input$sampleSize, N = 100)
    } else if (input$method == "stratified") {
      build_stratified(
        N = 100,
        numStrata = input$numStrata,
        sampleSizePerStratum = input$sampleSizePerStratum
      )
    } else if (input$method == "cluster") {
      numClusters <- input$numClusters
      clusterSizes <- draw_cluster_sizes(numClusters)
      # Select about a third of the clusters, but never zero: round(1 / 3) is
      # 0, which would leave a single-cluster plot without any sample.
      numSelected <- max(1, round(numClusters / 3))
      selectedClusters <- sample.int(numClusters, numSelected)
      build_cluster(
        numClusters = numClusters,
        clusterSizes = clusterSizes,
        selectedClusters = selectedClusters
      )
    } else if (input$method == "multistage") {
      numClusters <- input$numClustersMultistage
      clusterSizes <- draw_cluster_sizes(numClusters)
      build_multistage(
        numClusters = numClusters,
        sampleSizePerCluster = input$sampleSizePerClusterMultistage,
        clusterSizes = clusterSizes
      )
    }
  })
}
# Create the Shiny app object from the UI definition and the server function.
shinyApp(ui, server)
2 코딩
라이선스
CC BY-NC-SA & GPL-3