사용자가 선택한 데이터에 대해 연속확률분포의 적합도를 분석하고 시각화할 수 있는 도구입니다. 이 애플리케이션을 통해 통계학적 분석을 수행하고 결과를 실시간으로 탐색할 수 있습니다.
Shiny 앱: 데이터가 어떤 이론적 분포를 따르는지 확인하기 위해 히스토그램과 이론적 확률밀도함수를 비교합니다. Q-Q 플롯을 통해 데이터의 분위수가 이론적 분위수와 얼마나 일치하는지 시각적으로 평가합니다. 또한, 데이터의 누적분포함수와 이론적 누적분포함수를 비교하여 데이터의 적합도를 평가합니다. 균일분포는 Kolmogorov-Smirnov 검정을 통해, 정규분포는 Shapiro-Wilk 검정을 통해, 지수분포는 Kolmogorov-Smirnov 검정을 통해 데이터의 적합도를 평가하고 검정 결과의 p-값 통해 분포의 적합성 여부를 결정합니다.
코딩:
1 Shiny 앱
#| label: shinylive-continous-distribution-fitting
#| viewerWidth: 800
#| viewerHeight: 700
#| standalone: true
library(ggplot2)
library(moments)
library(palmerpenguins)
ui <- fluidPage(
titlePanel("Continuous Distribution Fitting"),
sidebarLayout(
sidebarPanel(
radioButtons("data_source", "Select Data Source",
choices = c("Upload CSV", "penguins", "mtcars"),
inline = TRUE),
conditionalPanel(
condition = "input.data_source == 'Upload CSV'",
fileInput("file", "Choose CSV File",
accept = c("text/csv", "text/comma-separated-values,text/plain", ".csv"))
),
selectInput("variable", "Select Variable", choices = NULL),
radioButtons("distribution", "Select Distribution",
choices = c("Uniform", "Normal", "Exponential"))
),
mainPanel(
tabsetPanel(
tabPanel("Summary", verbatimTextOutput("summary")),
tabPanel("Histogram", plotOutput("histogram")),
tabPanel("Q-Q Plot", plotOutput("qqplot")),
tabPanel("CDF Plot", plotOutput("cdfplot"))
),
h4("Usage Instructions:"),
tags$ol(
tags$li("Select a data source (Upload CSV, penguins, or mtcars)."),
tags$li("If 'Upload CSV' is selected, choose a CSV file to upload."),
tags$li("Select a continuous variable from the dropdown menu."),
tags$li("Choose a distribution to fit (Uniform, Normal, or Exponential)."),
tags$li("The fitting results will be automatically updated based on your selection."),
tags$li("Explore the summary statistics, histogram, Q-Q plot, and CDF plot in the respective tabs.")
)
)
)
)
server <- function(input, output, session) {
data <- reactive({
if (input$data_source == "Upload CSV") {
req(input$file)
read.csv(input$file$datapath)
} else if (input$data_source == "penguins") {
penguins[, c("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g")]
} else if (input$data_source == "mtcars") {
mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
}
})
observe({
req(data())
updateSelectInput(session, "variable", choices = names(data()))
})
variable <- reactive({
req(input$variable)
data()[[input$variable]]
})
fit_result <- reactive({
req(input$distribution, variable())
distribution <- tolower(input$distribution)
if (distribution == "uniform") {
min_val <- min(variable(), na.rm = TRUE)
max_val <- max(variable(), na.rm = TRUE)
test_result <- ks.test(variable(), "punif", min = min_val, max = max_val)
} else if (distribution == "normal") {
test_result <- shapiro.test(variable())
} else if (distribution == "exponential") {
rate_val <- 1/mean(variable(), na.rm = TRUE)
test_result <- ks.test(variable(), "pexp", rate = rate_val)
}
list(
distribution = distribution,
test_result = test_result,
params = switch(distribution,
"uniform" = list(min = min_val, max = max_val),
"normal" = list(mean = mean(variable(), na.rm = TRUE), sd = sd(variable(), na.rm = TRUE)),
"exponential" = list(rate = rate_val))
)
})
output$summary <- renderPrint({
req(fit_result())
cat("Selected Distribution:", fit_result()$distribution, "\n\n")
cat("Descriptive Statistics:\n")
print(summary(variable()))
cat("\nSkewness:", skewness(variable()), "\n")
cat("Kurtosis:", kurtosis(variable()), "\n\n")
if (fit_result()$distribution == "uniform") {
cat("Kolmogorov-Smirnov Test for Uniform Distribution\n")
} else if (fit_result()$distribution == "normal") {
cat("Shapiro-Wilk Normality Test\n")
} else if (fit_result()$distribution == "exponential") {
cat("Kolmogorov-Smirnov Test for Exponential Distribution\n")
}
cat("p-value:", fit_result()$test_result$p.value, "\n")
})
output$histogram <- renderPlot({
req(fit_result())
distribution <- fit_result()$distribution
params <- fit_result()$params
ggplot(data.frame(x = variable()), aes(x)) +
geom_histogram(aes(y = ..density..), bins = 30, color = "black", fill = "lightblue") +
stat_function(fun = switch(distribution,
"uniform" = dunif,
"normal" = dnorm,
"exponential" = dexp),
args = params,
color = "red", size = 1) +
labs(title = "Histogram with Fitted Distribution",
x = input$variable, y = "Density")
})
output$qqplot <- renderPlot({
req(fit_result())
distribution <- fit_result()$distribution
params <- fit_result()$params
ggplot(data.frame(x = variable()), aes(sample = x)) +
stat_qq(distribution = switch(distribution,
"uniform" = qunif,
"normal" = qnorm,
"exponential" = qexp),
dparams = unlist(params),
color = "blue") +
stat_qq_line(distribution = switch(distribution,
"uniform" = qunif,
"normal" = qnorm,
"exponential" = qexp),
dparams = unlist(params),
color = "red") +
labs(title = paste("Q-Q Plot for", distribution),
x = "Theoretical Quantiles", y = "Sample Quantiles")
})
output$cdfplot <- renderPlot({
req(fit_result())
distribution <- fit_result()$distribution
params <- fit_result()$params
ggplot(data.frame(x = variable()), aes(x)) +
stat_ecdf(geom = "step", color = "blue", size = 1) +
stat_function(fun = switch(distribution,
"uniform" = punif,
"normal" = pnorm,
"exponential" = pexp),
args = unlist(params),
color = "red", size = 1) +
labs(title = paste("Empirical and Theoretical CDF for", distribution),
x = input$variable, y = "Cumulative Probability")
})
}
shinyApp(ui, server)
2 코딩
라이센스
CC BY-SA-NC & GPL-3