library(bitData)
#> Warning: replacing previous import 'jsonlite::flatten' by 'purrr::flatten' when
#> loading 'bitData'
#>
#> Attaching package: 'bitData'
#> The following object is masked from 'package:datasets':
#>
#> co2
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
#> ✔ tibble 3.1.8 ✔ dplyr 1.0.10
#> ✔ tidyr 1.2.1 ✔ stringr 1.4.1
#> ✔ readr 2.1.2 ✔ forcats 0.5.2
#> Warning: package 'tibble' was built under R version 4.2.1
#> Warning: package 'tidyr' was built under R version 4.2.1
#> Warning: package 'dplyr' was built under R version 4.2.1
#> Warning: package 'stringr' was built under R version 4.2.1
#> Warning: package 'forcats' was built under R version 4.2.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
데이터 살펴보기
Data summary

|                          |            |
|:-------------------------|:-----------|
| Name                     | Piped data |
| Number of rows           | 72         |
| Number of columns        | 8          |
| _______________________  |            |
| Column type frequency:   |            |
| character                | 1          |
| numeric                  | 7          |
| ________________________ |            |
| Group variables          | None       |
Variable type: character
Variable type: numeric

| skim_variable | n_missing | complete_rate |  mean |    sd | p0 |   p25 |   p50 |   p75 | p100 | hist  |
|:--------------|----------:|--------------:|------:|------:|---:|------:|------:|------:|-----:|:------|
| id            |         0 |             1 | 36.50 | 20.93 |  1 | 18.75 | 36.50 | 54.25 |   72 | ▇▇▇▇▇ |
| year          |         0 |             1 |  2.11 |  0.62 |  1 |  2.00 |  2.00 |  2.00 |    4 | ▁▇▁▁▁ |
| attendance    |         0 |             1 | 11.65 |  1.19 |  4 | 12.00 | 12.00 | 12.00 |   12 | ▁▁▁▁▇ |
| participation |         0 |             1 |  5.89 |  3.17 |  0 |  2.00 |  6.00 |  9.00 |   10 | ▇▂▆▇▇ |
| homework      |         0 |             1 |  9.29 |  2.10 |  0 | 10.00 | 10.00 | 10.00 |   10 | ▁▁▁▁▇ |
| midterm       |         0 |             1 | 22.16 |  6.81 |  0 | 18.00 | 22.85 | 27.50 |   33 | ▁▁▅▇▆ |
| final         |         0 |             1 | 23.60 |  7.54 |  0 | 19.75 | 25.75 | 29.00 |   34 | ▁▁▃▅▇ |
탐색적 데이터 분석
# Add each student's total score and the letter grade derived from it.
# Bands: above 90 -> "A", (80, 90] -> "B", (70, 80] -> "C", (60, 70] -> "D",
# anything else -> "F".
exam_score <- exam_score %>%
  mutate(
    total = attendance + participation + homework + midterm + final,
    # case_when() uses the first branch that matches, so each band only needs
    # its lower cutoff; the upper bound is implied by the branches above it.
    grade = case_when(
      total > 90 ~ "A",
      total > 80 ~ "B",
      total > 70 ~ "C",
      total > 60 ~ "D",
      TRUE ~ "F"
    )
  )
# Keep only the students graded "A" or "F" (the two ends of the grade scale).
two_grades <- filter(exam_score, grade %in% c("A", "F"))

# Number of students in each of the two retained grades.
count(two_grades, grade)
#> # A tibble: 2 × 2
#> grade n
#> <chr> <int>
#> 1 A 8
#> 2 F 13
# Compare, for grade-A versus grade-F students, the average score of each
# component (attendance through final) as a stacked bar per grade.
two_grades %>%
  # Long format: one row per student and score component.
  pivot_longer(
    cols = attendance:final,
    names_to = "구분",
    values_to = "점수"
  ) %>%
  group_by(grade, 구분) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the
  # "summarise() has grouped output by 'grade'" message.
  summarise(평균 = mean(점수), .groups = "drop") %>%
  ggplot(aes(x = grade, y = 평균, fill = 구분)) +
  # position = "fill" rescales every bar to height 1, so the bars show each
  # component's share of the grade's summed averages rather than raw means.
  geom_col(position = "fill") +
  # NOTE(review): "AppleGothic" is presumably only installed on macOS; the
  # Hangul labels may not render on other platforms -- confirm the target.
  theme_bw(base_family = "AppleGothic") +
  labs(title = "A와 F 학생 평균 비교")