한국갤럽 PDF

전국지표조사(NBS) 수도권 역대 지지율을 살펴보자.

저자
소속

1 데이터

1.1 자료실 목록

코드
library(tidyverse)
library(rvest)

# Build the vector of board list-page URLs for the NBS file archive.
nbs_url <- "http://nbsurvey.kr/files"

nbs_bbs <- read_html(nbs_url)

# Collect the href of every pagination button shown on the first list page.
nbs_bbs_raw <- nbs_bbs |>
  html_nodes(".pagination-large") |>
  html_nodes(".btn-page") |>
  html_nodes("a") |>
  html_attr("href")

# The first pagination href is replaced by the canonical first-page URL.
# Negative indexing is used instead of `2:length(x)` so that zero or one
# scraped link yields an empty vector rather than NA entries.
# NOTE(review): page 11 is appended by hand, presumably because the pager
# only exposes the first ten pages -- confirm against the live site.
nbs_bbs_lists <- c(
  "http://nbsurvey.kr/files?mode=list",
  nbs_bbs_raw[-1],
  "http://nbsurvey.kr/files?mode=list&board_page=11"
)

nbs_bbs_lists

1.2 페이지별 보고서 링크

코드

# Read the second list page. The vector built earlier is `nbs_bbs_lists`;
# the original referenced the undefined name `nbs_bbs_list`.
page_html <- read_html(nbs_bbs_lists[2])

# Every anchor href on the page (anchors without an href come back as NA).
page_links <- page_html |>
  html_nodes("a") |>
  html_attr("href")

# Keep hrefs containing "vid="; NA hrefs survive the logical subset as NA ...
page_report_links_raw <- page_links[str_detect(page_links, "vid=")]

# ... so they are dropped here.
page_report_links <- page_report_links_raw[!is.na(page_report_links_raw)]

page_report_links

1.3 보고서 파일

코드
library(httr)

# Download the zip attachment of the first report page found above.
report_html <- read_html(page_report_links[1])

# All attachment anchors on the report page.
report_file_links <- html_nodes(report_html, "a.file-download")

# Anchor text that looks like "(NBS)_<n>차_결과.zip"; NA where it does not.
report_filenames <- report_file_links |>
  html_text() |>
  str_extract(pattern = "\\((NBS)\\)_\\d+차_결과\\.zip")

report_hrefs <- html_attr(report_file_links, "href")

# Use the first anchor that has both a matching file name and an href, so a
# page with several (or non-matching) attachments still gives one scalar.
report_idx <- which(!is.na(report_filenames) & !is.na(report_hrefs))[1]
if (is.na(report_idx)) {
  stop("No NBS result zip link found on ", page_report_links[1], call. = FALSE)
}

report_filename <- report_filenames[[report_idx]]

# The previous hard-coded `path=` token base64-decodes to
# "2023/03/30/F275_(NBS)_93차_결과.zip", i.e. it always pointed at one file.
# NOTE(review): assumes the anchor's href is the attachment's download URL
# (absolute or relative to the report page) -- confirm against the live page.
report_download_url <- url_absolute(report_hrefs[[report_idx]], page_report_links[1])

# Make sure the destination directory exists before writing into it.
dir.create("data/NBS", recursive = TRUE, showWarnings = FALSE)

download.file(
  report_download_url,
  destfile = file.path("data/NBS", report_filename),
  mode = "wb"
)

2 전체 파일 다운로드

2.1 한 페이지

코드

# for(i in 1:length(page_report_links)) {
#   cat("\n", i, "----------", page_report_links[i], "\n")
#   download_zip_report(page_report_links[i])
# }

# Download the "(NBS)_<n>차_결과.zip" attachment linked from one report page
# into data/NBS/.
#
# page_report_link: URL of a single report page.
# Returns the value of download.file(), or invisible NULL (with a warning)
# when the page has no matching attachment link.
download_zip_report <- function(page_report_link) {

  report_html <- read_html(page_report_link)

  # All attachment anchors on the report page.
  file_links <- html_nodes(report_html, "a.file-download")

  # Anchor text that looks like the result zip; NA where it does not match.
  file_names <- file_links |>
    html_text() |>
    str_extract(pattern = "\\((NBS)\\)_\\d+차_결과\\.zip")

  file_hrefs <- html_attr(file_links, "href")

  # First anchor with both a matching name and an href; guarantees scalars
  # even when the page carries several attachments.
  hit <- which(!is.na(file_names) & !is.na(file_hrefs))
  if (length(hit) == 0) {
    # Skip rather than abort so one odd page does not stop a batch run.
    warning("No NBS result zip link found on ", page_report_link, call. = FALSE)
    return(invisible(NULL))
  }

  report_filename <- file_names[[hit[1]]]

  # The previous hard-coded `path=` token base64-decodes to
  # "2023/03/30/F275_(NBS)_93차_결과.zip", so every call requested that one
  # file regardless of the page.
  # NOTE(review): assumes the anchor's href is the attachment's download URL
  # (absolute or relative to the report page) -- confirm against the live page.
  report_download_url <- url_absolute(file_hrefs[[hit[1]]], page_report_link)

  # Make sure the destination directory exists before writing into it.
  dir.create("data/NBS", recursive = TRUE, showWarnings = FALSE)

  download.file(
    report_download_url,
    destfile = file.path("data/NBS", report_filename),
    mode = "wb"
  )
}

# Download the attachment of every report linked from the single list page.
page_report_links |>
  walk(download_zip_report)

2.2 전체 페이지

코드

# Show the list-page URLs that are about to be crawled.
print(nbs_bbs_lists)

# Return the hrefs on one board list page that contain "vid=".
#
# nbs_bbs_list: URL of a single list page.
# Returns a character vector of matching hrefs (NA hrefs are excluded).
get_page_links <- function(nbs_bbs_list) {
  anchors <- html_nodes(read_html(nbs_bbs_list), "a")
  hrefs <- html_attr(anchors, "href")

  # Anchors without an href yield NA; treat those as non-matches up front.
  is_report <- !is.na(hrefs) & str_detect(hrefs, "vid=")

  hrefs[is_report]
}

# One row per list page, with a list-column holding that page's report links.
nbs_bbs_tbl <- enframe(nbs_bbs_lists, value = "page_url")
nbs_bbs_tbl <- mutate(nbs_bbs_tbl, data = map(page_url, get_page_links))

# Expand the list-column so each report link gets its own row (printed only).
unnest(nbs_bbs_tbl, data)


# Download the "(NBS)_<n>차_결과.zip" attachment linked from one report page
# into data/NBS/.
#
# page_report_link: URL of a single report page.
# Returns the value of download.file(), or invisible NULL (with a warning)
# when the page has no matching attachment link.
download_zip_report <- function(page_report_link) {

  report_html <- read_html(page_report_link)

  # All attachment anchors on the report page.
  file_links <- html_nodes(report_html, "a.file-download")

  # Anchor text that looks like the result zip; NA where it does not match.
  file_names <- file_links |>
    html_text() |>
    str_extract(pattern = "\\((NBS)\\)_\\d+차_결과\\.zip")

  file_hrefs <- html_attr(file_links, "href")

  # First anchor with both a matching name and an href; guarantees scalars
  # even when the page carries several attachments.
  hit <- which(!is.na(file_names) & !is.na(file_hrefs))
  if (length(hit) == 0) {
    # Skip rather than abort so one odd page does not stop a batch run.
    warning("No NBS result zip link found on ", page_report_link, call. = FALSE)
    return(invisible(NULL))
  }

  report_filename <- file_names[[hit[1]]]

  # The previous hard-coded `path=` token base64-decodes to
  # "2023/03/30/F275_(NBS)_93차_결과.zip", so every call requested that one
  # file regardless of the page.
  # NOTE(review): assumes the anchor's href is the attachment's download URL
  # (absolute or relative to the report page) -- confirm against the live page.
  report_download_url <- url_absolute(file_hrefs[[hit[1]]], page_report_link)

  # Make sure the destination directory exists before writing into it.
  dir.create("data/NBS", recursive = TRUE, showWarnings = FALSE)

  download.file(
    report_download_url,
    destfile = file.path("data/NBS", report_filename),
    mode = "wb"
  )
}

# Download the attachments for the report links gathered from ALL list pages.
# The original iterated `page_report_links`, which only holds the links of a
# single list page, so the other pages in `nbs_bbs_tbl` were never downloaded.
nbs_bbs_tbl |>
  unnest(data) |>
  pull(data) |>
  unique() |>  # the same report link may be listed on more than one page
  walk(download_zip_report)