-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscrapeVaccinationData20170223_2.R
More file actions
29 lines (23 loc) · 1.09 KB
/
scrapeVaccinationData20170223_2.R
File metadata and controls
29 lines (23 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# I am attempting to scrape vaccination recommendations
# from the Travel Health Pro website for a particular country.
# The example here is the Travel Health Pro page about Afghanistan.
# This is the page:
# http://travelhealthpro.org.uk/country/1/afghanistan#Vaccine_recommendations
# install.packages("rvest")
library("rvest")
library(curl)
# step 1 scrap the page.
web_content <- read_html(curl('http://travelhealthpro.org.uk/country/1/afghanistan#Vaccine_recommendations', handle = new_handle("useragent" = "Chrome")))
# handle is required as extra data and curl package is required too
# extracting the data using pipes
vac_list <- web_content %>%
html_nodes(".accordion") %>%
html_nodes(".accordion-item") %>%
html_nodes("p") %>%
html_text(trim = FALSE)
# using gsub to remove the spaces and the line brake symbol
vac_list <- gsub("\n", "", vac_list)
vac_list <- gsub(" ", "", vac_list)
# this works and returns a list.
# check with albania
web_content <- read_html(curl('http://travelhealthpro.org.uk/country/2/albania#Vaccine_recommendations', handle = new_handle("useragent" = "Chrome")))