Get the coordinates of volcanoes from Wikipedia pages
You can find a lot of information in Wikipedia pages, but gathering it manually can take a long time. R can be used to harvest this information automatically. Let's see how it can retrieve the coordinates of volcanoes in Mexico and Central America. The web pages are here:
https://en.wikipedia.org/wiki/List_of_volcanoes_in_Mexico
https://en.wikipedia.org/wiki/List_of_volcanoes_in_Guatemala
https://en.wikipedia.org/wiki/List_of_volcanoes_in_El_Salvador
https://en.wikipedia.org/wiki/List_of_volcanoes_in_Nicaragua
https://en.wikipedia.org/wiki/List_of_volcanoes_in_Costa_Rica
library(maps)
library(mapdata)
library(maptools)
library(XML)
# Note a problem in readHTMLTable: it cannot be used directly with the url.
# readHTMLTable(url, header=FALSE, stringsAsFactors = FALSE) generates an error.
# Workaround: first load each page into a file in the temporary directory.
# Download one Wikipedia page into the session's temporary directory and
# return the tables that readHTMLTable() extracts from it, with NULL entries
# removed. The page is saved to a local file first because calling
# readHTMLTable() directly on the url generates an error.
#
#   page: last component of the Wikipedia url,
#         e.g. "List_of_volcanoes_in_Mexico".
read_wiki_tables <- function(page) {
  url <- paste0("https://en.wikipedia.org/wiki/", page)
  dest <- file.path(tempdir(), paste0(page, ".html"))
  download.file(url, dest)
  tables <- readHTMLTable(dest, header = FALSE, stringsAsFactors = FALSE)
  tables[!vapply(tables, is.null, logical(1))]
}

# One row per country: which page to fetch, which of the page's tables holds
# the volcano list, and how many leading rows of that table are dropped
# before the data rows are used.
volcano_pages <- data.frame(
  country = c("Mexico", "Guatemala", "El Salvador", "Nicaragua", "Costa Rica"),
  page = c(
    "List_of_volcanoes_in_Mexico",
    "List_of_volcanoes_in_Guatemala",
    "List_of_volcanoes_in_El_Salvador",
    "List_of_volcanoes_in_Nicaragua",
    "List_of_volcanoes_in_Costa_Rica"
  ),
  table_index = c(3L, 2L, 2L, 2L, 2L),
  header_rows = c(2L, 1L, 1L, 1L, 2L),
  stringsAsFactors = FALSE
)

# Fetch every country's table, strip its leading rows and tag each remaining
# row with the country name.
country_tables <- lapply(seq_len(nrow(volcano_pages)), function(i) {
  tables <- read_wiki_tables(volcano_pages$page[i])
  volcano_table <- tables[[volcano_pages$table_index[i]]]
  cbind(
    volcano_table[-seq_len(volcano_pages$header_rows[i]), ],
    Country = volcano_pages$country[i]
  )
})

# Stack the per-country tables into a single data frame.
Volcano <- do.call(rbind, country_tables)
# Name the columns of the combined table.
colnames(Volcano) <- c("Name", "Altitude", "Elevation", "Coordinates", "Eruption", "Country")
# Renumber the rows 1..n. seq_len() also copes with an empty table, where
# seq(from=1, to=0, by=1) would stop with an error.
row.names(Volcano) <- as.character(seq_len(nrow(Volcano)))
# Keep only the volcanoes that have coordinates: drop rows where the
# Coordinates cell is missing or holds just a dash placeholder.
has_coordinates <- !is.na(Volcano[, "Coordinates"]) &
  !(Volcano[, "Coordinates"] %in% c("-", "—"))
Volcano <- Volcano[has_coordinates, ]
# Extract the decimal "latitude; longitude" pair from one coordinate string.
#
# The string is split on " / " and its third part is used; a trailing
# " (...)" annotation is removed and the remainder is split on "; ".
# Non-ASCII characters are stripped from both pieces so that as.numeric()
# can parse them.
#
#   coordinate: a single character string.
#   returns:    a character vector of length 2 (latitude, longitude), or
#               c(NA, NA) when the string does not yield exactly two pieces.
#               Always returning length 2 keeps the rows of the matrix built
#               below aligned with the rows of Volcano.
parse_decimal_pair <- function(coordinate) {
  notations <- strsplit(coordinate, " / ")[[1]]
  decimal <- gsub(" \\(.+\\)", "", notations[3])
  pair <- unlist(strsplit(decimal, "; "))
  if (length(pair) != 2 || anyNA(pair)) {
    return(c(NA_character_, NA_character_))
  }
  iconv(pair, "", "ASCII", "")
}

coordinate_pairs <- lapply(Volcano[, "Coordinates"], parse_decimal_pair)
# as.character() turns the NULL produced by an empty list into character(0),
# so an empty table gives a 0 x 2 matrix instead of an error.
lc <- matrix(as.character(unlist(coordinate_pairs)), ncol = 2, byrow = TRUE)
# Append the numeric coordinates; pieces that are not numbers become NA.
Volcano <- cbind(
  Volcano,
  latitude = as.numeric(lc[, 1]),
  longitude = as.numeric(lc[, 2])
)
# Draw the "worldHires" map cropped to the extent of the volcano coordinates
# (missing values ignored), then overlay the volcanoes as solid red points.
volcano_lon <- Volcano[, "longitude"]
volcano_lat <- Volcano[, "latitude"]
lon_range <- range(volcano_lon, na.rm = TRUE)
lat_range <- range(volcano_lat, na.rm = TRUE)
map(
  "worldHires",
  xlim = lon_range,
  ylim = lat_range,
  mar = c(4, 5, 1, 1)
)
points(volcano_lon, volcano_lat, col = "red", pch = 19)
# Add a reference grid and degree-labelled axes on the bottom (1) and
# left (2) sides; las = 1 prints the left-axis labels horizontally.
grid()
degAxis(1)
degAxis(2, las = 1)
Commentaires
Enregistrer un commentaire