Get the coordinates of volcanoes from Wikipedia pages

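You can find a lot of information on Wikipedia pages, but gathering it manually can take a long time. R can be used to extract that information automatically. Let's see how it can retrieve the coordinates of volcanoes in Central America. The source pages are the Wikipedia "List of volcanoes in ..." pages for Mexico, Guatemala, El Salvador, Nicaragua and Costa Rica.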


# Note a problem with readHTMLTable: it cannot be used directly with a URL.

# readHTMLTable(url, header=FALSE, stringsAsFactors = FALSE) generates an error,
# so each page is first downloaded to a file in the temporary directory.
# Download each Wikipedia "List of volcanoes in <country>" page into the
# session temporary directory, parse its HTML tables, and stack the volcano
# table of every country into the data frame `Volcano`.

# Fetch one country page and return its volcano table with a Country column.
#
# page         Wikipedia page title, e.g. "List_of_volcanoes_in_Mexico";
#              also used as the name of the downloaded file.
# country      Value stored in the added `Country` column.
# table_index  Position of the volcano table among the non-NULL tables
#              parsed from the page.
# skip_rows    Indices of the leading rows of that table to drop.
#
# Returns a data frame: the selected table minus `skip_rows`, plus `Country`.
# Stops with an error when the page has fewer than `table_index` tables.
read_volcano_table <- function(page, country, table_index, skip_rows) {
  url <- paste0("https://en.wikipedia.org/wiki/", page)
  dest <- file.path(tempdir(), paste0(page, ".html"))
  # readHTMLTable() is given a local copy because reading the URL directly
  # generates an error.
  download.file(url, dest)

  tables <- XML::readHTMLTable(dest, header = FALSE, stringsAsFactors = FALSE)
  tables <- Filter(Negate(is.null), tables)
  if (length(tables) < table_index) {
    stop(
      "Expected at least ", table_index, " tables in ", url,
      " but found ", length(tables),
      call. = FALSE
    )
  }
  cbind(tables[[table_index]][-skip_rows, ], Country = country)
}

# One entry per country, in the order the rows are stacked.
# NOTE(review): table positions and header-row counts are the ones this
# script has always used; Wikipedia page layouts change, so confirm them
# against the live pages.
volcano_pages <- list(
  list(page = "List_of_volcanoes_in_Mexico", country = "Mexico",
       table_index = 3, skip_rows = c(1, 2)),
  list(page = "List_of_volcanoes_in_Guatemala", country = "Guatemala",
       table_index = 2, skip_rows = 1),
  list(page = "List_of_volcanoes_in_El_Salvador", country = "El Salvador",
       table_index = 2, skip_rows = 1),
  list(page = "List_of_volcanoes_in_Nicaragua", country = "Nicaragua",
       table_index = 2, skip_rows = 1),
  list(page = "List_of_volcanoes_in_Costa_Rica", country = "Costa Rica",
       table_index = 2, skip_rows = c(1, 2))
)

Volcano <- do.call(
  rbind,
  lapply(volcano_pages, function(spec) do.call(read_volcano_table, spec))
)

# Give the stacked table meaningful column names, number its rows, and drop
# every volcano whose coordinate cell is empty.
colnames(Volcano) <- c(
  "Name", "Altitude", "Elevation", "Coordinates", "Eruption", "Country"
)
# seq_len() also copes with an empty table, where seq(from = 1, to = 0, by = 1)
# would raise an error.
row.names(Volcano) <- as.character(seq_len(nrow(Volcano)))

# A coordinate is missing when the cell is NA or holds only a dash
# placeholder ("-" or "—"); %in% never yields NA, so the mask is always
# TRUE/FALSE and safe to subset with.
coordinates_missing <- is.na(Volcano[, "Coordinates"]) |
  Volcano[, "Coordinates"] %in% c("-", "—")
Volcano <- Volcano[!coordinates_missing, ]

# Extract decimal latitude and longitude from the "Coordinates" text and
# append them to `Volcano` as numeric columns `latitude` and `longitude`.
#
# Each cell is split on " / " and only the third piece is used; a trailing
# " (...)" is removed from it and the remainder is split on "; " into
# latitude and longitude.
# NOTE(review): presumably the third piece is Wikipedia's decimal form,
# e.g. "19.03; -98.62 (Name)" -- confirm against the live pages.

# Convert a character vector of coordinate cells into a two-column numeric
# matrix (latitude, longitude) with one row per cell. Cells that do not
# yield exactly two non-NA pieces give a row of NA instead of shifting the
# rows that follow them.
parse_lat_lon <- function(coord_text) {
  decimal_part <- vapply(
    strsplit(coord_text, " / "),
    function(pieces) pieces[3],
    character(1)
  )
  decimal_part <- gsub(" \\(.+\\)", "", decimal_part)

  lat_lon_text <- lapply(strsplit(decimal_part, "; "), function(pair) {
    if (length(pair) != 2 || anyNA(pair)) {
      return(c(NA_character_, NA_character_))
    }
    # Characters that cannot be represented in ASCII are replaced by "" so
    # that as.numeric() below receives plain digits.
    iconv(pair, "", "ASCII", "")
  })

  matrix(as.numeric(unlist(lat_lon_text)), ncol = 2, byrow = TRUE)
}

lc <- parse_lat_lon(Volcano[, "Coordinates"])
Volcano <- cbind(Volcano, latitude = lc[, 1], longitude = lc[, 2])

# Draw a base map cropped to the extent of the volcano coordinates, mark
# every volcano with a filled red dot, and label the left axis.
# NOTE(review): map() with the "worldHires" database presumably needs the
# maps and mapdata packages, and degAxis() presumably comes from sp --
# confirm they are attached before this point.
local({
  lon <- Volcano[["longitude"]]
  lat <- Volcano[["latitude"]]

  map(
    "worldHires",
    xlim = range(lon, na.rm = TRUE),
    ylim = range(lat, na.rm = TRUE),
    mar = c(4, 5, 1, 1)
  )
  points(lon, lat, col = "red", pch = 19)

  # Side 2 is the left axis; las = 1 keeps the tick labels horizontal.
  degAxis(2, las = 1)
})


Posts les plus consultés de ce blog

Standard error from Hessian Matrix... what can be done when problem occurs

Install treemix in ubuntu 20.04

stepAIC from package MASS with AICc