Vignette Rmd source code (Not on CRAN to reduce load on DWD server through daily new builds and checks of the vignette)
# Attach rdwd, then select every file with daily resolution, variable
# "more_precip" and period "hist".
library(rdwd)
links <- selectDWD(
  res = "daily",
  var = "more_precip",
  per = "hist"
)
# 5583 stations - would take very long to download
length(links)
## [1] 5583
# Select only the relevant files: stations whose metadata record spans all of
# 2014-2016 and that are flagged as having a file.
data("metaIndex")
# Inclusive bounds: a station whose record starts exactly on 2014-01-01 or
# ends exactly on 2016-12-31 still covers the complete period.
covers_period <- metaIndex$von_datum <= 20140101 &
  metaIndex$bis_datum >= 20161231
# which() drops rows where the condition is NA instead of returning all-NA rows.
myIndex <- metaIndex[which(covers_period & metaIndex$hasfile), ]
data("fileIndex")
# ids that cannot be converted to a number become NA; the coercion warning
# about that is suppressed on purpose.
file_id <- suppressWarnings(as.numeric(fileIndex$id))
is_wanted <- file_id %in% myIndex$Stations_id &
  fileIndex$res == "daily" &
  fileIndex$var == "more_precip" &
  fileIndex$per == "historical"
links <- fileIndex[which(is_wanted), "path"]
# Prepend the server base address to the relative paths.
links <- paste0("ftp://ftp-cdc.dwd.de/pub/CDC/observations_germany/climate/", links)
length(links) # roughly 2000 elements - much better
## [1] 2014
In real life, use a permanent folder instead of `tempdir()`. If some downloads fail (mostly because you’ll get kicked off the FTP server), you can just run the same code again and only the missing files will be downloaded.
If you really want to download 2k historical (large!) datasets, you definitely want to set `sleep` to a much higher value.
For speed, we’ll only work with the first 3 urls.
# Process only the first three links, using the session's temporary folder
# as target directory; the result is kept in `localfiles`.
localfiles <- dataDWD(
  links[1:3],
  dir = tempdir(),
  sleep = 0.2,
  read = FALSE
)
2k large datasets are probably way too much for memory, so we’ll use a custom reading function. It will only select the relevant time section and rainfall column. The latter will be named with the id extracted from the filename.
# Print the variable overview of the first file without its third column.
# we want the RS column
readVars(localfiles[1])[, -3]
## Par Kurz Einheit
## RS RS Niederschlagshoehe mm
## RSF RSF Niederschlagsform nummerischer Code
## SH_TAG SH_TAG Schneehoehe cm
# Read one DWD file and keep only the RS (rainfall) column for 2014-2016.
#
# file:  path of a single downloaded file. Its file name must contain
#        "tageswerte_RR_" directly followed by the five-character station id.
# fread: passed on to readDWD().
# ...:   further arguments passed on to readDWD().
#
# Returns a data.frame with a Date column "date" and one column holding the
# RS values, named after the station id taken from the file name.
read2014_2016 <- function(file, fread=TRUE, ...)
{
  out <- readDWD(file, fread=fread, ...)
  # Work on calendar dates (might also save some memory space).
  out$MESS_DATUM <- as.Date(out$MESS_DATUM)
  # Inclusive bounds, so that 2014-01-01 and 2016-12-31 themselves are kept.
  in_period <- out$MESS_DATUM >= as.Date("2014-01-01") &
    out$MESS_DATUM <= as.Date("2016-12-31")
  # which() drops rows with a missing date instead of returning all-NA rows.
  out <- out[which(in_period), c("MESS_DATUM", "RS")]
  # Station id as column name. Only the file name is searched, and only the
  # first match is used, so a directory name containing the same pattern
  # cannot produce several ids.
  fname <- basename(file)
  idstringloc <- as.integer(regexpr("tageswerte_RR_", fname, fixed=TRUE))
  idstring <- substring(fname, idstringloc + 14, idstringloc + 18)
  colnames(out) <- c("date", idstring)
  out
}
# Run the custom reader on the first file and display the result's structure.
# test looks good
str(
  read2014_2016(localfiles[1])
)
## 'data.frame': 1094 obs. of 2 variables:
## $ date : Date, format: "2014-01-02" "2014-01-03" ...
## $ 00006: num 1.8 0.4 2.3 0.7 0.2 0 0 8.3 0 4 ...
Now let’s apply this to all our files and merge the result.
library(pbapply) # progress bar for lapply loop
# Apply the custom reader to every file in localfiles.
rain_list <- pblapply(localfiles, read2014_2016)
# Successively merge all results on the "date" column, keeping dates that are
# missing in some of them (all=TRUE). TRUE is spelled out because T is an
# ordinary variable that can be reassigned; the key is named explicitly so an
# accidentally duplicated station column is never used as a join key.
rain_df <- Reduce(function(x, y) merge(x, y, by="date", all=TRUE), rain_list)
str(rain_df) # looks nice!
## 'data.frame': 1094 obs. of 4 variables:
## $ date : Date, format: "2014-01-02" "2014-01-03" ...
## $ 00006: num 1.8 0.4 2.3 0.7 0.2 0 0 8.3 0 4 ...
## $ 00015: num 1.2 0.2 1.5 1.5 0 0 0 5.1 0.3 0.6 ...
## $ 00019: num 3.3 0.4 2.9 0 0.2 0.1 0 6.3 0.2 3.1 ...
# Column-wise summary of the merged table.
# 9 NAs in station 00006
summary(rain_df)
## date 00006 00015 00019
## Min. :2014-01-02 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:2014-10-02 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2015-07-02 Median : 0.100 Median : 0.000 Median : 0.100
## Mean :2015-07-02 Mean : 2.149 Mean : 1.647 Mean : 2.152
## 3rd Qu.:2016-03-31 3rd Qu.: 1.900 3rd Qu.: 1.500 3rd Qu.: 1.900
## Max. :2016-12-30 Max. :68.400 Max. :54.000 Max. :53.600
## NA's :9
# Empty plot (type="n") whose y axis spans the values of all station columns;
# the default x axis is suppressed and replaced by monthAxis().
plot(rain_df$date, rain_df[,2], type="n", ylim=range(rain_df[,-1], na.rm=TRUE),
     las=1, xaxt="n", xlab="Date", ylab="Daily rainfall sum [mm]")
berryFunctions::monthAxis()
# One line per station column, each in a randomly drawn colour.
# seq_len(...)[-1] skips the date column and, unlike 2:ncol(), never counts
# backwards.
for(i in seq_len(ncol(rain_df))[-1])
  lines(rain_df$date, rain_df[,i], col=sample(colours(), size=1))
plot(rain_df[,2:4]) # correlation plot only works for a few columns!
Let’s see the locations of our stations in an interactive map.
# Interactive map; only run if the optional leaflet package is available.
if(requireNamespace("leaflet", quietly=TRUE)){
  data(geoIndex) ; library(leaflet)
  # Keep the geoIndex rows whose id matches a station column of rain_df
  # (column names are the ids as strings, hence as.numeric).
  mygeoIndex <- geoIndex[geoIndex$id %in% as.numeric(colnames(rain_df)[-1]),]
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  leaflet(data=mygeoIndex) %>% addTiles() %>%
    addCircleMarkers(~lon, ~lat, popup=~display, stroke=TRUE)
}
For a static map with scaleBar, OSMscale works nicely but currently still has a Java dependency, see https://github.com/brry/OSMscale#installation
# Static map; only run if the optional OSMscale package is available.
if(requireNamespace("OSMscale")){
  library(OSMscale)
  # Build the station subset here, so this chunk does not depend on the
  # optional leaflet chunk having been run.
  data(geoIndex)
  mygeoIndex <- geoIndex[geoIndex$id %in% as.numeric(colnames(rain_df)[-1]),]
  pointsMap("lat", "lon", mygeoIndex, fx=2, fy=1, pargs=list(lwd=3),
            col="blue", zoom=5)
}
Any feedback on this package (or this vignette) is very welcome via github or berry-b@gmx.de!