library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the metadata CSV; readr guesses the column types from the file.
df <- read_csv(
  file = "data/kb_metadata_dan_filtered_openrefine_v_1.csv"
)
## Rows: 87221 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): author_st, title, publisher, publisher_st, place, place_st, misc_co...
## dbl (1): year_st
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the year and place columns, discard rows where either value is
# missing, restrict to the years 1600-1900 (both inclusive) and sort by year.
subset <- df %>%
  select(year_st, place_st) %>%
  drop_na() %>%
  filter(year_st >= 1600, year_st <= 1900) %>%
  arrange(year_st)

Find unikke lokationer

# One row per distinct place name, in order of first appearance.
unique_places <- distinct(subset, place_st)

Så skal der geokodes

library(tidygeocoder)

# Geocoding every place name is slow, so the lookup only runs when no cached
# result exists yet. The result is written to the same CSV that is loaded
# further down, so later runs skip the lookup entirely.
geo_cache <- "data/geo_data.csv"
if (!file.exists(geo_cache)) {
  geo_coded_places <- unique_places %>%
    geocode(address = place_st)
  write_csv(geo_coded_places, geo_cache)
}

Geokodningen tager meget lang tid, så vi har gemt resultatet i en fil og indlæser det her:

# Read the previously saved geocoding result (place name, latitude, longitude).
geo_coded_places <- read_csv(file = "data/geo_data.csv")
## Rows: 413 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): place_st
## dbl (2): lat, long
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop rows with a missing value in any column (e.g. places that got no
# coordinates).
df_geo <- drop_na(geo_coded_places)

Take a look at the geodata:

# Print the tibble to inspect the geocoded places.
# NOTE(review): in the printed output "Leyden" has lat 43.5 / long -75.4, which
# is not in Europe — presumably a same-named place was matched; verify the
# coordinates before relying on the map.
df_geo
## # A tibble: 309 × 3
##    place_st     lat   long
##    <chr>      <dbl>  <dbl>
##  1 København   55.7  12.6 
##  2 Lybeck      53.9  10.7 
##  3 Leyden      43.5 -75.4 
##  4 Rostock     54.1  12.1 
##  5 Braniewo    54.4  19.8 
##  6 Helsingør   56.0  12.6 
##  7 Wittenberg  51.9  12.6 
##  8 Strassburg  48.6   7.75
##  9 Bremen      53.1   8.81
## 10 Aarhus      56.1  10.2 
## # ℹ 299 more rows

Join the subsetted data with the geodata:

# Attach coordinates to every record; records whose place has no match in
# df_geo are dropped by the inner join.
subset_geocoded <- subset %>%
  inner_join(df_geo, by = join_by(place_st))
library(maps)
## Warning: pakke 'maps' blev bygget under R version 4.4.1
## 
## Vedhæfter pakke: 'maps'
## Det følgende objekt er maskeret fra 'package:purrr':
## 
##     map
# Plot every geocoded record as a point on top of a world outline.
# Colours have to be set on a layer (e.g. `colour =` in borders() or
# geom_point()); an extra `color` argument given to ggplot() itself is
# silently ignored, so none is passed here.
ggplot(subset_geocoded, aes(x = long, y = lat)) +
  borders("world") +
  geom_point()