library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the metadata CSV into a tibble; column types are guessed by readr.
df <- read_csv(file = "data/kb_metadata_dan_filtered_openrefine_v_1.csv")
## Rows: 87221 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): author_st, title, publisher, publisher_st, place, place_st, misc_co...
## dbl (1): year_st
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the year_st and place_st columns, drop rows with a missing value
# in either, restrict to the years 1600-1900 (both inclusive) and sort by year.
subset <- df %>%
  select(year_st, place_st) %>%
  drop_na() %>%
  filter(between(year_st, 1600, 1900)) %>%
  arrange(year_st)
Find unikke lokationer
# One row per distinct place name; distinct() on a named column keeps only
# that column, so the result is a one-column tibble of place_st values.
unique_places <- subset %>%
  distinct(place_st)
Så skal der geokodes
library(tidygeocoder)
# Geocoding is slow, and the rest of the script loads the saved result from
# data/geo_data.csv. Only geocode (and save the result) when that file is
# missing, instead of repeating the lookup on every run and then discarding it.
geo_cache_path <- "data/geo_data.csv"
if (!file.exists(geo_cache_path)) {
  geo_coded_places <- unique_places %>%
    geocode(address = place_st)
  write_csv(geo_coded_places, geo_cache_path)
}
Geokodningen tager meget lang tid, så vi har gemt resultatet og indlæser det her:
# Load the previously saved geocoding result (place_st, lat, long).
geo_coded_places <- read_csv(file = "data/geo_data.csv")
## Rows: 413 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): place_st
## dbl (2): lat, long
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop every row with a missing value in any column, i.e. places for which
# no coordinates are available.
df_geo <- drop_na(geo_coded_places)
Take a look at the geodata:
# Print the geocoded places for a visual sanity check.
# NOTE(review): in the printed output "Leyden" has lat 43.5 / long -75.4,
# which looks like a namesake rather than the intended city -- verify the
# coordinates of historical place names before trusting the map.
df_geo
## # A tibble: 309 × 3
## place_st lat long
## <chr> <dbl> <dbl>
## 1 København 55.7 12.6
## 2 Lybeck 53.9 10.7
## 3 Leyden 43.5 -75.4
## 4 Rostock 54.1 12.1
## 5 Braniewo 54.4 19.8
## 6 Helsingør 56.0 12.6
## 7 Wittenberg 51.9 12.6
## 8 Strassburg 48.6 7.75
## 9 Bremen 53.1 8.81
## 10 Aarhus 56.1 10.2
## # ℹ 299 more rows
Join the subsetted data with the geodata:
# Attach coordinates to every record; an inner join keeps only records whose
# place_st has a matching row in df_geo.
subset_geocoded <- subset %>%
  inner_join(df_geo, by = join_by(place_st))
library(maps)
## Warning: pakke 'maps' blev bygget under R version 4.4.1
##
## Vedhæfter pakke: 'maps'
## Det følgende objekt er maskeret fra 'package:purrr':
##
## map
# Plot every geocoded record as a point on top of world country outlines.
# The original passed `color = "grey99"` to ggplot() itself, where it is not
# used by any layer and had no effect on the plot; it is removed here. To
# colour the points or outlines, set `colour` inside geom_point() or borders().
ggplot(subset_geocoded, aes(x = long, y = lat)) +
  borders("world") +
  geom_point()