This section shows some diagnostic plots for assessing the transformations and concordances.
## Country-name lookup; joined below on `country.match`.
lookup_matched <- read_rds(
  here::here("data/interim/003-matched_country_lookup.Rds")
)

## Matched series with the lookup columns attached.
##  - `series.label` is "<country.match>~<name>".
##  - `data_source` becomes an ordered factor (TPP < REV3 < REV4) with the
##    "INDSTAT." prefix dropped from the labels; any value outside the three
##    listed levels turns into NA.
matched_series <-
  here::here("data/interim/003-matched_tpp_indstat.Rds") %>%
  read_rds() %>%
  left_join(lookup_matched, by = "country.match") %>%
  mutate(
    series.label = paste0(country.match, "~", name),
    data_source = factor(
      data_source,
      levels = c("TPP", "INDSTAT.REV3", "INDSTAT.REV4"),
      labels = c("TPP", "REV3", "REV4"),
      ordered = TRUE
    )
  )
## Overall trend per data source: one row per country / year / data source.
## TODO: add short name??
##  - `n_ISIC2.3`     : number of rows collapsed into the cell
##  - `total.value`   : sum of `value.3` (no `na.rm`, so an NA propagates)
##  - `n_data_source` : distinct data sources seen for the country, as a factor
matched_byYear <- matched_series %>%
  group_by(country.match, year, data_source) %>%
  summarise(
    n_ISIC2.3 = n(),
    total.value = sum(value.3),
    .groups = "drop"
  ) %>%
  group_by(country.match) %>%
  mutate(n_data_source = n_distinct(data_source)) %>%
  ungroup() %>%
  ## convert after ungrouping so the factor levels come from the whole column
  mutate(n_data_source = as.factor(n_data_source)) %>%
  ## summarise() dropped the lookup columns, so attach them again
  left_join(lookup_matched, by = "country.match")
## Yearly total value per country, coloured by data source.
## One panel per (name, country.match) pair, stacked in a single column, each
## with its own y-axis range and its strip label on the right.
p.yearTrends_by_source <-
  ggplot(
    data = matched_byYear,
    mapping = aes(x = year, y = total.value, colour = data_source)
  ) +
  ## NOTE(review): `data_source` is an ordered factor; presumably the explicit
  ## discrete scale is here to override the default ordinal palette -- confirm.
  scale_color_discrete() +
  geom_line(size = 0.7, alpha = 0.3) +
  geom_point(size = 1, alpha = 0.7) +
  facet_wrap(
    vars(name, country.match),
    scales = "free_y",
    ncol = 1,
    strip.position = "right"
  )

## Display with the legend on top; the stored plot object is left unchanged.
p.yearTrends_by_source + theme(legend.position = "top")
Export the matched country lookup and the matched series to .csv.
## Write the country lookup as-is.
write_csv(
  lookup_matched,
  here::here("data/final/matched_country_lookup.csv")
)

## Write the matched series, keeping only the five output columns.
select(matched_series, data_source, country.match, year, ISIC2.3, value.3) %>%
  write_csv(here::here("data/final/matched_output.csv"))