This section shows some diagnostic plots for assessing the transformations and concordances.

## Country lookup table; joined to the series below on `country.match`.
lookup_matched <- read_rds(
  here::here("data/interim/003-matched_country_lookup.Rds")
)

## Matched TPP / INDSTAT series with the lookup columns attached.
##   series.label : "<country.match>~<name>"
##   data_source  : ordered factor, relabelled TPP < REV3 < REV4
matched_series <-
  read_rds(here::here("data/interim/003-matched_tpp_indstat.Rds")) %>%
  left_join(lookup_matched, by = "country.match") %>%
  mutate(
    series.label = paste0(country.match, "~", name),
    data_source = factor(
      data_source,
      levels = c("TPP", "INDSTAT.REV3", "INDSTAT.REV4"),
      labels = c("TPP", "REV3", "REV4"),
      ordered = TRUE
    )
  )

Comparison Plots

Annual Aggregate Gross Output by country and data source

Calculate annual gross output by country for each of the datasets (TPP, INDSTAT Rev 3 and INDSTAT Rev 4)
Line graph of annual gross output by country and dataset
For each country, the closer the series line up, the better the match and the quality of the final output series

## Yearly totals per country and data source.
## Open question carried over from the original note: add a short name?
matched_byYear <- matched_series %>%
  group_by(country.match, year, data_source) %>%
  summarise(
    n_ISIC2.3 = n(),             # rows contributing to this total
    total.value = sum(value.3),  # no na.rm: an NA in value.3 propagates
    .groups = "drop"
  ) %>%
  ## count the distinct data sources observed for each country
  group_by(country.match) %>%
  mutate(n_data_source = n_distinct(data_source)) %>%
  ungroup() %>%
  ## convert after ungrouping so the factor levels are built over all rows
  mutate(n_data_source = as.factor(n_data_source)) %>%
  ## summarise() dropped the lookup columns, so attach them again
  left_join(lookup_matched, by = "country.match")

## Yearly total.value per data source: faint lines with points on top,
## one single-column facet per (name, country.match) with its own y-axis.
p.yearTrends_by_source <-
  ggplot(
    data = matched_byYear,
    mapping = aes(x = year, y = total.value, colour = data_source)
  ) +
  ## NOTE(review): data_source is an ordered factor, so this explicit scale
  ## presumably overrides ggplot2's ordinal default palette -- confirm.
  scale_color_discrete() +
  geom_line(size = 0.7, alpha = 0.3) +
  geom_point(size = 1, alpha = 0.7) +
  facet_wrap(
    facets = vars(name, country.match),
    ncol = 1,
    scales = "free_y",
    strip.position = "right"
  )

## Print the chart with the legend placed above the panels.
p.yearTrends_by_source + theme(legend.position = "top")

Sectoral shares by country and dataset in overlapping years

For each country and year, the more similar the colour gradient of the side-by-side bars, the better the match and the quality of the final output series

## Stacked bars normalised to 1 (position = "fill"): each bar shows how
## value.3 splits across ISIC2.3 for one data source, in a country-by-year
## grid. Only country-years covered by more than one data source are kept.
p.sectorShares_by_year_source <-
  matched_series %>%
  group_by(country.match, year) %>%
  mutate(n_data_source = n_distinct(data_source)) %>%
  ungroup() %>%
  filter(n_data_source != 1) %>%
  ggplot(mapping = aes(x = data_source, y = value.3)) +
  geom_bar(
    mapping = aes(fill = ISIC2.3),
    stat = "identity",
    position = "fill"
  ) +
  scale_fill_discrete() +
  facet_grid(
    rows = vars(country.match),
    cols = vars(year),
    scales = "free_y"
  )

## Print with vertical x labels, tight column spacing, outlined panels on a
## blank background, and the legend on top.
p.sectorShares_by_year_source +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90),
    panel.spacing.x = unit(1, "points"),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(linetype = "solid", fill = NA)
  )

Reference Table: Country Codes

Reference Code: Final Data Export

Export the data used in these plots as .csv files.

## Write the country lookup unchanged.
write_csv(lookup_matched, here::here("data/final/matched_country_lookup.csv"))

## Write the matched series, keeping only the five columns listed.
matched_series %>%
  select(
    data_source,
    country.match,
    year,
    ISIC2.3,
    value.3
  ) %>%
  write_csv(here::here("data/final/matched_output.csv"))

TPP & INDSTAT Comparison Plots

Last update: 13 February, 2022

Comparison Plots

Annual Aggregate Gross Output by country and data source

Sectoral shares by country and dataset in overlapping years

Reference Table: Country Codes

Reference Code: Final Data Export