plots

################
### Imports ###
##############
# Output of 00_data_preparation.R

if (
    !exists("pqtfilepath_full") ||
        !exists("pqtfilepath_1423") ||
        !exists("plot_output_dir") ||
        !exists("m1predictspath")
) {
    source("10_runner.R")
}


if (
    !file.exists(pqtfilepath_full) ||
        !file.exists(pqtfilepath_1423)
) {
    source("00_data_preparation.R")
}

if (!file.exists(m1predictspath)) {
    source("02_predictmodels.R")
}

df <- arrow::read_parquet(pqtfilepath_full)

df_predicts <- arrow::read_parquet(m1predictspath)


###############
### Plot 1 ###
#############

### Create dataset
#-----------------

# Take weighted cross-sectional proportions for each year and calculate
# standard errors of those proportions (for a proportion p, this is
# given by √[p(1-p))/n)]: https://www2.sjsu.edu/faculty/gerstman/StatPrimer/conf-prop.htm
#
# The SEs are both quite small (around 0.5%) and difficult to plot without
# introducing excessive clutter. They are hence only included in the hover
# tooltip, but won't be shown with error bars.

pdf_pov <- df |>
    dplyr::filter(!is.na(armypos)) |>
    dplyr::count(year, armypos, wt = wicss) |>
    dplyr::group_by(year) |>
    dplyr::mutate(
        armypos = sjlabelled::to_label(armypos),
        frac = n / sum(n),
        fracpct = frac * 100,
        seprop = 100 * sqrt(frac * (1 - frac) / sum(n)),
        text = paste0(
            "Jahr: ",
            year,
            "\nAnteil Befragte: ",
            round(fracpct, 2),
            "%",
            "\nStandardfehler: +/-",
            round(seprop, 2),
            "%"
        )
    ) |>
    # Plotly does not like tibbles, particularly not grouped ones; make sure
    # this isn't one:
    as.data.frame()

### Plot
#-------

plypov <- plotly::plot_ly(
    data = pdf_pov,
    y = ~fracpct,
    x = ~year,
    type = "scatter",
    text = ~text,
    linetype = ~armypos,
    hoverinfo = "text",
    color = ~armypos,
    colors = "Set1",
    mode = "lines+markers"
) |>
    plotly::layout(
        xaxis = list(title = "Jahr", range = c(1998, 2024)),
        yaxis = list(title = "Anteil Befragte (%)", range = c(10, 60)),
        title = "Präferierte Entwicklung der Armee, 1999-2023",
        legend = list(
            title = list(
                text = "<b>Präferenz</b>"
            )
        ),
        margin = list(b = 100, t = -20),
        annotations = list(
            x = 1998,
            y = 20,
            text = "Daten: SHP, 1999-2023",
            showarrow = F,
            font = list(size = 10, color = "grey"),
            yshift = -150,
            xshift = -10,
            xanchor = "left"
        )
    ) |>
    plotly::config(locale = "de-CH") # For localization of the widget

### Print
#--------
plypov

### Save
#-------

# To be uploaded to wordpress and embedded as iframe;
# pasting the generated src directly triggers crashing
htmlwidgets::saveWidget(
    plypov,
    paste0(plot_output_dir, "jasegl_iframe_ply_pov.html")
)

###############
### Plot 2 ###
#############

# There are several sociodemographic variables (gender, age, nationality) in the
# dataset that are "somewhat interesting" in terms of their relationship with
# armypos, but creating separate plots for them all would clog up the post
# without providing enough meaningful insight to justify the use of space.
#
# To work around this, we'll integrate them into a single plotly object, and
# create buttons to allow users to switch between traces.
#
# Data frames containing the information needed for the subplots are created
# separately and then merged into one long frame.

# Datasets
#---------

### Sex dataset
pdf_sex <- df |>
    dplyr::filter(!is.na(armypos) & !is.na(sex)) |>
    dplyr::count(year, sex, armypos, wt = wicss) |>
    dplyr::group_by(year, sex) |>
    dplyr::mutate(
        sex = sjlabelled::to_label(sex) |>
            forcats::fct_recode("Männer" = "man", "Frauen" = "woman"),
        frac = n / sum(n),
        seprop = 100 * sqrt(frac * (1 - frac) / sum(n)),
        gvar = "sex"
    ) |>
    dplyr::filter(!sex == "other") |>
    dplyr::rename(group = sex)


### Nationality dataset
pdf_isch <- df |>
    dplyr::filter(!is.na(armypos) & !is.na(isCH)) |>
    dplyr::count(isCH, year, armypos, wt = wicss) |>
    dplyr::group_by(isCH, year) |>
    dplyr::mutate(
        isCH = ifelse(isCH == "Yes", "Schweizer:innen", "Ausländer:innen"),
        frac = n / sum(n),
        seprop = 100 * sqrt(frac * (1 - frac) / sum(n)),
        gvar = "isCH"
    ) |>
    dplyr::rename(group = isCH)


### Age dataset
# Initially plotted but subsequently removed in favour of
# cohort
pdf_age <- df |>
    dplyr::filter(!is.na(armypos) & !is.na(agecat)) |>
    dplyr::count(agecat, year, armypos, wt = wicss) |>
    dplyr::group_by(agecat, year) |>
    dplyr::mutate(
        frac = n / sum(n),
        seprop = 100 * sqrt(frac * (1 - frac) / sum(n)),
        gvar = "agecat"
    ) |>
    dplyr::rename(group = agecat) |>
    dplyr::arrange(as.character(group))


### nuts 2
pdf_hsh <- df |>
    dplyr::filter(!is.na(armypos) & !is.na(hsh_lang)) |>
    dplyr::count(hsh_lang, year, armypos, wt = wicss) |>
    dplyr::group_by(hsh_lang, year) |>
    dplyr::mutate(
        frac = n / sum(n),
        seprop = 100 * sqrt(frac * (1 - frac) / sum(n)),
        gvar = "hsh_lang",
        hsh_lang = forcats::fct_recode(
            hsh_lang,
            "Französisch" = "French",
            "Italienisch" = "Italian",
            "Deutsch" = "Swiss German"
        ),
    ) |>
    dplyr::rename(group = hsh_lang) |>
    dplyr::arrange(as.character(group))

### cohort
pdf_coh <- df |>
    dplyr::filter(!is.na(armypos) & !is.na(cohort)) |>
    dplyr::count(cohort, year, armypos, wt = wicss) |>
    dplyr::group_by(cohort, year) |>
    dplyr::mutate(
        frac = n / sum(n),
        seprop = 100 * sqrt(frac * (1 - frac) / sum(n)),
        gvar = "cohort",
        cohort = forcats::fct_relevel(
            cohort,
            "Pre1945",
            "Boomer",
            "GenX",
            "Millenial",
            "Post1996"
        ) |>
            forcats::fct_recode(
                "Vor 1946" = "Pre1945",
                "1946-1964" = "Boomer",
                "1965-1980" = "GenX",
                "1981-1996" = "Millenial",
                "Nach 1996" = "Post1996"
            ),
    ) |>
    dplyr::rename(group = cohort) |>
    dplyr::arrange(group)

### Merge
# Returns a long tibble; "gvar" indicates the grouping variable and can be used
# for subsetting
pdf_multi <- list(pdf_sex, pdf_isch, pdf_age, pdf_hsh, pdf_coh) |>
    purrr::reduce(dplyr::full_join)

# Common operations: ap to label, fracs to pct, legends, hover labels
pdf_multi <- pdf_multi |>
    dplyr::mutate(
        armypos = sjlabelled::to_label(armypos),
        fracpct = frac * 100,
        legendtext = paste0(group, "\n(", armypos, ")"),
        text = paste0(
            "Jahr: ",
            year,
            "\nGruppe: ",
            legendtext,
            "\nAnteil: ",
            round(fracpct, 2),
            "%",
            "\nStandardfehler: +/-",
            round(seprop, 2),
            "%",
            "\nn = "
        )
    ) |>
    as.data.frame() # Plotly can trip up on tibbles if they contain groups

### Trace helper
#---------------

# Constructs different traces based on filter string; the visibility argument is
# included so that one trace (nationality) can be selected for initial display
MakeTrace <- function(p, fstring, visibility = FALSE) {
    plotly::add_trace(
        p = p,
        data = pdf_multi[pdf_multi$gvar == fstring, ],
        x = ~year,
        y = ~fracpct,
        color = ~group,
        mode = "lines+markers",
        type = "scatter",
        linetype = ~armypos,
        legendgroup = ~armypos,
        text = ~text,
        hoverinfo = "text",
        visible = visibility,
        name = ~legendtext
    )
}

### Assemble base plot
#----------------------

p_multi_init <- plotly::plot_ly() |>
    MakeTrace(p = _, fstring = "isCH", visibility = "legendonly") |>
    MakeTrace(p = _, fstring = "sex") |>
    MakeTrace(p = _, fstring = "agecat") |>
    MakeTrace(p = _, fstring = "hsh_lang") |>
    MakeTrace(p = _, fstring = "cohort")


### Visibility helper
#--------------------

# Plotly traces can have three visibility states:
# - FALSE (no line, no legend),
# - TRUE (line, legend)
# -'legendonly' (greyed-out legend, no line, toggles T/F on click)
#
# Initial visibility states are stored (for compiled plotly objects) at
# [obj]$x$data[[tracenumber]]$visible
# The states for the /initially visible plot/ are set once the full plot has
# been assembled.
#
# Plotly buttons are configured via lists passed to plotly's layout() function.
# They can be set to trigger specific visibility states on button press by
# assigning them an object containing these states in the correct order (i.e.
# following the order) of the traces.
#
# AssembleVisVec() generates visibility state objects for updatemenu() by
# parsing the trace names. It takes two arguments:
# - Regex for the group (to be set from FALSE (invisible) to "legendonly")
# - Regex for the subset of the group to be set to TRUE (visible) initially
#
# It returns a list. This is required since type mixing (boolean T/F + string
# "legendonly" is desired; coercion to character vectors results in parsing
# failures and ugly plots
# (see: https://github.com/plotly/plotly.R/issues/1325)

# Name helper: Compile the plot if it isn't already, then grab the names
GetTraceNames <- function(x) {
    if (plotly:::is.evaled(x) == FALSE) {
        x <- plotly::plotly_build(x)
    }
    purrr::map_chr(x$x$data, "name")
}

# Main function
AssembleVisVec <- function(
    input_obj,
    rgx_legend,
    rgx_truesubset
) {
    namevec <- GetTraceNames(input_obj)
    baselist <- rep(list(FALSE), length(namevec))

    set_lo_at_pos <-
        which(
            grepl(
                namevec,
                pattern = rgx_legend,
                perl = TRUE
            )
        )

    set_true_at_pos <- which(
        grepl(
            namevec,
            pattern = rgx_legend,
            perl = TRUE
        ) &
            grepl(
                namevec,
                pattern = rgx_truesubset,
                perl = TRUE
            )
    )

    baselist[set_lo_at_pos] <- "legendonly"
    baselist[set_true_at_pos] <- TRUE

    return(baselist)
}

### Button definition
#---------------------

updmen_multi <- list(
    list(
        active = 0, # Nationality is initially visible, so button must show as selected
        type = "buttons",
        buttons = list(
            list(
                label = "Nationalität",
                method = "update",
                args = list(
                    list(
                        visible = AssembleVisVec(
                            p_multi_init,
                            "(Schweiz|Ausländer)",
                            "Stark"
                        )
                    ),
                    list(
                        title = "Präferierte Entwicklung der Armee nach Nationalität, 1999-2023"
                    )
                )
            ),
            list(
                label = "Geschlecht",
                method = "update",
                args = list(
                    list(
                        visible = AssembleVisVec(
                            p_multi_init,
                            "(Frauen|Männer)",
                            "Stark"
                        )
                    ),
                    list(
                        title = "Präferierte Entwicklung der Armee nach Geschlecht, 1999-2023"
                    )
                )
            ),
            # list(
            #     label = "Altersgruppe",
            #     method = "update",
            #     args = list(
            #         list(
            #             visible = AssembleVisVec(
            #                 p_multi_init,
            #                 "^\\d{2}",
            #                 "Stark"
            #             )
            #         ),
            #         list(
            #             title = "Präferierte Entwicklung der Armee nach Altersgruppe, 1999-2023"
            #         )
            #     )
            # ),
            list(
                label = "Sprachregion",
                method = "update",
                args = list(
                    list(
                        visible = AssembleVisVec(
                            p_multi_init,
                            "Deutsch|Französisch|Italienisch",
                            "Stark"
                        )
                    ),
                    list(
                        title = "Präferierte Entwicklung der Armee nach Sprachregion, 1999-2023"
                    )
                )
            ),
            list(
                label = "Generation",
                method = "update",
                args = list(
                    list(
                        visible = AssembleVisVec(
                            p_multi_init,
                            "19.*",
                            "Stark"
                        )
                    ),
                    list(
                        title = "Präferierte Entwicklung der Armee nach Sprachregion, 1999-2023"
                    )
                )
            )
        )
    )
)

# Final plot assembly
#--------------------

p_multi <- p_multi_init |>
    plotly::layout(
        updatemenus = updmen_multi,
        xaxis = list(title = "Jahr", range = c(1998, 2024)),
        yaxis = list(title = "Anteil (%)", range = c(10, 75)),
        title = "Präferierte Entwicklung der Armee nach Nationalität, 1999-2023",
        legend = list(
            title = list(
                text = '<b>Legende</b><br><span class="small">Zum Anzeigen klicken</span>'
            )
        ),
        margin = list(b = 100, t = -20),
        annotations = list(
            x = 1998,
            y = 20,
            text = "Daten: SHP, 1999-2023",
            showarrow = FALSE,
            font = list(size = 10, color = "grey"),
            yshift = -150,
            xshift = -10,
            xanchor = "left"
        )
    ) |>
    plotly::config(locale = "de-CH")


### Rebuild
p_multi_out <- plotly::plotly_build(p_multi)

### Set the initial visibility state
vec_modvisat <-
    which(grepl(
        GetTraceNames(p_multi_out),
        pattern = "(Ausländer|Schweizer).*Starke"
    ))

for (i in vec_modvisat) {
    p_multi_out$x$data[[i]]$visible <- TRUE
}

### Print
p_multi_out

### Save
htmlwidgets::saveWidget(
    p_multi_out,
    paste0(plot_output_dir, "jasegl_iframe_ply_multi.html")
)
################
### Plot 3 ####
##############

# Dataset
#--------

### Load data from prediction model

ptycolvec <- c(
    "Grüne" = "#99CC33",
    "SP" = "#FF0000",
    "Die Mitte" = "#FF9900",
    "FDP" = "#0066CC",
    "SVP" = "#008000",
    "GLP" = "#acd436",
    "Sonstige" = "#aeaeae"
)


pdf_pty <- df_predicts


pdf_pty <- pdf_pty |>
    # Subset should (maybe, hopefully? help with performance a little bit)
    dplyr::filter(!party == "Other") |>
    dplyr::mutate(
        party = forcats::fct_recode(
            party,
            "Die Mitte" = "DM",
            "Grüne" = "GP"
        ),
        isMale = forcats::fct_recode(
            isMale,
            "Männer" = "Yes",
            "Frauen" = "No"
        ),
        text = paste0(
            "Partei: ",
            party,
            "\nJahr: ",
            year,
            "\nVorausgesagte Zustimmungswahrscheinlichkeit: ",
            round(predicted_pct, 2),
            "%",
            "\n95%-Konfidenzintervall: ",
            round(conf.low_pct, 2),
            "-",
            round(conf.high_pct, 2),
            "%"
        ),
        year = as.numeric(as.character(year))
    )


### Hacky things with crosstalk
#---------------------------------

# We want people to be able to dynamically select the "model individuals" for
# whom predictions are to be displayed based on the grouping variables
# available in the dataset. Given the sheer number of traces involved,
# doing this with native plotly is not an option.
#
# A supremely helpful post by pholzm on Stackoverflow
# (https://stackoverflow.com/questions/71494308/r-plotly-separate-functional-legends/71657272#71657272)
# presents an alternative approach using a dynamically-filtered shared data
# object created with crosstalk, which is implemented below.
#
# Unfortunately, crosstalk does not natively support (1) providing an initial
# selection of data; (2) export to a self-contained widget or (3) suppression
# of the "(All)" filter option.
#
# (1) Is addressed by installing a branch of crosstalk that has been
# lingering for a few years (https://github.com/rstudio/crosstalk/pull/70);
# see 10_runner.R for the package install.
#
# (2) and 83) second can be addressed by using pandoc to create a self-contained
# result, reading the HTML back in and then running ugly (but somehow functional)
# regex-based replacement operations to fix the broken HTML and un-label the
# all button.
#
# There are some inherent performance issues to this approach. First and foremost,
# it involves handing a 41,600 × 15 dataset to the client for processing, which
# takes a second The size is reduced somewhat via the exclusion of the nation-
# ality variable, but still suboptimal. Secondly, it involves the partially
# redundant loading of several js libraries, which is an inherent limitation
# of loading everything via separate iframes.

library(crosstalk)

ctobj <- SharedData$new(pdf_pty)

cto <- crosstalk::bscols(
    widths = c(2, NA),
    list(
        crosstalk::filter_select(
            "age",
            "Alter",
            ctobj,
            ~age,
            allLevels = FALSE,
            multiple = FALSE,
            selected = "45"
        ),
        crosstalk::filter_select(
            "canton",
            "Kanton",
            ctobj,
            ~canton,
            allLevels = FALSE,
            multiple = FALSE,
            selected = "BE"
        ),
        crosstalk::filter_select(
            "gend",
            "Geschlecht",
            ctobj,
            ~isMale,
            allLevels = FALSE,
            multiple = FALSE,
            selected = "Männer"
        ),
        crosstalk::filter_select(
            "outvar",
            "Modellierte Wahrscheinlichkeit",
            ctobj,
            ~outcome,
            allLevels = FALSE,
            multiple = FALSE,
            selected = "Keine Armee"
        )
    ),
    plotly::plot_ly(
        data = ctobj,
        x = ~year,
        y = ~predicted_pct,
        color = ~party,
        colors = ~ptycolvec,
        mode = "lines+markers",
        type = "scatter",
        text = ~text,
        hoverinfo = "text",
        legendgroup = ~party
    ) |>
        plotly::layout(
            xaxis = list(title = "Jahr", range = c(1998, 2024)),
            yaxis = list(
                title = "Zustimmungswahrscheinlichkeit (%)",
                range = c(0, 100)
            ),
            title = "Vorausgesagte Haltung zur Armee, 1999-2023",
            legend = list(
                title = list(
                    text = "<b>Parteisympathie</b>"
                )
            ),
            margin = list(b = 100, t = -20),
            annotations = list(
                x = 2020,
                text = "Daten: SHP, 1999-2023<br>(Voraussagen beziehen sich auf Schweizer:innen)",
                showarrow = FALSE,
                font = list(size = 10, color = "grey"),
                yshift = -120,
                xanchor = "left"
            )
        )
)

### Save
htmltools::save_html(
    cto,
    paste0(plot_output_dir, "jasegl_iframe_ptymenu.html")
)

# Embed the libraries
rmarkdown::pandoc_self_contained_html(
    paste0(plot_output_dir, "jasegl_iframe_ptymenu.html"),
    paste0(plot_output_dir, "jasegl_iframe_ptymenu_sc.html")
)

# The result from pandoc has some issues initially, namely a badly encoded
# header that makes Wordpress refuse to nicely display it as an iframe.
# We work around this by reading it as text, replacing the offending passage,
# and writing it again.
htmlo <- readr::read_file(paste0(
    plot_output_dir,
    "jasegl_iframe_ptymenu_sc.html"
))

htmlo2 <- htmlo |>
    stringr::str_replace(
        stringr::fixed(
            '<p>&lt;!DOCTYPE html&gt; <html lang="en"> <head> <meta charset="utf-8" />
<style>body{background-color:white;}</style>'
        ),
        '<!DOCTYPE html>\n<html>\n<head>\n<meta charset=\"utf-8\" />'
    ) |>
    # While we're at it, empty the (All) button; not pretty, but at least
    # doesn't suggest the existence of an option that isn't really there.
    stringr::str_replace(
        stringr::fixed('{options:[{value:"",label:"(All)"}]'),
        '{options:[{value:"",label:""}]'
    ) |>
    # Make sure to not use a serif font for the button labels...
    stringr::str_replace(
        stringr::fixed(
            ".crosstalk-input .control-label{margin-bottom:0;vertical-align:middle}"
        ),
        ".crosstalk-input .control-label{margin-bottom:0;vertical-align:middle;font-family:sans-serif;}"
    ) |>
    # ...or the buttons themselves
    stringr::str_replace(
        stringr::fixed(
            ".selectize-input input {\ncolor: #333333;\nfont-family: inherit;"
        ),
        ".selectize-input input {\ncolor: #333333;\nfont-family: sans-serif;"
    )

readr::write_file(
    htmlo2,
    paste0(plot_output_dir, "jasegl_iframe_ptymenu_sc.html")
)