## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE, 
  message = FALSE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("tntpr")

## ----load_packages------------------------------------------------------------
# NOTE: We are not simply loading `tidyverse` in this vignette due to how vignettes
# are built to create the documentation website. In other contexts, however, we
# would simply use `library(tidyverse)` instead of loading many of the packages individually.
library(tntpr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
library(forcats)
library(ggalt)
library(ggridges)

## -----------------------------------------------------------------------------
# base plot we will reuse in this section
plt <- ggplot(ggplot2::mpg, aes(displ, hwy)) +
  geom_point() +
  labs(
    title = "Cars with higher displacement\nhave a lower MPG",
    subtitle = "Displacement vs. MPG",
    caption = "Data from ggplot's mpg dataset",
    x = "Engine Displ.",
    y = "MPG"
  )

plt +
  tntp_style()

## -----------------------------------------------------------------------------
plt +
  tntp_style(show_axis_titles = TRUE, family = "sans", base_size = 20) +
  theme(plot.subtitle = ggplot2::element_text(family = "serif", size = 25))

## -----------------------------------------------------------------------------
plt +
  tntp_style(show_axis_titles = TRUE) +
  theme(panel.border = element_rect(color = "black", fill = NA))

## -----------------------------------------------------------------------------
tntp_colors("green", "mint")

## -----------------------------------------------------------------------------
show_tntp_colors("green", "moss", "mint")

## -----------------------------------------------------------------------------
# Note: the cex_label parameter is used to adjust the relative font size
show_tntp_colors(cex_label = 0.7)

## -----------------------------------------------------------------------------
tntp_palette("likert_6")

## -----------------------------------------------------------------------------
show_tntp_palette("likert_6", reverse = TRUE)

## -----------------------------------------------------------------------------
show_tntp_palette()

## ----message = FALSE----------------------------------------------------------
# load fake data into global environment
# remove all salaries of 0
county_data <- tntpr::fake_county |> 
  filter(t_salary > 0)

avg_salary <- county_data |> 
  filter(t_salary != 0) |> 
  group_by(school_year) |> 
  summarize(avg_salary = mean(t_salary, na.rm = TRUE), .groups = "drop")

## -----------------------------------------------------------------------------
base_font_size <- 16

## ----message = FALSE----------------------------------------------------------
#Make plot
ggplot(avg_salary, aes(x = school_year, y = avg_salary)) +
  geom_line(colour = tntp_colors("green"), linewidth = 1) +
  scale_y_continuous(labels = scales::dollar, limits = c(0, 5000)) +
  labs(
    title="Average Teacher Salaries",
    subtitle = "Teacher salaries remained constant between 2012 and 2015"
  ) +
    tntp_style(base_size = base_font_size)

## -----------------------------------------------------------------------------
#Prepare data
school_salary <- county_data |> 
  filter(t_salary != 0) |> 
  group_by(school_year, school_name) |> 
  summarize(avg_salary = mean(t_salary, na.rm = TRUE), .groups = "drop")

# create list of school names so we can easily filter data set for the number of schools we want
school_names <- unique(school_salary$school_name)

# only plot two schools
line_plot_schools <- school_salary |> 
  filter(school_name %in% school_names[1:3])


## -----------------------------------------------------------------------------
ggplot(line_plot_schools, aes(x = school_year, y = avg_salary, color = school_name)) +
  geom_line(linewidth = 1) +
  scale_y_continuous(labels = scales::dollar, limits = c(0, 5000)) +
  scale_colour_manual(values = tntp_palette("colorful")) +
  labs(
    title="Average Teacher Salaries",
    subtitle = "Teacher salaries remained constant between 2012 and 2015"
  ) +
  tntp_style(base_size = base_font_size)

## -----------------------------------------------------------------------------
#Prepare data
bar_df <- school_salary |>
  filter(
    school_year == 2015,
    school_name %in% school_names[1:5]
  ) |> 
  # add line breaks for better plotting
  mutate(school_name = str_wrap(school_name, 7))

ggplot(bar_df, aes(x = school_name, y = avg_salary)) +
  geom_bar(stat="identity", 
           position="identity", 
           fill= tntp_colors("gold")) +
  scale_y_continuous(labels = scales::dollar, limits = c(0, 5000)) +
  labs(
    title="Acacia had higher average salaries in 2015",
    subtitle = "Average teacher salaries in 2015 by school"
  ) +
  tntp_style(base_size = base_font_size)

## -----------------------------------------------------------------------------

#prepare data
stacked_df <- county_data |> 
  filter(
    school_year == 2015,
    school_name %in% school_names[1:5]
  ) |> 
  mutate(t_salary_cut = cut(t_salary, 
                    breaks = c(0, 2500, 3500, 4500, 10000),
                    labels = c("under $2.5k", "$2.5k-$3.5k", "$3.5k-$4.5k", "$4.5k+"))) |>
  group_by(school_name, t_salary_cut) |>
  summarise(n_cut_school = n(), .groups = "drop_last") |> 
  mutate(
    n_cut_salary = sum(n_cut_school, na.rm = TRUE),
    perc_in_each_cut = n_cut_school / n_cut_salary
  )  |> 
  # add line breaks for better plotting
  mutate(school_name = str_wrap(school_name, 7))

#set order of stacks by changing factor levels
stacked_df$t_salary_cut = factor(stacked_df$t_salary_cut, levels = rev(levels(stacked_df$t_salary_cut)))

## -----------------------------------------------------------------------------
ggplot(
  data = stacked_df, 
  aes(x = school_name, y = perc_in_each_cut, fill = t_salary_cut)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
  scale_fill_manual(values = tntp_palette("greens")) +
  labs(title = "Most teachers earn between $2.5K and $4.5K",
       subtitle = "Percentage of teachers by salary range") +
  theme(legend.position = "top", 
        legend.justification = "left") +
  guides(fill = guide_legend(reverse = TRUE)) +
  tntp_style(base_size = base_font_size)

## -----------------------------------------------------------------------------
# only plot the lowest and highest earning groups
earnings_to_keep <- levels(stacked_df$t_salary_cut)[c(4,1)]

# map bar colors to values
colors_to_use <- tntp_colors("yellow", "green") |> 
  set_names(earnings_to_keep)

dodged_df <- stacked_df |> 
  # only plot the lowest and highest earning groups
  filter(t_salary_cut %in% !!earnings_to_keep) |> 
  # switch order of factors so that the lowest earnings plot first
  mutate(t_salary_cut = factor(t_salary_cut, levels = earnings_to_keep)) 

ggplot(dodged_df, aes(x = school_name, y = perc_in_each_cut, fill = t_salary_cut)) +
    geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
    scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
    scale_fill_manual(values = colors_to_use) +
    labs(title = "More teachers earn over $4.5k than under $2.5k",
         subtitle = "Percentage of teachers by salary range") +
    tntp_style(base_size = base_font_size)

## -----------------------------------------------------------------------------
# create data set showing school test scores and average school salaries
scores_salary <- county_data |> 
  group_by(school_name) |> 
  summarize(
    avg_test_score = mean(sch_ela_avg + sch_math_avg, na.rm=FALSE),
    avg_salary = mean(t_salary, na.rm = FALSE),
    enrollment = mean(sch_enroll_2015, na.rm = TRUE),
    .groups = "drop"
  ) |> 
  mutate(perc_rank_test_score = percent_rank(avg_test_score)) |> 
  drop_na()

## -----------------------------------------------------------------------------
ggplot(scores_salary, aes(avg_salary, perc_rank_test_score)) +
  geom_point(color = tntp_colors("tangerine"), alpha = 0.9, size = 3) +
  labs(
    title = "Schools with higher salaries do not have higher test scores",
    subtitle = "Relationship between school test scores and salaries",
    x = "Average school salary",
    y = "Percentile rank test score"
  ) +
  scale_x_continuous(labels = scales::dollar) +
  scale_y_continuous(labels = scales::percent) +
  tntp_style(base_size = base_font_size, show_axis_titles = TRUE)


## -----------------------------------------------------------------------------
dumbbell_df <- dodged_df |> 
  pivot_wider(id_cols = "school_name", names_from = "t_salary_cut", values_from = "n_cut_school") |> 
  mutate(across(where(is.numeric), ~replace_na(.x, 0)))

ggplot(dumbbell_df, aes(x = `under $2.5k`, xend = `$4.5k+`, y = fct_reorder(school_name, `under $2.5k`)), group = school_names) + 
  geom_dumbbell(
    color = tntp_colors("light_grey"),
    size = 3,
    colour_x = tntp_colors("gold"),
    colour_xend = tntp_colors("green"),
    show.legend = TRUE
  ) +
  labs(title = "More teachers earn over $4.5k than under $2.5k",
       subtitle = "Number of teachers by salary range",
       x = "Number of teachers earnign a given salary") + 
  tntp_style(base_size = base_font_size)

## ----message = FALSE----------------------------------------------------------
# number of teachers per school
number_teachers_school <- county_data |>
  count(school_year, school_name)

ggplot(number_teachers_school, aes(n)) +
  geom_histogram(binwidth = 5, colour = "white", fill = tntp_colors('navy')) +
  labs(
    title = "Schools have a wide distribution in the number of teachers",
    subtitle = "Total number of teachers per school",
    x = "Number of teacher in school",
    y = "Count"
  ) +
  tntp_style(base_size = base_font_size)

## -----------------------------------------------------------------------------
ggplot(county_data, aes(x = t_salary, y = school_year, group = school_year, fill = factor(school_year))) +
  geom_density_ridges(quantile_lines = TRUE, quantiles = 4) +
  scale_x_continuous(labels = scales::dollar) +
  labs(title = "Income distrubutions for teachers has remained constant",
       subtitle = "Income distrubution and quantiles for teachers") +
  tntp_style(base_size = base_font_size) + 
  theme(legend.position = "none") + 
  scale_fill_manual(values = tntp_palette("colorful"))

## -----------------------------------------------------------------------------
teacher_survey <- tntpr::teacher_survey

# the y-axis will contain text of an entire survey question
# we want to place line breaks in this text so plots look better
axis_line_breaks <- 40

# scales in HE questions, in order starting with the strongest
agree_disagree_scale <- rev(c("Strongly Agree", "Agree", "Somewhat Agree", "Somewhat Disagree", "Disagree", "Strongly Disagree"))

# put survey into long form and clean up question names
teacher_survey_he <- teacher_survey |> 
  select(-timing) |> 
  pivot_longer(cols = everything(), names_to = "question", values_to = "response")

# calculate percentage of responses to each high expectations question
teacher_survey_he_perc <- teacher_survey_he |> 
  drop_na("response") |>
  # calculate the number of responses for each response option
  count(question, response, name = "n_response") |>
  # calculate the number of responses for each question
  group_by(question) |>
  mutate(n_question = sum(n_response)) |>
  ungroup() |>
  # calculate percentages
  mutate(
    # calculate percentages
    percent = n_response / n_question,
    # make a column that is text of the percent for plotting
    percent_pretty = scales::percent(percent, accuracy = 1)
  )

# calculate percentage of strongly agree and agree
teacher_survey_he_perc <- teacher_survey_he_perc |> 
  mutate(scale_strength = ifelse(response %in% !!agree_disagree_scale[c(5,6)], "Strong response", "Weak response")) |>
  group_by(question, scale_strength) |>
  mutate(strong_response_percent = sum(percent)) |>
  ungroup() |> 
  mutate(
    strong_response_percent = ifelse(response == "Agree", strong_response_percent, NA),
    # create line breaks for questions ,which will make plots look better
    question = str_wrap(question, axis_line_breaks),
    response = factor(response, levels = agree_disagree_scale)
  )

## -----------------------------------------------------------------------------
ggplot(teacher_survey_he_perc, aes(percent, question, fill = response)) +
  geom_col() +
  geom_text(
    aes(label = scales::percent(strong_response_percent, accuracy = 1), x = strong_response_percent),
    color = "white", fontface = "bold", family = "Halyard Display", size = 5, hjust = 1.05
  ) +
  scale_x_continuous(labels = scales::percent, limits = c(0,1)) +
  scale_fill_manual(values = tntp_palette("top2_6"), drop = FALSE) +
  guides(fill=guide_legend(nrow=2, byrow=TRUE, reverse = TRUE))  +
  labs(title = "High Expectations Survey Responses") +
  tntp_style(base_size = base_font_size)

## ----eval=FALSE---------------------------------------------------------------
#  geom_col(position = "diverge")

## -----------------------------------------------------------------------------
ggplot(teacher_survey_he_perc, aes(x = percent, y = question, fill = fct_rev(response))) +
  geom_col(position = position_diverge()) +
  scale_fill_manual(
    values = tntp_palette("likert_6"), 
    drop = FALSE,
    breaks = agree_disagree_scale,
    labels = agree_disagree_scale
  ) +
  geom_vline(aes(xintercept = 0), linetype = 1, linewidth = 1.2, alpha = .7) +
  scale_x_continuous(limits = c(-1, 1), breaks = seq(-1, 1, .25), labels = \(x) scales::percent(abs(x))) +
  labs(title = "High Expectations Survey Responses") +
  tntp_style(base_size = base_font_size)


## -----------------------------------------------------------------------------
ggplot(teacher_survey_he_perc, aes(x = percent, y = question, fill = fct_rev(response))) +
  geom_col(position = position_diverge(break_after = "Agree")) +
  geom_text(aes(label = ifelse(percent > 0.1, percent_pretty, "")), 
            position = position_diverge(break_after = "Agree", vjust = 0.5),
            family = "Halyard Display", size = 3) + 
  scale_fill_manual(
    values = tntp_palette("likert_6"), 
    drop = FALSE,
    breaks = agree_disagree_scale,
    labels = agree_disagree_scale
  ) +
  geom_vline(aes(xintercept = 0), linetype = 1, linewidth = 1.2, alpha = .7) +
  scale_x_continuous(limits = c(-1, 1), breaks = seq(-1, 1, .25), labels = \(x) scales::percent(abs(x))) +
  labs(title = "High Expectations Survey Responses") +
  tntp_style(base_size = base_font_size)