Does AI help humans make better decisions?
library(aihuman)
library(tidyr)
library(dplyr)
library(ggplot2)
## set default ggplot theme
theme_set(theme_bw(base_size = 15) + theme(plot.title = element_text(hjust = 0.5)))
Ben-Michael, Eli, D. James Greiner, Melody Huang, Kosuke Imai, Zhichao Jiang, and Sooahn Shin. “Does AI help humans make better decisions? A statistical evaluation framework for experimental and observational studies”, 2024.
The data used for this paper, and made available here, are interim, based on only half of the observations in the study and (for those observations) only half of the study follow-up period. We use them only to illustrate methods, not to draw substantive conclusions.
In this vignette, we will use data originally from Imai, Jiang, Greiner, Halen, and Shin (2023). The essential variables are:
Z
: A binary treatment \(Z_i \in \{0,1\}\) (e.g., PSA provision)

D
: A binary decision \(D_i \in \{0,1\}\) (e.g., judge’s release decision)

A
: A binary AI recommendation \(A_i \in \{0,1\}\) (e.g., dichotomized pretrial public safety assessment scores)

Y
: A binary outcome \(Y_i \in \{0,1\}\) (e.g., new criminal activity)

The analysis then proceeds through the following workflow: estimating the nuisance functions, comparing the decision makers (human, human+AI, and AI alone), and learning policies for when to provide or follow the AI recommendations.
We will be using the following data:
# randomized PSA provision (0: none, 1: provided)
Z <- aihuman::NCAdata$Z
# judge's release decision (0: signature, 1: cash)
D <- if_else(aihuman::NCAdata$D == 0, 0, 1)
# dichotomized pretrial public safety assessment scores (0: signature recommended, 1: cash recommended)
A <- aihuman::PSAdata$DMF
# new criminal activity (0: no, 1: yes)
Y <- aihuman::NCAdata$Y
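Before moving on, a quick sanity check on these vectors (illustrative only):
# the four vectors should align case-by-case
stopifnot(length(Z) == length(D), length(D) == length(A), length(A) == length(Y))
# randomized provision arms and the share of harsher PSA recommendations
table(Z)
prop.table(table(A))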
For subgroup analysis, we will use the following covariates:
# race
race_vec <- aihuman::NCAdata |>
mutate(race = if_else(White == 1, "White", "Non-white")) |>
pull(race)
# gender
gender_vec <- aihuman::NCAdata |>
mutate(gender = if_else(Sex == 1, "Male", "Female")) |>
pull(gender)
In the paper, we use doubly robust estimators throughout the analysis. For this purpose, we will be fitting the nuisance functions using the following covariates (\(X_i\)):
cov_mat <- aihuman::NCAdata |>
select(-c(Y, D, Z)) |>
bind_cols(FTAScore = aihuman::PSAdata$FTAScore,
NCAScore = aihuman::PSAdata$NCAScore,
NVCAFlag = aihuman::PSAdata$NVCAFlag) |>
as.matrix()
colnames(cov_mat)
#> [1] "Sex" "White"
#> [3] "SexWhite" "Age"
#> [5] "PendingChargeAtTimeOfOffense" "NCorNonViolentMisdemeanorCharge"
#> [7] "ViolentMisdemeanorCharge" "ViolentFelonyCharge"
#> [9] "NonViolentFelonyCharge" "PriorMisdemeanorConviction"
#> [11] "PriorFelonyConviction" "PriorViolentConviction"
#> [13] "PriorSentenceToIncarceration" "PriorFTAInPast2Years"
#> [15] "PriorFTAOlderThan2Years" "Staff_ReleaseRecommendation"
#> [17] "FTAScore" "NCAScore"
#> [19] "NVCAFlag"
Then, the first set of nuisance functions can be computed as follows, based on generalized boosted regression modeling (see the gbm::gbm function):
nuis_func <- compute_nuisance_functions(Y = Y, D = D, Z = Z, V = cov_mat, shrinkage = 0.01, n.trees = 1000)
This gives us the following nuisance functions:
nuis_func$z_models$d_pred gives the estimated \(\hat{m}^{D}(z, X_i)\).
nuis_func$z_models$y_pred gives the estimated \(\hat{m}^{Y}(z, X_i)\).
nuis_func$pscore gives the estimated \(\hat{e}(1, X_i)\).
The second set of nuisance functions, which condition on the AI recommendation, can be computed similarly:
nuis_func_ai <- compute_nuisance_functions_ai(Y = Y, D = D, Z = Z, A = A, V = cov_mat, shrinkage = 0.01, n.trees = 1000)
This gives us the following nuisance functions:
nuis_func_ai$z_models$d_pred gives the estimated \(\hat{m}^{D}(z, a, X_i)\).
nuis_func_ai$z_models$y_pred gives the estimated \(\hat{m}^{Y}(z, a, X_i)\).
nuis_func_ai$pscore gives the estimated \(\hat{e}(1, X_i)\).
For reproducibility, we will be using pre-computed nuisance functions generated from the same code above.
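As a quick sanity check, we can inspect the fitted objects (a sketch; the long-format layout of z_models, with columns Z, idx, d_pred, and y_pred, is inferred from how compute_wts_dr() reshapes it later in this vignette):
# peek at the estimated m^D and m^Y by treatment arm
head(nuis_func$z_models)
# PSA provision was randomized, so the estimated propensity scores
# should be concentrated near 0.5
summary(nuis_func$pscore)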
Comparisons between human decisions and PSA-generated recommendations are summarized in the following contingency tables (Table 1 in the paper).
counts <- table(D[Z == 0], A[Z == 0])
proportions <- prop.table(counts) * 100
combined_table_human <- matrix(sprintf("%.1f%% (%d)", proportions, counts),
nrow = nrow(counts), ncol = ncol(counts))
colnames(combined_table_human) <- c("Signature", "Cash")
rownames(combined_table_human) <- c("Signature", "Cash")
knitr::kable(combined_table_human, caption = "Table 1 (Human v. PSA)")
Table 1 (Human v. PSA)
 | Signature | Cash |
---|---|---|
Signature | 54.1% (510) | 20.7% (195) |
Cash | 9.4% (89) | 15.8% (149) |
counts <- table(D[Z == 1], A[Z == 1])
proportions <- prop.table(counts) * 100
combined_table_human_ai <- matrix(sprintf("%.1f%% (%d)", proportions, counts),
nrow = nrow(counts), ncol = ncol(counts))
colnames(combined_table_human_ai) <- c("Signature", "Cash")
rownames(combined_table_human_ai) <- c("Signature", "Cash")
knitr::kable(combined_table_human_ai, caption = "Table 1 (Human+PSA v. PSA)")
Table 1 (Human+PSA v. PSA)
 | Signature | Cash |
---|---|---|
Signature | 57.3% (543) | 17.1% (162) |
Cash | 7.4% (70) | 18.2% (173) |
We can analyze the impact of providing AI recommendations on the agreement between human decisions and AI recommendations using difference-in-means estimates of the indicator \(1\{D_i = A_i\}\). The results are as follows (Figure S1 in the appendix).
table_agreement(
Y = Y,
D = D,
Z = Z,
A = A,
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender"
) |>
mutate_at(vars(agree_diff, agree_diff_se, ci_ub, ci_lb), ~round(., 3)) |>
knitr::kable(caption = "Agreement of Human and PSA Decision Makers")
cov | X | agree_diff | agree_diff_se | ci_ub | ci_lb |
---|---|---|---|---|---|
Overall | Overall | 0.056 | 0.020 | 0.097 | 0.016 |
Race | Non-white | 0.030 | 0.031 | 0.090 | -0.031 |
Race | White | 0.079 | 0.027 | 0.132 | 0.026 |
Gender | Female | 0.025 | 0.044 | 0.111 | -0.060 |
Gender | Male | 0.065 | 0.023 | 0.110 | 0.019 |
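As a cross-check, the “Overall” row can be recovered directly from the contingency tables above, since agreement is the sum of the diagonal cells within each arm (a minimal sketch):
# difference in agreement rates, computed by hand
agree_z1 <- mean(D[Z == 1] == A[Z == 1])  # ~ 0.573 + 0.182, Table 1 (Human+PSA v. PSA)
agree_z0 <- mean(D[Z == 0] == A[Z == 0])  # ~ 0.541 + 0.158, Table 1 (Human v. PSA)
agree_z1 - agree_z0                       # ~ 0.056, matching the "Overall" row above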
plot_agreement(
Y = Y,
D = D,
Z = Z,
A = A,
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = "Agreement between Human Decisions and PSA Recommendations", p.lb = -0.25, p.ub = 0.25
)
We now compare the difference in risk between the human+AI and human-alone decision makers. You may choose between (1) the AIPW estimator and (2) the difference-in-means (Neyman) estimator. You can also specify the loss ratio between false positives and false negatives using the l01 parameter. For the subgroup analysis, you can specify the subgroup variable using X.
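Concretely, the risk being compared is the expected loss combining the two error types. As the output columns below confirm (loss_diff equals fn_diff plus l01 times fp_diff), the loss for case \(i\) is

\[
\text{loss}_i = \mathrm{FN}_i + \ell_{01}\,\mathrm{FP}_i,
\]

so l01 = 1 weights false negatives and false positives equally.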
# AIPW estimator
compute_stats_aipw(
Y = Y,
D = D,
Z = Z,
nuis_funcs = nuis_func,
true.pscore = rep(0.5, length(D)),
X = NULL,
l01 = 1
)
#> # A tibble: 1 × 8
#> Z_focal Z_compare loss_diff loss_diff_se fn_diff fn_diff_se fp_diff fp_diff_se
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 0.0419 0.0359 0.0177 0.0184 0.0242 0.0218
# Difference-in-means (Neyman) estimator
compute_stats(
Y = Y,
D = D,
Z = Z,
X = NULL,
l01 = 1
)
#> # A tibble: 1 × 8
#> Z_focal Z_compare loss_diff loss_diff_se fn_diff fn_diff_se fp_diff fp_diff_se
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 0.0420 0.0364 0.0190 0.0184 0.0230 0.0229
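The same estimators support subgroup analyses; for example, passing race_vec as the subgroup variable (a sketch, using the X argument described above):
# Neyman estimates within subgroups defined by race
compute_stats(Y = Y, D = D, Z = Z, X = race_vec, l01 = 1)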
The results can be visualized as follows (Figure 1 in the paper). You may use the plot_diff_human() function for the Neyman estimator.
plot_diff_human_aipw(
Y = Y,
D = D,
Z = Z,
nuis_funcs = nuis_func,
l01 = 1,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL, p.lb = -0.3, p.ub = 0.3
)
We can answer this question using the following subgroup analyses defined by A.
First, we consider a subgroup analysis for the cases where the AI recommends the harsher decision (\(A = 1\)). The figure shows how humans override the AI recommendation in terms of the true negative proportion (TNP), the false negative proportion (FNP), and their difference (Figure S2 in the appendix).
plot_diff_subgroup(
Y = Y,
D = D,
Z = Z,
A = A,
a = 1,
l01 = 1,
nuis_funcs = nuis_func,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL, p.lb = -0.5, p.ub = 0.5,
label = "TNP - FNP",
metrics = c("True Negative Proportion (TNP)", "False Negative Proportion (FNP)", "TNP - FNP")
)
Second, a subgroup analysis for the cases where the AI recommends the more lenient decision (\(A = 0\)) is also available. The figure shows how humans override the AI recommendation in terms of the true positive proportion (TPP), the false positive proportion (FPP), and their difference (Figure S3 in the appendix).
plot_diff_subgroup(
Y = Y,
D = D,
Z = Z,
A = A,
a = 0,
l01 = 1,
nuis_funcs = nuis_func,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL, p.lb = -0.5, p.ub = 0.5,
label = "TPP - FPP",
metrics = c("True Positive Proportion (TPP)", "False Positive Proportion (FPP)", "TPP - FPP")
)
We now compare the difference in risk between the AI-alone and human decision makers using AIPW estimators. Note that these quantities are only partially identified under the assumptions made in the paper.
# AIPW estimator
compute_bounds_aipw(
Y = Y,
D = D,
Z = Z,
A = A,
nuis_funcs = nuis_func,
nuis_funcs_ai = nuis_func_ai,
true.pscore = rep(0.5, length(D)),
X = NULL,
l01 = 1
)
#> # A tibble: 2 × 14
#> Z_focal Z_compare fn_diff_lb fn_diff_ub fp_diff_lb fp_diff_ub loss_diff_lb
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 AI 1 -0.0578 0.00203 0.0421 0.102 -0.0156
#> 2 AI 0 -0.0401 0.0197 0.0674 0.127 0.0273
#> # ℹ 7 more variables: loss_diff_ub <dbl>, fn_diff_lb_se <dbl>,
#> # fn_diff_ub_se <dbl>, fp_diff_lb_se <dbl>, fp_diff_ub_se <dbl>,
#> # loss_diff_lb_se <dbl>, loss_diff_ub_se <dbl>
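To summarize these bounds as a single interval, one can widen the estimated identification region by the sampling uncertainty of each endpoint (a conservative outer interval; this sketch reuses the columns returned above):
# conservative 95% outer interval for the partially identified loss difference
compute_bounds_aipw(
Y = Y, D = D, Z = Z, A = A,
nuis_funcs = nuis_func, nuis_funcs_ai = nuis_func_ai,
true.pscore = rep(0.5, length(D)), X = NULL, l01 = 1
) |>
mutate(
ci_lb = loss_diff_lb - qnorm(0.975) * loss_diff_lb_se,
ci_ub = loss_diff_ub + qnorm(0.975) * loss_diff_ub_se
) |>
select(Z_focal, Z_compare, loss_diff_lb, loss_diff_ub, ci_lb, ci_ub)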
The results can be visualized as follows (Figure 2 and Figure S4 in the paper).
# AI versus Human
plot_diff_ai_aipw(
Y = Y,
D = D,
Z = Z,
A = A,
z_compare = 0,
nuis_funcs = nuis_func,
nuis_funcs_ai = nuis_func_ai,
l01 = 1,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL, p.lb = -0.3, p.ub = 0.3
)
# AI versus Human+AI
plot_diff_ai_aipw(
Y = Y,
D = D,
Z = Z,
A = A,
z_compare = 1,
nuis_funcs = nuis_func,
nuis_funcs_ai = nuis_func_ai,
l01 = 1,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL, p.lb = -0.3, p.ub = 0.3
)
Note that researchers can specify different AI recommendations by changing the A variable. For example, in the paper we used Llama3 recommendations as an alternative comparison; the vector A_llama below, constructed separately, holds those recommendations.
# Llama3 versus Human
plot_diff_ai_aipw(
Y = Y,
D = D,
Z = Z,
A = A_llama,
z_compare = 0,
nuis_funcs = nuis_func,
nuis_funcs_ai = nuis_func_ai,
l01 = 1,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
y.lab = "Llama3 versus Human", p.label = c("Llama3 worse", "Llama3 better"),
p.title = NULL, p.lb = -0.5, p.ub = 0.5
)
As noted earlier, one may specify a different loss function by changing the l01 parameter. Here, we provide an example of determining when the human decision maker is preferred over the AI recommendation (Figure 3 and Figure S5 in the paper).
# When to prefer human decision maker over AI recommendation
plot_preference(
Y = Y,
D = D,
Z = Z,
A = A,
z_compare = 0,
nuis_funcs = nuis_func,
nuis_funcs_ai = nuis_func_ai,
l01_seq = 10^seq(-2, 2, length.out = 200),
alpha = 0.05,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL,
legend.position = "bottom",
p.label = c("AI-alone preferred", "Human-alone preferred", "Ambiguous")
) +
scale_color_manual("",
values = c("", "#4d9221", "grey30"),
labels = c("", "Human-alone preferred", "Ambiguous"), drop = FALSE
) +
scale_linetype_manual("", values = c(1, 1, 3), labels = c("", "Human-alone preferred", "Ambiguous"), drop = FALSE)
# When to prefer human+AI decision maker over AI recommendation
plot_preference(
Y = Y,
D = D,
Z = Z,
A = A,
z_compare = 1,
nuis_funcs = nuis_func,
nuis_funcs_ai = nuis_func_ai,
l01_seq = 10^seq(-2, 2, length.out = 200),
alpha = 0.05,
true.pscore = rep(0.5, length(D)),
subgroup1 = race_vec,
subgroup2 = gender_vec,
label.subgroup1 = "Race",
label.subgroup2 = "Gender",
x.order = c("Overall", "Non-white", "White", "Female", "Male"),
p.title = NULL,
legend.position = "bottom",
p.label = c("AI-alone preferred", "Human-alone preferred", "Ambiguous")
) +
scale_color_manual("",
values = c("", "#4d9221", "grey30"),
labels = c("", "Human-with-algorithm preferred", "Ambiguous"), drop = FALSE
) +
scale_linetype_manual("", values = c(1, 1, 3), labels = c("", "Human-with-algorithm preferred", "Ambiguous"), drop = FALSE)
Finally, we can conduct policy learning using the following code. The optimization requires the gurobi software, which you may need to install separately (see its documentation). In this vignette, we provide the code for replicating the results in the paper (Figure 4 and Table 3).
library(gurobi)
## Optimization functions
make_edge_mat <- function(X) {
n <- nrow(X)
lapply(1:n,
function(i) {
vapply(1:n,
function(j) {
if(all(X[i,] <= X[j,]) & i != j) {
e <- numeric(2 * n)
e[j] <- 1
e[n + i] <- -1
} else {
e <- numeric(2 * n)
}
return(e)
},
numeric(2 * n)) %>% t()
}) %>% do.call(rbind, .) %>% unique() %>% Matrix::Matrix()
}
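# Illustrative check of make_edge_mat() (a toy example, not part of the
# replication code): with X = (1, 2)', row 1 is dominated by row 2, so the
# only nonzero constraint row is (0, 1, -1, 0), i.e., +1 in the slot of
# j = 2 and -1 in the (n + i) slot of i = 1; multiplied by the stacked
# action matrix below, this encodes action(j) >= action(i).
# make_edge_mat(matrix(c(1, 2), ncol = 1))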
make_action_mat <- function(n, k) {
do.call(cbind, lapply(1:k, function(a) a * Matrix::Diagonal(n)))
}
create_monotone_constraints <- function(X, rev = FALSE) {
n <- nrow(X)
d <- ncol(X)
# policy vector is (pi(1,.),...,pi(A,.))
# sum to one constraints
pi_sum_to_one <- do.call(cbind, lapply(1:2, function(x) Matrix::Diagonal(n)))
rhs_sum_to_one <- rep(1, n)
sense_sum_to_one <- rep("=", n)
# monotonicity constraints
edge_mat <- make_edge_mat(X)
action_mat <- make_action_mat(n, 2)
# the following ensures that (1) edge_mat selects the pairs for which
# monotonicity should hold, in the correct order (i.e., if X[i,] <= X[j,],
# then assign +1 to j and -1 to i), and (2) rbind(action_mat, action_mat),
# when multiplied by the stacked vector of binary action indicators
# (length 2 * n, since there are two actions), recovers the action chosen
# for each observation (e.g., if action 1 = 0 and action 2 = 1 for a given
# observation, it yields the value 2)
monotone_mat <- edge_mat %*% rbind(action_mat, action_mat)
rhs_monotone <- rep(0, nrow(monotone_mat))
if(isTRUE(rev)) {
sense_monotone <- rep("<=", nrow(monotone_mat))
} else {
sense_monotone <- rep(">=", nrow(monotone_mat))
}
A <- rbind(pi_sum_to_one, monotone_mat)
rhs <- c(rhs_sum_to_one, rhs_monotone)
sense <- c(sense_sum_to_one, sense_monotone)
# restrict variables
vtype <- rep("B", n * 2) # binary policy decisions
return(list(A = A, rhs = rhs, sense = sense, vtype = vtype))
}
get_monotone_policy <- function(X, wts, rev = FALSE) {
n <- nrow(X)
# get the constraints
model <- create_monotone_constraints(as.matrix(X), rev = rev)
# solve
model$obj <- c(numeric(n), wts)
model$modelsense <- "min"
sol <- gurobi(model)
# extract values: sol$x stacks the two action-indicator blocks, so
# matrix(sol$x, ncol = 2) has one row per observation; which.max() - 1
# maps the chosen column (action 1 or 2) back to a 0/1 policy
policy <- apply(matrix(sol$x, ncol = 2), 1, which.max) - 1
return(policy)
}
get_monotone_policy_ai <- function(X, wts, rev = FALSE) {
X_df <- data.frame(X)
wts_df <- data.frame(wts)
colnames(wts_df) <- "wts"
comb_df <- bind_cols(X_df, wts_df)
# get distinct X values and the sum of the weights on them
grouped_df <- comb_df %>%
group_by(across(-wts)) %>%
summarise(across(everything(), sum),
n = n(),
.groups = "drop"
)
X_distinct <- grouped_df %>%
select(-wts, -n) %>%
as.matrix()
wts_distinct <- grouped_df %>% pull(wts)
policy <- get_monotone_policy(X_distinct, wts_distinct, rev = rev)
grouped_df <- grouped_df %>%
mutate(policy = policy)
return(grouped_df)
}
## Weights functions
compute_wts_dr <- function(Y, D, Z, nuis_funcs, pscore = NULL, l01 = 1) {
## update the propensity score if provided
if (!is.null(pscore)) {
nuis_funcs$pscore <- pscore
}
## check whether Z is 0/1 binary variable
if (any(sort(unique(Z)) != c(0, 1))) {
stop("Z must be a binary variable")
}
dat <- data.frame(Y = Y, D = D, Z = Z, pscore = nuis_funcs$pscore)
preds <- nuis_funcs$z_models |>
pivot_wider(names_from = c(Z), values_from = c(-Z, -idx), names_sep = "") |>
select(-idx)
dat <- dat %>%
bind_cols(preds)
wts <- dat %>%
mutate(
p.1i = (1 - d_pred1) * ((1 + l01) * y_pred1 - l01) + (1 + l01) * Z * (1 - D) * (Y - y_pred1) / pscore - ((1 + l01) * y_pred1 - l01) * Z * (D - d_pred1) / pscore,
p.0i = (1 - d_pred0) * ((1 + l01) * y_pred0 - l01) + (1 + l01) * (1 - Z) * (1 - D) * (Y - y_pred0) / (1 - pscore) - ((1 + l01) * y_pred0 - l01) * (1 - Z) * (D - d_pred0) / (1 - pscore),
p.i = p.1i - p.0i
) %>%
pull(p.i)
return(wts)
}
compute_bound_wts_dr <- function(Y, A, D, Z, nuis_funcs1, nuis_funcs2, pscore = NULL, l01 = 1) {
## update the propensity score if provided
if (!is.null(pscore)) {
nuis_funcs1$pscore <- pscore
nuis_funcs2$pscore <- pscore
}
## check whether Z is 0/1 binary variable
if (any(sort(unique(Z)) != c(0, 1))) {
stop("Z must be a binary variable")
}
data <- data.frame(Y = Y, D = D, Z = Z, A = A, pscore = nuis_funcs1$pscore)
preds1 <- nuis_funcs1$z_models |>
pivot_wider(names_from = c(Z), values_from = c(-Z, -idx), names_sep = "") |>
select(-idx)
preds2 <- nuis_funcs2$z_models |>
pivot_wider(names_from = c(Z, A), values_from = c(-Z, -A, -idx), names_sep = "") |>
select(-idx)
data <- data %>%
bind_cols(preds1, preds2)
wts <- data %>%
mutate(
# gU0.i = 1 if z'=1 i.e. Pr(Y=0,D=0|A=0,Z=1,X=x) > Pr(Y=0,D=0|A=0,Z=0,X=x)
gU0.i = 1 * ((1 - d_pred10) * (1 - y_pred10) >= (1 - d_pred00) * (1 - y_pred00)),
# phi_z0(x): Pr(Y=0,D=0,A=0|Z=z,X=x)
p10.i = (1 - A) * (1 - d_pred10) * (1 - y_pred10) - Z * (1 - A) * (1 - D) * (Y - y_pred10) / pscore - (1 - y_pred10) * Z * (1 - A) * (D - d_pred10) / pscore,
p00.i = (1 - A) * (1 - d_pred00) * (1 - y_pred00) - (1 - Z) * (1 - A) * (1 - D) * (Y - y_pred00) / (1 - pscore) - (1 - y_pred00) * (1 - Z) * (1 - A) * (D - d_pred00) / (1 - pscore)
) %>%
mutate(
p10_gU0.i = p10.i * gU0.i,
p00_gU0.i = p00.i * gU0.i
) %>%
mutate(
# upper bounds on the FN and FP differences; p01.i, p0.i, p00.D.i, and
# p01.D.i are additional doubly robust terms defined analogously to
# p10.i and p00.i above
fn_diff_ub.0i = p01.i - p0.i + p00.D.i - p10_gU0.i + p00_gU0.i,
fp_diff_ub.0i = p01.i - p0.i + p01.D.i - p10_gU0.i + p00_gU0.i,
loss_diff_ub.0i = fn_diff_ub.0i + l01 * fp_diff_ub.0i
) %>%
pull(loss_diff_ub.0i)
return(wts)
}
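For reference, the per-observation weight returned by compute_wts_dr() is, transcribing the code into the notation used above,

\[
\hat{p}_{z,i} = \{1 - \hat{m}^{D}(z, X_i)\}\{(1+\ell_{01})\,\hat{m}^{Y}(z, X_i) - \ell_{01}\} + \frac{(1+\ell_{01})\,\mathbf{1}\{Z_i = z\}(1 - D_i)\{Y_i - \hat{m}^{Y}(z, X_i)\}}{\hat{e}(z, X_i)} - \frac{\{(1+\ell_{01})\,\hat{m}^{Y}(z, X_i) - \ell_{01}\}\,\mathbf{1}\{Z_i = z\}\{D_i - \hat{m}^{D}(z, X_i)\}}{\hat{e}(z, X_i)},
\]

with \(w_i = \hat{p}_{1,i} - \hat{p}_{0,i}\). The optimization below then minimizes \(\sum_i w_i\,\pi(X_i)\) over the monotone policy class.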
Let’s consider a policy class with an increasing monotonicity constraint. Here, the policy determines whether to provide the PSA recommendation as a function of the PSA scores.
psaX <- cov_mat[,c("FTAScore", "NCAScore", "NVCAFlag")]
nca_wts <- compute_wts_dr(Y = Y, D = D, Z = Z, nuis_funcs = nuis_func, pscore = 0.5, l01 = 1)
nca_provide_policy <- get_monotone_policy_ai(X = psaX, wts = nca_wts)
## The estimated values of the empirical risk minimization problem
get_policy_value(nca_provide_policy, n_obs = length(Y))
#> [1] -0.01014338
## Visualization
nca_provide_policy |>
filter(NVCAFlag == 1) |>
mutate(
policy = factor(policy, levels = c(0, 1)),
NVCAFlag = paste0("NVCA Flag: ", NVCAFlag)
) |>
ggplot(aes(x = NCAScore, y = FTAScore, fill = policy)) +
geom_tile(color = "grey50") +
geom_segment(aes(x = 2.5, xend = 4.5, y = 4.5, yend = 4.5), lty = 3) +
geom_segment(aes(x = 4.5, xend = 4.5, y = 4.5, yend = 1.5), lty = 3) +
facet_grid(. ~ NVCAFlag) +
scale_fill_brewer("PSA Recommendation Provision",
type = "seq", palette = "Blues",
direction = 1
) +
scale_y_reverse(breaks = 1:6) +
scale_x_continuous(breaks = 1:6) +
expand_limits(x = 0.5:6, y =0.5:6) +
labs(x = "NCA Score",
y = "FTA Score",
title = "Whether to provide PSA recommendations") +
coord_fixed(expand = F) +
theme_classic(base_size = 15) +
scale_fill_brewer("", breaks = 0:1, labels = c("Not Provide", "Provide")) +
theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label = n), color = "grey10")
We can also consider a policy class with a decreasing monotonicity constraint by setting rev = TRUE.
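A minimal sketch (reusing the weights from above; rev = TRUE reverses the direction of the constraints built in create_monotone_constraints()):
## Decreasing-monotone policy class (sketch)
nca_provide_policy_rev <- get_monotone_policy_ai(X = psaX, wts = nca_wts, rev = TRUE)
get_policy_value(nca_provide_policy_rev, n_obs = length(Y))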
Next, we learn when to follow the PSA recommendation, again considering a policy class with an increasing monotonicity constraint.
nca_wts <- compute_bound_wts_dr(Y = Y, A = A, D = D, Z = Z, nuis_funcs1 = nuis_func, nuis_funcs2 = nuis_func_ai, pscore = 0.5, l01 = 1)
nca_follow_policy <- get_monotone_policy_ai(X = psaX, wts = nca_wts)
## The estimated values of the empirical risk minimization problem
get_policy_value(nca_follow_policy, n_obs = length(Y))
#> [1] -0.003514698
## Visualization
nca_follow_policy |>
filter(NVCAFlag == 1) |>
mutate(
policy = factor(policy, levels = c(0, 1)),
NVCAFlag = paste0("NVCA Flag: ", NVCAFlag)
) |>
ggplot(aes(x = NCAScore, y = FTAScore, fill = policy)) +
geom_tile(color = "grey50") +
geom_segment(aes(x = 2.5, xend = 4.5, y = 4.5, yend = 4.5), lty = 3) +
geom_segment(aes(x = 4.5, xend = 4.5, y = 4.5, yend = 1.5), lty = 3) +
facet_grid(. ~ NVCAFlag) +
scale_fill_brewer("PSA Recommendation Provision",
type = "seq", palette = "Blues",
direction = 1
) +
scale_y_reverse(breaks = 1:6) +
scale_x_continuous(breaks = 1:6) +
expand_limits(x = 0.5:6, y =0.5:6) +
labs(x = "NCA Score",
y = "FTA Score",
title = "Whether to follow PSA recommendations") +
coord_fixed(expand = F) +
theme_classic(base_size = 15) +
scale_fill_brewer("", type = "seq", palette = "Reds", direction = 1,
breaks = 0:1, label = c("Do not follow", "Follow")) +
theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label = n), color = "grey10")
We can also consider a policy class with a decreasing monotonicity constraint, again by setting rev = TRUE.