Metaculus_response_login <- MetaculR_login()
Other Metaculus predictions provide a wealth of insight about a question, MetaculR_myPredictions()
quickly puts together a number of summary statistics.
questions_myPredictions <- MetaculR_myPredictions(guessed_by = Metaculus_response_login$Metaculus_user_id,
order_by = "close_time")
MetaculR_aggregated_forecasts(questions_myPredictions, Metaculus_id = 8997) %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "Name") %>%
dplyr::rename(Value = V1) %>%
knitr::kable()
Name | Value |
---|---|
ID | 8997.0000000 |
community_q2 | 0.7100000 |
community_ave | 0.7236701 |
community_q2_unweighted | 0.7650000 |
community_ave_unweighted | 0.7656349 |
community_mean_logodds | 0.7342514 |
community_mean_logodds_extremized_baseline | 0.8526317 |
Sometimes you find external forecasts relevant to your question, but how to combine them? Using some work from McAndrew & Reich (2020), this provides a quick way to combine forecasts.
As an example, this question asked, What will be the 13-month mean sunspot number for the year of Solar Cycle 25’s maximum? and Table 1 of “The Next Solar Cycle” published a number of early forecasts. Here you see that we combine forecasts using normal distributions as well as min-mode-max as well as 20p-80p forecasts. Additionally, we gave Pishkalo a weight of 0.2 (while the others default to 1) so that their very narrow range doesn’t dominate our consensus.
forecasts <- list(list(range = c(0, 250), resolution = 1),
list(source = "Pishkalo",
dist = "Norm",
params = c("mu", "sd"),
values = c(116, 12),
weight = 0.2),
list(source = "Miao",
dist = "Norm",
params = c("mu", "sd"),
values = c(121.5, 32.9)),
list(source = "Labonville",
dist = "TPD",
params = c("min", "mode", "max"),
values = c(89-14, 89, 89+29)),
list(source = "NOAA",
dist = "PCT",
params = c(0.2, 0.8),
values = c(95, 130)),
list(source = "Han",
dist = "Norm",
params = c("mu", "sd"),
values = c(228, 40.5)),
list(source = "Dani",
dist = "Norm",
params = c("mu", "sd"),
values = c(159, 22.3)),
list(source = "Li",
dist = "Norm",
params = c("mu", "sd"),
values = c(168, 6.3)),
list(source = "Singh",
dist = "Norm",
params = c("mu", "sd"),
values = c(89, 9)))
prob_consensus <- MetaculR_probabilistic_consensus(
f = forecasts)
prob_consensus$summary %>%
knitr::kable()
source | q10 | q25 | q50 | mu | q75 | q90 | |
---|---|---|---|---|---|---|---|
10% | Pishkalo | 101 | 108 | 116 | 116.0000 | 124 | 131 |
10%1 | Miao | 79 | 99 | 122 | 121.5075 | 144 | 164 |
10%2 | Labonville | 83 | 87 | 93 | 94.0000 | 100 | 107 |
10%3 | NOAA | 47 | 97 | 112 | 114.6000 | 127 | 190 |
10%4 | Han | 169 | 191 | 213 | 208.5179 | 231 | 242 |
10%5 | Dani | 130 | 144 | 159 | 158.9980 | 174 | 188 |
10%6 | Li | 160 | 164 | 168 | 168.0000 | 172 | 176 |
10%7 | Singh | 77 | 83 | 89 | 89.0000 | 95 | 101 |
10%8 | agg | 84 | 95 | 128 | 135.8088 | 170 | 204 |
You can also visualize it to help you improve your prediction.
prob_consensus$pdf %>%
tidyr::pivot_longer(cols = c(-x, -agg)) %>%
ggplot2::ggplot() +
ggplot2::geom_line(ggplot2::aes(x = x,
y = value,
color = name),
alpha = 0.5) +
ggplot2::geom_line(ggplot2::aes(x = x,
y = agg * (ncol(prob_consensus$pdf) - 2)), # 8 forecasters, make aggregated density appear larger in plot
linetype = "dotted") +
ggplot2::geom_segment(ggplot2::aes(x = prob_consensus$summary$q50[nrow(prob_consensus$summary)],
xend = prob_consensus$summary$q50[nrow(prob_consensus$summary)],
y = 0,
yend = prob_consensus$pdf$agg[which(prob_consensus$pdf$x == prob_consensus$summary$q50[nrow(prob_consensus$summary)])] * (ncol(prob_consensus$pdf) - 2)),
linetype = "dashed") +
ggplot2::theme_classic() +
ggplot2::labs(y = "density")
questions_recent_open <-
MetaculR_questions(order_by = "close_time",
status = "open",
guessed_by = MetaculR_response_login$Metaculus_user_id)
### NOT RUN, see .gif below for what function does
# MetaculR_review(questions_recent_open,
# MetaculR_response_login$csrftoken)
questions_myPredictions_analysis_binary <- MetaculR_analysis_binary_resolved(questions_myPredictions)
MetaculR_myChallenges(questions_myPredictions_analysis_binary)
myCategories <- MetaculR_categories(ids = questions_myPredictions_analysis_binary %>%
dplyr::distinct(id) %>%
dplyr::pull())
MetaculR_myCategories(questions_myPredictions_analysis_binary,
myCategories)
See your cumulative mean relative log score on questions where you have made predictions. If the question is unresolved (or ambiguously resolved), see both “Yes” and “No” possibilities, otherwise see only the resolution possibility.
MetaculR_plot(questions_myPredictions, 8997, tournament = TRUE)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
Watch your tournament score rise and fall over time.
MetaculR_plot(questions_myPredictions, 9939, tournament = TRUE)
#> Warning: Removed 1 rows containing missing values (position_stack).
What if question bins for histograms, calibration plots, etc. were not 10 percentage points each, but were based on logodds? Use the thresholds
argument of MetaculR_brier()
.
questions_myPredictions_analysis_binary <- MetaculR_analysis_binary_resolved(questions_myPredictions)
#> Joining, by = c("id", "Date")
#> Joining, by = c("id", "Date")
#> Joining, by = c("id", "Date")
#> Joining, by = "id"
brier_me <- MetaculR_brier(questions_myPredictions_analysis_binary,
thresholds = c(0, exp(seq(from = -log(99), to = log(99), by = log(99) / 5)) / (exp(seq(from = -log(99), to = log(99), by = log(99) / 5)) + 1), 1))
brier_me$brier_df %>%
ggplot2::ggplot() +
ggplot2::geom_col(
ggplot2::aes(x = ID,
y = value,
fill = ID) #"dodge2",
) +
ggplot2::facet_wrap(
ggplot2::vars(name),
scales = "free_y"
) +
ggplot2::geom_hline(yintercept = 0,
size = 0.1) +
ggplot2::theme_classic() +
ggplot2::labs(x = "Statistic",
y = "Value") +
# ggplot2::coord_cartesian(ylim = c(-1, 1)) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1))
#> Warning: Removed 17 rows containing missing values (position_stack).
brier_me$brier_bins_df %>%
ggplot2::ggplot() +
ggplot2::geom_col(
ggplot2::aes(x = centers,
y = freqs,
fill = ID),
position = ggplot2::position_dodge2(width = 0.1, preserve = "single")
) +
ggplot2::theme_classic() +
ggplot2::coord_cartesian(ylim = c(0, 1))
brier_me$brier_bins_df %>%
ggplot2::ggplot() +
ggplot2::geom_pointrange(
ggplot2::aes(x = centers,
y = obars,
ymin = ci_low,
ymax = ci_high,
color = ID),
position = ggplot2::position_dodge2(width = 0.02)
) +
ggplot2::geom_line(
ggplot2::aes(x = centers,
y = ideal)
) +
ggplot2::theme_classic() +
ggplot2::coord_cartesian(ylim = c(0, max(brier_me$brier_bins_df$obars) * 1.1))
The new MetaculR_analysis_binary_resolved()
function enables further analysis.
questions_myPredictions_analysis_binary %>%
tidyr::pivot_longer(cols = dplyr::starts_with("Brier")) %>%
dplyr::mutate(name = gsub("Brier_", "", name)) %>%
dplyr::mutate(name = paste(toupper(substring(name, 1, 1)), substring(name, 2), sep = "")) %>%
ggplot2::ggplot() +
ggplot2::geom_point(
ggplot2::aes(x = np,
y = value,
color = factor(name)),
alpha = 0.01) +
ggplot2::geom_smooth(
ggplot2::aes(x = np,
y = value,
color = factor(name),
weight = Weight_Resolve),
alpha = 0.1) +
ggplot2::facet_wrap(
ggplot2::vars(obs),
nrow = 2) +
ggplot2::theme_classic() +
ggplot2::labs(y = "Brier score",
color = "Predictor")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 3190 rows containing non-finite values (stat_smooth).
#> Warning: Removed 3190 rows containing missing values (geom_point).
questions_myPredictions_analysis_binary %>%
ggplot2::ggplot() +
ggplot2::geom_line(
ggplot2::aes(x = Cum_Close_Pct,
y = Cum_RelLogScore_met,
color = factor(id)),
alpha = 0.5) +
ggplot2::geom_smooth(
ggplot2::aes(x = Cum_Close_Pct,
y = Cum_RelLogScore_met,
weight = Weight_Close),
alpha = 0.5) +
ggplot2::facet_wrap(
ggplot2::vars(obs),
nrow = 2) +
ggplot2::geom_hline(yintercept = 0) +
ggplot2::theme_classic() +
ggplot2::coord_cartesian(xlim = c(-0.1, 1.1),
ylim = c(-0.5, 0.5)) +
ggplot2::guides(color = FALSE)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 44 rows containing non-finite values (stat_smooth).
#> Warning: Removed 44 row(s) containing missing values (geom_path).
questions_myPredictions_analysis_binary %>%
dplyr::filter(min(Countdown_weeks_Close) <= -26,
Countdown_weeks_Close <= 0) %>%
ggplot2::ggplot() +
ggplot2::geom_line(
ggplot2::aes(x = Countdown_weeks_Close,
y = Log_comm,
color = factor(id)),
alpha = 0.9) +
ggplot2::geom_smooth(
ggplot2::aes(x = Countdown_weeks_Close,
y = Log_comm,
weight = Weight_Close),
alpha = 0.5) +
ggplot2::facet_wrap(
ggplot2::vars(obs),
nrow = 2) +
ggplot2::geom_hline(yintercept = 0) +
ggplot2::theme_classic() +
ggplot2::guides(color = FALSE)
#> Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'