Skip to contents

This vignette shows how to perform a simplified version of the case study presented in Rassmusen and Williams (2006, p 119-121).

library(zoomerGP)
library(tibble)
library(ggplot2)

data <- data.frame(
  year = (1:length(co2) - 1)/12 + 1959,
  co2 = c(co2)
)
data %>% 
  ggplot(aes(x=year, y=co2)) +
  geom_point()

Now we can begin analyzing the data using Gaussian Processes. Examining the data, and based on our understanding of the climate, it looks like this data can be well-described by a long-term trend plus a periodic component.

set.seed(1)
gp_model <- gaussian_process(
  co2 ~ periodic(year, periods = c(1.0)) + rbf(year), 
  data = data
  )
gp_model
#> A Gaussian Process model with 468 observations
#> Log-Likelihood: -375.984632
#> R-Squared: 0.998943
#> Kernel Specification:
#> └──  (A) Plus (+)
#>     ├──  (AA) RBF Kernel
#>     └──  (AB) RBF Kernel
prediction_data <- data.frame(year = 1959 + seq(0,60, length.out = 500))
prediction_data$preds <- predict(gp_model, prediction_data)[,1]
prediction_data$pred_var <- predict(gp_model, prediction_data)[,3]

prediction_data$ci_lo <- prediction_data$preds + 2*sqrt(prediction_data$pred_var)
prediction_data$ci_hi <- prediction_data$preds - 2*sqrt(prediction_data$pred_var)

ggplot() + 
  geom_point(data = data, aes(x=year, y=co2)) + 
  geom_line(data = prediction_data, aes(x=year, y=preds)) + 
  geom_line(data = prediction_data, aes(x=year, y=ci_hi), linetype = 2, col = "darkgrey") + 
  geom_line(data = prediction_data, aes(x=year, y=ci_lo), linetype = 2, col = "darkgrey")