7 Calibration targeting weather examples

7.1 Preparation

Choose file format for generated figures:

output_dir <- "output"
plot_file_format <- c("png", "eps")[1] # modify index number to change format

Load source file containing the R implementation of the Weather model:

source("source/weatherModel.R")
source("source/estimate_hyperparameters_optim.R")

Set simulation constants:

SEED <- 0
YEAR_LENGTH <- 365 # ignoring leap year adjustment
SOLSTICE_SUMMER <- 172  # June 21st (approx.)
SOLSTICE_WINTER <- 355  # December 21st (approx.)

As a final part in this demonstration, we will extend the above process to deal with multiple instances of curves and parameter sets, generated by the same configuration of hyperparameters. We will then want to estimate those original hyperparameter values.

We use the data downloaded at NASA´s POWER access viewer (power.larc.nasa.gov/data-access-viewer/) selecting the user community ‘Agroclimatology’ and pin pointing the different locations between 01/01/1984 and 31/12/2007. The exact locations are:

Rakhigarhi, Haryana, India (Latitude: 29.1687, Longitude: 76.0687)
Irkutsk, Irkutsk Óblast, Russia (Latitude: 52.2891, Longitude: 104.2493)
Hobart, Tasmania, Australia (Latitude: -42.8649, Longitude: 147.3441)
Pearl Harbor, Hawaii, United States of America (Latitude: 21.376, Longitude: -157.9708)
São Paulo, Brazil (Latitude: -23.5513, Longitude: -46.6344)
Cambridge, United Kingdom (Latitude: 52.2027, Longitude: 0.122)
Windhoek, Namibia (Latitude: -22.5718, Longitude: 17.0953)

We selected the ICASA Format’s parameters:

Precipitation (PRECTOT)
Wind speed at 2m (WS2M)
Relative Humidity at 2m (RH2M)
Dew/frost point at 2m (T2MDEW)
Maximum temperature at 2m (T2M_MAX)
Minimum temperature at 2m (T2M_MIN)
All sky insolation incident on a horizontal surface (ALLSKY_SFC_SW_DWN)
Temperature at 2m (T2M)

and from Solar Related Parameters:

Top-of-atmosphere Insolation (ALLSKY_TOA_SW_DWN)

# Function to read and filter weather data
read_weather_data <- function(file_path) {
  data <- read.csv(file_path, skip = 18)
  data[data$YEAR %in% 1984:2007, ]
}

# Get input file paths
input_files <- list.files(path = "input", full.names = TRUE)

# Read and combine all weather data
weather <- do.call(rbind, lapply(input_files, read_weather_data))

# Define site mapping
site_mapping <- list(
  list(condition = function(x) floor(x$LAT) == 29, site = "Rakhigarhi"),
  list(condition = function(x) floor(x$LON) == 104, site = "Irkutsk"),
  list(condition = function(x) floor(x$LAT) == -43, site = "Hobart"),
  list(condition = function(x) floor(x$LAT) == 21, site = "Pearl Harbor"),
  list(condition = function(x) floor(x$LAT) == -24, site = "Sao Paulo"),
  list(condition = function(x) floor(x$LON) == 0, site = "Cambridge"),
  list(condition = function(x) floor(x$LAT) == -23, site = "Windhoek")
)

# Assign sites based on latitude and longitude
weather$Site <- NA
for (mapping in site_mapping) {
  weather$Site[mapping$condition(weather)] <- mapping$site
}

# Calculate summary statistics
years <- unique(weather$YEAR)
number_of_years <- length(years)

# Calculate the yearly length in days
year_length_in_days <- as.integer(table(weather$YEAR) / nlevels(factor(weather$Site)))

year_length_max <- max(year_length_in_days)

Prepare display order according to latitude:

# Create a function to format latitude
format_latitude <- function(lat) {
  paste(abs(round(lat, 2)), ifelse(lat < 0, "S", "N"))
}

# Create and process sites_latitude data frame
sites_latitude <- data.frame(
  Site = unique(weather$Site),
  Latitude = as.numeric(unique(weather$LAT))
)

# Sort sites_latitude by descending latitude
sites_latitude <- sites_latitude[order(-sites_latitude$Latitude), ]

# Format latitude values
sites_latitude$Latitude <- sapply(sites_latitude$Latitude, format_latitude)

# calculate easy references to sites
sites <- sites_latitude$Site
number_of_sites <- length(sites)

Compute statistics for each site and day of year:

# Define summary statistics function
calculate_summary <- function(data, column) {
  c(mean = mean(data[[column]], na.rm = TRUE),
    sd = sd(data[[column]], na.rm = TRUE),
    max = max(data[[column]], na.rm = TRUE),
    min = min(data[[column]], na.rm = TRUE),
    error = qt(0.975, length(data[[column]]) - 1) * 
      sd(data[[column]], na.rm = TRUE) / 
      sqrt(length(data[[column]])))
}

# Initialize weather_summary as a data frame
weather_summary <- data.frame(
  Site = character(),
  dayOfYear = integer(),
  solarRadiation.mean = numeric(),
  solarRadiation.sd = numeric(),
  solarRadiation.max = numeric(),
  solarRadiation.min = numeric(),
  solarRadiation.error = numeric(),
  solarRadiationTop.mean = numeric(),
  temperature.mean = numeric(),
  temperature.sd = numeric(),
  temperature.max = numeric(),
  temperature.min = numeric(),
  temperature.error = numeric(),
  maxTemperature.mean = numeric(),
  maxTemperature.max = numeric(),
  maxTemperature.min = numeric(),
  maxTemperature.error = numeric(),
  minTemperature.mean = numeric(),
  minTemperature.max = numeric(),
  minTemperature.min = numeric(),
  minTemperature.error = numeric(),
  temperature.lowerDeviation = numeric(),
  temperature.lowerDeviation.error = numeric(),
  temperature.upperDeviation = numeric(),
  temperature.upperDeviation.error = numeric(),
  precipitation.mean = numeric(),
  precipitation.max = numeric(),
  precipitation.min = numeric(),
  precipitation.error = numeric()
)

# Pre-allocate the weather_summary data frame
total_rows <- length(sites) * 366
weather_summary <- weather_summary[rep(1, total_rows), ]

# Main loop
row_index <- 1
for (site in sites) {
  for (day in 1:366) {
    weather_site_day <- weather[weather$Site == site & weather$DOY == day, ]
    
    if (nrow(weather_site_day) == 0) next
    
    weather_summary[row_index, "Site"] <- site
    weather_summary[row_index, "dayOfYear"] <- day
    
    # Solar radiation
    solar_summary <- calculate_summary(weather_site_day, "ALLSKY_SFC_SW_DWN")
    weather_summary[row_index, c("solarRadiation.mean", "solarRadiation.sd", 
                                "solarRadiation.max", "solarRadiation.min", 
                                "solarRadiation.error")] <- solar_summary
    
    weather_summary[row_index, "solarRadiationTop.mean"] <- mean(weather_site_day$ALLSKY_TOA_SW_DWN, na.rm = TRUE)
    
    # Temperature
    temp_summary <- calculate_summary(weather_site_day, "T2M")
    weather_summary[row_index, c("temperature.mean", "temperature.sd", 
                                "temperature.max", "temperature.min", 
                                "temperature.error")] <- temp_summary
    
    # Max temperature
    max_temp_summary <- calculate_summary(weather_site_day, "T2M_MAX")
    weather_summary[row_index, c("maxTemperature.mean", "maxTemperature.max", 
                                "maxTemperature.min", "maxTemperature.error")] <- max_temp_summary[c("mean", "max", "min", "error")]
    
    # Min temperature
    min_temp_summary <- calculate_summary(weather_site_day, "T2M_MIN")
    weather_summary[row_index, c("minTemperature.mean", "minTemperature.max", 
                                "minTemperature.min", "minTemperature.error")] <- min_temp_summary[c("mean", "max", "min", "error")]
    
    # Temperature deviations
    lower_dev <- weather_site_day$T2M - weather_site_day$T2M_MIN
    upper_dev <- weather_site_day$T2M_MAX - weather_site_day$T2M
    
    weather_summary[row_index, "temperature.lowerDeviation"] <- mean(lower_dev, na.rm = TRUE)
    weather_summary[row_index, "temperature.lowerDeviation.error"] <- qt(0.975, length(lower_dev) - 1) * 
      sd(lower_dev, na.rm = TRUE) / sqrt(length(lower_dev))
    
    weather_summary[row_index, "temperature.upperDeviation"] <- mean(upper_dev, na.rm = TRUE)
    weather_summary[row_index, "temperature.upperDeviation.error"] <- qt(0.975, length(upper_dev) - 1) * 
      sd(upper_dev, na.rm = TRUE) / sqrt(length(upper_dev))
    
    # Precipitation
    precip_summary <- calculate_summary(weather_site_day, "PRECTOT")
    weather_summary[row_index, c("precipitation.mean", "precipitation.max", 
                                "precipitation.min", "precipitation.error")] <- precip_summary[c("mean", "max", "min", "error")]
    
    row_index <- row_index + 1
  }
}

# Remove any unused rows
weather_summary <- weather_summary[1:(row_index-1), ]

7.2 Estimation of annual cumulative precipitation hyperparameters based on weather dataset

Declare auxiliary objects for estimating the precipitation cumulative curve with optim:

# Define the objective function for optimization
objective_function <- function(params, observed_data) {
  predicted_data <- gen_cum_precipitation_of_year(
    plateau_value = params[1], 
    inflection1 = params[2], rate1 = params[3], 
    inflection2 = params[4], rate2 = params[5], 
    year_length = length(observed_data),
    n_samples = params[6], 
    max_sample_size = params[7], 
    seed = SEED
  )
  sum((observed_data - predicted_data)^2)
}

7.2.1 Test an isolated version of the estimation of cumulative precipitation hyperparameters using `optim`

Prepare data for Cambridge site:

cambridge_data <- subset(weather, Site == "Cambridge")
cum_precip <- get_cumulative_precipitation(
  daily_precipitation = cambridge_data$PRECTOT, 
  years = cambridge_data$YEAR
)
cambridge_curves <- split(cum_precip, cambridge_data$YEAR)

Choose a good initial guess:

cambridge_initial_guess <- c(0.5, 122, 0.005, 243, 0.005, 180, 15)

cambridge_initial_guess_curve <- gen_cum_precipitation_of_year(
    plateau_value = cambridge_initial_guess[1], 
    inflection1 =  cambridge_initial_guess[2], rate1 = cambridge_initial_guess[3], 
    inflection2 = cambridge_initial_guess[4], rate2 = cambridge_initial_guess[5], 
    year_length = YEAR_LENGTH,
    n_samples = cambridge_initial_guess[6], 
    max_sample_size = cambridge_initial_guess[7], 
    seed = SEED
    )

Visually assess initial guess:

plot(cambridge_curves[[1]], type = 'l', col = 1, lwd = 3, ylab = 'curve')
for (i in 2:length(cambridge_curves))
{
  lines(cambridge_curves[[i]], col = i,  lwd = 3)
}
lines(cambridge_initial_guess_curve, col = "black", lwd = 4, lty = 2)

Perform parameter estimation with best initial guess:

cambridge_estimation_result <- estimate_hyperparameters_optim(
  curves = cambridge_curves,
  objective_function = objective_function,
  method = "L-BFGS-B",
  lower = c(0, 1, 0.01, 1, 0.01, 1, 3),
  upper = c(1, 365, 0.9, 365, 0.9, 365, 30),
  initial_guess = cambridge_initial_guess
)

Use parameter estimations to generate curves for each year:

cambridge_best_estimation_curves <- list()

for (year in years)
{
  fit_year <- cambridge_estimation_result$curve_fits[[as.character(year)]]
  
  cambridge_best_estimation_curve <- gen_cum_precipitation_of_year(
    plateau_value = fit_year$par[1], 
    inflection1 =  fit_year$par[2], rate1 = fit_year$par[3], 
    inflection2 = fit_year$par[4], rate2 = fit_year$par[5], 
    year_length = YEAR_LENGTH,
    n_samples = fit_year$par[6], 
    max_sample_size = fit_year$par[7], 
    seed = SEED
  )
  
  cambridge_best_estimation_curves[[as.character(year)]] <- cambridge_best_estimation_curve
}

Visualise fit for the first year:

plot(cambridge_curves[[1]], type = 'l', col = "grey", lwd = 3, xaxt = 'n', yaxt = 'n')
lines(cambridge_best_estimation_curves[[1]], 
      col = "black",
      lty = 2)

Visualise fit per year:

layout(matrix(1:length(cambridge_curves), nrow = 6, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))
for (year in years) {
  plot(cambridge_curves[[as.character(year)]], type = 'l', col = as.character(year), lwd = 3, xaxt = 'n', yaxt = 'n')
  lines(cambridge_best_estimation_curves[[as.character(year)]], 
        col = "black",#as.character(year), 
        lty = 2)
  text(as.character(year), x = 20, y = 0.9)
}

7.2.2 Run estimation of cumulative precipitation hyperparameters for all sites

Prepare data for all sites:

cum_precip_per_site <- setNames(lapply(sites, function(site){
  site_data <- subset(weather, Site == site)
  cum_precip <- get_cumulative_precipitation(
    daily_precipitation = site_data$PRECTOT, 
    years = site_data$YEAR
  )
  site_curves <- split(cum_precip, site_data$YEAR)
}), sites)

Choose best initial guess per site:

initial_guesses <- setNames(lapply(sites, function(x) numeric(7)), sites)

initial_guesses[["Irkutsk"]] <- c(0.1, 60, 0.01, 200, 0.1, 180, 15)
initial_guesses[["Cambridge"]] <- c(0.5, 122, 0.005, 243, 0.005, 180, 15)
initial_guesses[["Rakhigarhi"]] <- c(0.2, 40, 0.1, 200, 0.1, 180, 15)
initial_guesses[["Pearl Harbor"]] <- c(0.8, 150, 0.005, 320, 0.1, 180, 15)
initial_guesses[["Windhoek"]] <- c(0.7, 80, 0.1, 330, 0.1, 180, 15)
initial_guesses[["Sao Paulo"]] <- c(0.6, 60, 0.1, 310, 0.1, 180, 15)
initial_guesses[["Hobart"]] <- c(0.5, 122, 0.005, 243, 0.005, 180, 15)

initial_guesses_curve <- lapply(initial_guesses, function(x) {
  gen_cum_precipitation_of_year(
    plateau_value = x[1], 
    inflection1 =  x[2], rate1 = x[3], 
    inflection2 = x[4], rate2 = x[5], 
    year_length = YEAR_LENGTH,
    n_samples = x[6], 
    max_sample_size = x[7], 
    seed = SEED
  )
})

Visually assess initial guess:

layout(matrix(1:(length(sites)+1), nrow = 2, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))

for (site in sites) {
  plot(cum_precip_per_site[[site]][[1]], type = 'l', col = 1, lwd = 3, xaxt = 'n', yaxt = 'n')
  for (i in 2:length(cum_precip_per_site[[site]]))
  {
    lines(cum_precip_per_site[[site]][[i]], col = i,  lwd = 3)
  }
  lines(initial_guesses_curve[[site]], col = "black", lwd = 4, lty = 2)
  text(site, x = 340, y = 0.05, adj = 1)
}
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')

Perform parameter estimation for each site and year with best initial guess:

# Initialize an empty list to store results
estimation_results <- list()

# Iterate over all sites
for (site in sites)
{
  site_data <- subset(weather, Site == site)
  
  cum_precip <- get_cumulative_precipitation(
    daily_precipitation = site_data$PRECTOT, 
    years = site_data$YEAR
    )
  
  curves <- split(cum_precip, site_data$YEAR)
  
  estimation_results[[site]] <- estimate_hyperparameters_optim(
    curves = curves,
    objective_function = objective_function,
    method = "L-BFGS-B",
    lower = c(0, 1, 0.01, 1, 0.01, 1, 3),
    upper = c(1, 365, 0.9, 365, 0.9, 365, 30),
    initial_guess = initial_guesses[[site]]
  )
}

Use parameter estimations to generate curves for each site:

best_estimation_curves <- list()

for (site in sites)
{
  for (year in years)
  {
    fit_year <- estimation_results[[site]]$curve_fits[[as.character(year)]]
    
    best_estimation_curve <- gen_cum_precipitation_of_year(
      plateau_value = fit_year$par[1], 
      inflection1 =  fit_year$par[2], rate1 = fit_year$par[3], 
      inflection2 = fit_year$par[4], rate2 = fit_year$par[5], 
      year_length = YEAR_LENGTH,
      n_samples = fit_year$par[6], 
      max_sample_size = fit_year$par[7], 
      seed = SEED
    )
    
    best_estimation_curves[[site]][[as.character(year)]] <- best_estimation_curve
  }
}

Visually assess fit of multiple years per site:

layout(matrix(1:(length(sites)+1), nrow = 2, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))

for (site in sites) {
  plot(c(0, max(year_length_in_days)), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
  for (year in years)
  {
    lines(cum_precip_per_site[[site]][[as.character(year)]], col = year,  lwd = 3)
    lines(best_estimation_curves[[site]][[as.character(year)]], col = year, lwd = 3, lty = 2)
  }
  text(site, x = 340, y = 0.05, adj = 1)
}
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')

Estimate a single parameter setting per site by averaging values per year:

best_estimation_fits_mean <- list()

for (site in sites)
{
  parameters_per_year <- list()
  for (year in years)
  {
    parameters_per_year[[as.character(year)]] <- estimation_results[[site]]$curve_fits[[as.character(year)]]$par
  }
  best_estimation_fits_mean[[site]]$mean <- apply(data.frame(parameters_per_year), 1, mean)
  best_estimation_fits_mean[[site]]$sd <- apply(data.frame(parameters_per_year), 1, sd)
}

Use mean parameter estimations to generate curves for each site:

best_estimation_curves_mean <- list()

for (site in sites)
{
  fit_site <- best_estimation_fits_mean[[site]]$mean
  
  best_estimation_curve_mean <- gen_cum_precipitation_of_year(
    plateau_value = fit_site[1], 
    inflection1 =  fit_site[2], rate1 = fit_site[3], 
    inflection2 = fit_site[4], rate2 = fit_site[5], 
    year_length = YEAR_LENGTH,
    n_samples = fit_site[6], 
    max_sample_size = fit_site[7], 
    seed = SEED
  )
  
  best_estimation_curves_mean[[site]] <- best_estimation_curve_mean
}

Visually assess fit of the single estimation per site:

layout(matrix(1:(length(sites)+1), nrow = 2, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))

for (site in sites) {
  plot(c(0, max(year_length_in_days)), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
  for (year in years)
  {
    lines(cum_precip_per_site[[site]][[as.character(year)]], col = year,  lwd = 3)
  }
  lines(best_estimation_curves_mean[[site]], col = "black", lwd = 4, lty = 2)
  text(site, x = 340, y = 0.05, adj = 1)
}
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')

best_estimation_fits_mean_table <- list()

for (site in sites)
{
  best_estimation_fits_mean_table[[site]] <- paste0(
    round(best_estimation_fits_mean[[site]]$mean, digits = 4), 
    " (&PlusMinus;", round(best_estimation_fits_mean[[site]]$sd, digits = 4), ")")
}
best_estimation_fits_mean_table <- as.data.frame(best_estimation_fits_mean_table, 
                  row.names = c("plateau value", "inflection1", "rate1", "inflection2", "rate2", "n_samples", "max_sample_size"))

knitr::kable(best_estimation_fits_mean_table)

	Irkutsk	Cambridge	Rakhigarhi	Pearl.Harbor	Windhoek	Sao.Paulo	Hobart
plateau value	0.0935 (±0.1015)	0.3902 (±0.2624)	0.2267 (±0.1762)	0.783 (±0.19)	0.814 (±0.1738)	0.5705 (±0.1077)	0.3987 (±0.281)
inflection1	60.909 (±4.4393)	116.5566 (±24.6686)	34.8833 (±11.7029)	69.9943 (±69.6337)	27.9262 (±26.9712)	43.2431 (±23.0397)	122.0008 (±0.0077)
rate1	0.1268 (±0.2551)	0.0474 (±0.1816)	0.5323 (±0.3584)	0.1046 (±0.2554)	0.3787 (±0.3929)	0.3253 (±0.3593)	0.01 (±0)
inflection2	204.3544 (±9.2733)	262.4333 (±44.5082)	210.5208 (±11.3796)	325.012 (±12.8186)	333.2413 (±11.3875)	319.3132 (±14.9556)	253.1712 (±34.4429)
rate2	0.0352 (±0.0279)	0.01 (±0)	0.3347 (±0.4006)	0.3834 (±0.4011)	0.4163 (±0.3817)	0.0292 (±0.0095)	0.0106 (±0.0012)
n_samples	178.7062 (±3.5977)	181.7599 (±1.9596)	178.646 (±10.3818)	179.9609 (±9.3249)	176.6509 (±19.3775)	182.5667 (±3.5898)	181.4085 (±4.7813)
max_sample_size	15.0996 (±3.9922)	13.0235 (±2.2968)	17.2914 (±4.5612)	13.2541 (±6.3016)	17.0167 (±5.7825)	16.5374 (±1.8574)	13.6198 (±2.5149)

This approach seems not to work well on rate1 and rate2 standard deviations, which are estimated too high within the relative scale of a logistic rate. For example, Windhoek gets 0.3787 (±0.3929), according to which a normal probability distribution would cover most of the 0-1 range. A similar problem occurs with the Pearl Harbor’s and Windhoek’s inflection1 standard deviation.

Since the purpose is to test the potential fit of the Weather model, not the optimisation approach, we proceed to divide by a third all instances of standard deviations.

sd_adjustment <- 0.3

for (site in sites)
{
  best_estimation_fits_mean[[site]]$sd[3] <- best_estimation_fits_mean[[site]]$sd[3] * sd_adjustment
  best_estimation_fits_mean[[site]]$sd[5] <- best_estimation_fits_mean[[site]]$sd[5] * sd_adjustment
}
best_estimation_fits_mean[["Pearl Harbor"]]$sd[2] <- best_estimation_fits_mean[["Pearl Harbor"]]$sd[2] * sd_adjustment
best_estimation_fits_mean[["Windhoek"]]$sd[2] <- best_estimation_fits_mean[["Windhoek"]]$sd[2] * sd_adjustment

best_estimation_fits_mean_table <- list()

for (site in sites)
{
  best_estimation_fits_mean_table[[site]] <- paste0(
    round(best_estimation_fits_mean[[site]]$mean, digits = 4), 
    " (&PlusMinus;", round(best_estimation_fits_mean[[site]]$sd, digits = 4), ")")
}
best_estimation_fits_mean_table <- as.data.frame(best_estimation_fits_mean_table, 
                  row.names = c("plateau value", "inflection1", "rate1", "inflection2", "rate2", "n_samples", "max_sample_size"))

knitr::kable(best_estimation_fits_mean_table)

	Irkutsk	Cambridge	Rakhigarhi	Pearl.Harbor	Windhoek	Sao.Paulo	Hobart
plateau value	0.0935 (±0.1015)	0.3902 (±0.2624)	0.2267 (±0.1762)	0.783 (±0.19)	0.814 (±0.1738)	0.5705 (±0.1077)	0.3987 (±0.281)
inflection1	60.909 (±4.4393)	116.5566 (±24.6686)	34.8833 (±11.7029)	69.9943 (±20.8901)	27.9262 (±8.0914)	43.2431 (±23.0397)	122.0008 (±0.0077)
rate1	0.1268 (±0.0765)	0.0474 (±0.0545)	0.5323 (±0.1075)	0.1046 (±0.0766)	0.3787 (±0.1179)	0.3253 (±0.1078)	0.01 (±0)
inflection2	204.3544 (±9.2733)	262.4333 (±44.5082)	210.5208 (±11.3796)	325.012 (±12.8186)	333.2413 (±11.3875)	319.3132 (±14.9556)	253.1712 (±34.4429)
rate2	0.0352 (±0.0084)	0.01 (±0)	0.3347 (±0.1202)	0.3834 (±0.1203)	0.4163 (±0.1145)	0.0292 (±0.0028)	0.0106 (±4e-04)
n_samples	178.7062 (±3.5977)	181.7599 (±1.9596)	178.646 (±10.3818)	179.9609 (±9.3249)	176.6509 (±19.3775)	182.5667 (±3.5898)	181.4085 (±4.7813)
max_sample_size	15.0996 (±3.9922)	13.0235 (±2.2968)	17.2914 (±4.5612)	13.2541 (±6.3016)	17.0167 (±5.7825)	16.5374 (±1.8574)	13.6198 (±2.5149)

7.3 Running the entire Weather model using all estimated parameters

Calculate yearly summary statistics matching parameter inputs for each example location:

# Define summary function for a single site
calculate_site_summary <- function(site_data) {
  # Daily aggregated statistics
  daily_temp_mean <- aggregate(site_data$T2M, by = list(site_data$DOY), FUN = mean)
  daily_temp_sd <- aggregate(site_data$T2M, by = list(site_data$DOY), FUN = sd)
  daily_solar_mean <- aggregate(site_data$ALLSKY_SFC_SW_DWN, by = list(site_data$DOY), FUN = mean)
  daily_solar_sd <- aggregate(site_data$ALLSKY_SFC_SW_DWN, by = list(site_data$DOY), FUN = sd)
  
  # Yearly precipitation aggregation
  annual_sum <- aggregate(site_data$PRECTOT, by = list(site_data$YEAR), FUN = sum)
  
  # Return computed values as a named list
  list(
    temp_annual_max = max(daily_temp_mean$x, na.rm = TRUE),
    temp_annual_min = min(daily_temp_mean$x, na.rm = TRUE),
    temp_daily_fluctuation = mean(daily_temp_sd$x, na.rm = TRUE),
    temp_daily_lower_dev = mean(site_data$T2M - site_data$T2M_MIN, na.rm = TRUE),
    temp_daily_upper_dev = mean(site_data$T2M_MAX - site_data$T2M, na.rm = TRUE),
    solar_annual_max = max(daily_solar_mean$x, na.rm = TRUE),
    solar_annual_min = min(daily_solar_mean$x, na.rm = TRUE),
    solar_daily_fluctuation = mean(daily_solar_sd$x, na.rm = TRUE),
    precip_annual_sum_mean = mean(annual_sum$x, na.rm = TRUE),
    precip_annual_sum_sd = sd(annual_sum$x, na.rm = TRUE)
  )
}

# Apply the function across sites
annual_weather_summary <- lapply(split(weather, weather$Site), calculate_site_summary)

# Convert the list of summaries into a data frame
annual_weather_summary_df <- do.call(rbind, annual_weather_summary)
#annual_weather_summary_df <- cbind(Site = names(annual_weather_summary), annual_weather_summary_df)

# Ensure the data frame structure is consistent
annual_weather_summary_df <- as.data.frame(annual_weather_summary_df)
#rownames(annual_weather_summary_df) <- NULL

Initialise experiments per site using annual summary statistics and estimated yearly cumulative precipitation parameters of example locations as parameter inputs:

weather_model_runs <- list()

for (site in sites)
{
  estimation_optim <- best_estimation_fits_mean[[site]]
  
  weather_model_runs[[site]] <- initialise_weather_model(
    year_length = year_length_in_days,
    seed = SEED,
    albedo = 0.4,
    is_southern_hemisphere = weather[weather$Site == site,"LAT"][1] < 0,
    temp_annual_max = annual_weather_summary_df$temp_annual_max[[site]],
    temp_annual_min = annual_weather_summary_df$temp_annual_min[[site]],
    temp_daily_fluctuation = annual_weather_summary_df$temp_daily_fluctuation[[site]],
    temp_daily_lower_dev = annual_weather_summary_df$temp_daily_lower_dev[[site]],
    temp_daily_upper_dev = annual_weather_summary_df$temp_daily_upper_dev[[site]],
    
    solar_annual_max = annual_weather_summary_df$solar_annual_max[[site]],
    solar_annual_min = annual_weather_summary_df$solar_annual_min[[site]],
    solar_daily_fluctuation = annual_weather_summary_df$solar_daily_fluctuation[[site]],
    
    precip_annual_sum_mean = annual_weather_summary_df$precip_annual_sum_mean[[site]],
    precip_annual_sum_sd = annual_weather_summary_df$precip_annual_sum_sd[[site]],
    
    precip_plateau_value_mean = estimation_optim$mean[1],
    precip_plateau_value_sd = estimation_optim$sd[1],
    precip_inflection1_mean = estimation_optim$mean[2],
    precip_inflection1_sd = estimation_optim$sd[2],
    precip_rate1_mean = estimation_optim$mean[3],
    precip_rate1_sd = estimation_optim$sd[3],
    precip_inflection2_mean = estimation_optim$mean[4],
    precip_inflection2_sd = estimation_optim$sd[4],
    precip_rate2_mean = estimation_optim$mean[5],
    precip_rate2_sd = estimation_optim$sd[5],
    precip_n_samples_mean = estimation_optim$mean[6],
    precip_n_samples_sd = estimation_optim$sd[6],
    precip_max_sample_size_mean = estimation_optim$mean[7],
    precip_max_sample_size_sd = estimation_optim$sd[7]
  )
}

Run experiments:

for (site in sites)
{
  weather_model_runs[[site]] <-
    run_weather_model(weather_model_runs[[site]], number_of_years)
}

Create a data frame containing the daily summary statistics of simulations comparable to the one for the real data:

# Function to calculate summary statistics for a single day's data
calculate_daily_summary <- function(day_data) {
  # Solar radiation
  solar_mean <- mean(day_data$solar_radiation, na.rm = TRUE)
  solar_sd <- sd(day_data$solar_radiation, na.rm = TRUE)
  solar_max <- max(day_data$solar_radiation, na.rm = TRUE)
  solar_min <- min(day_data$solar_radiation, na.rm = TRUE)
  solar_error <- qt(0.975, df = max(length(day_data$solar_radiation) - 1, 1)) * 
                 solar_sd / sqrt(length(day_data$solar_radiation))

  # Temperature
  temp_mean <- mean(day_data$temperature, na.rm = TRUE)
  temp_sd <- sd(day_data$temperature, na.rm = TRUE)
  temp_max <- max(day_data$temperature, na.rm = TRUE)
  temp_min <- min(day_data$temperature, na.rm = TRUE)
  temp_error <- qt(0.975, df = max(length(day_data$temperature) - 1, 1)) * 
                temp_sd / sqrt(length(day_data$temperature))

  # Max temperature
  max_temp_mean <- mean(day_data$temperature_max, na.rm = TRUE)
  max_temp_max <- max(day_data$temperature_max, na.rm = TRUE)
  max_temp_min <- min(day_data$temperature_max, na.rm = TRUE)
  max_temp_error <- qt(0.975, df = max(length(day_data$temperature_max) - 1, 1)) * 
                    sd(day_data$temperature_max, na.rm = TRUE) /
                    sqrt(length(day_data$temperature_max))

  # Min temperature
  min_temp_mean <- mean(day_data$temperature_min, na.rm = TRUE)
  min_temp_max <- max(day_data$temperature_min, na.rm = TRUE)
  min_temp_min <- min(day_data$temperature_min, na.rm = TRUE)
  min_temp_error <- qt(0.975, df = max(length(day_data$temperature_min) - 1, 1)) * 
                    sd(day_data$temperature_min, na.rm = TRUE) /
                    sqrt(length(day_data$temperature_min))

  # Deviations
  lower_dev <- mean(day_data$temperature - day_data$temperature_min, na.rm = TRUE)
  lower_dev_error <- qt(0.975, df = max(length(day_data$temperature_min) - 1, 1)) * 
                     sd(day_data$temperature - day_data$temperature_min, na.rm = TRUE) /
                     sqrt(length(day_data$temperature_min))

  upper_dev <- mean(day_data$temperature_max - day_data$temperature, na.rm = TRUE)
  upper_dev_error <- qt(0.975, df = max(length(day_data$temperature_max) - 1, 1)) * 
                     sd(day_data$temperature_max - day_data$temperature, na.rm = TRUE) /
                     sqrt(length(day_data$temperature_max))

  # Precipitation
  precip_mean <- mean(day_data$precipitation, na.rm = TRUE)
  precip_max <- max(day_data$precipitation, na.rm = TRUE)
  precip_min <- min(day_data$precipitation, na.rm = TRUE)
  precip_error <- qt(0.975, df = max(length(day_data$precipitation) - 1, 1)) * 
                  sd(day_data$precipitation, na.rm = TRUE) /
                  sqrt(length(day_data$precipitation))

  # Combine results into a named list
  list(
    solarRadiation.mean = solar_mean,
    solarRadiation.sd = solar_sd,
    solarRadiation.max = solar_max,
    solarRadiation.min = solar_min,
    solarRadiation.error = solar_error,
    temperature.mean = temp_mean,
    temperature.sd = temp_sd,
    temperature.max = temp_max,
    temperature.min = temp_min,
    temperature.error = temp_error,
    maxTemperature.mean = max_temp_mean,
    maxTemperature.max = max_temp_max,
    maxTemperature.min = max_temp_min,
    maxTemperature.error = max_temp_error,
    minTemperature.mean = min_temp_mean,
    minTemperature.max = min_temp_max,
    minTemperature.min = min_temp_min,
    minTemperature.error = min_temp_error,
    temperature.lowerDeviation = lower_dev,
    temperature.lowerDeviation.error = lower_dev_error,
    temperature.upperDeviation = upper_dev,
    temperature.upperDeviation.error = upper_dev_error,
    precipitation.mean = precip_mean,
    precipitation.max = precip_max,
    precipitation.min = precip_min,
    precipitation.error = precip_error
  )
}

# Process data for all sites and days
weather_summary_sim <- do.call(rbind, lapply(sites, function(site) {
  site_data <- as.data.frame(weather_model_runs[[site]]$daily)
  do.call(rbind, lapply(1:max(year_length_in_days), function(day) {
    day_data <- site_data[site_data$current_day_of_year == day,]
    as.data.frame(list(
      Site = site, 
      day_of_year = day, 
      calculate_daily_summary(day_data)
      ))
  }))
}))

# Convert to a data frame
weather_summary_sim <- as.data.frame(weather_summary_sim)

7.4 Creating figure

Set colours for real and simulated data:

realDataColour = hsv(200/360, 62/100, 63/100) # teal

simulatedDataColour = hsv(24/360, 79/100, 89/100) # orange

Create figure:

# Helper functions
round_to_multiple <- function(x, base, round_fn = round) {
  round_fn(x / base) * base
}

create_polygon <- function(x, y1, y2, alpha = 0.5, col = "black") {
  polygon(c(x, rev(x)), c(y1, rev(y2)), col = adjustcolor(col, alpha = alpha), border = NA)
}

plot_weather_variable <- function(x, y, ylim, lwd, col = "black", lty = 1) {
  plot(x, y, axes = FALSE, ylim = ylim, type = "l", lwd = lwd, col = col, lty = lty)
}

add_confidence_interval <- function(x, y_mean, error, col, alpha = 0.5) {
  create_polygon(x, y_mean + error, y_mean, alpha, col)
  create_polygon(x, y_mean - error, y_mean, alpha, col)
}

add_min_max_interval <- function(x, y_mean, y_min, y_max, col, alpha = 0.3) {
  create_polygon(x, y_max, y_mean, alpha, col)
  create_polygon(x, y_min, y_mean, alpha, col)
}

# Main plotting function

plot_weather_summary_comparison <- function(weather_summary, sites, sites_latitude, weather) {
  # Setup plot
  num_columns <- length(sites) + 1
  num_rows_except_bottom <- 4
  
  layout_matrix <- rbind(
    matrix(1:(num_columns * num_rows_except_bottom), nrow = num_rows_except_bottom, ncol = num_columns, byrow = FALSE),
    c((num_columns * num_rows_except_bottom) + 1, rep((num_columns * num_rows_except_bottom) + 2, length(sites)))
  )
  
  layout(layout_matrix,
         widths = c(3, 12, rep(10, length(sites) - 2), 14),
         heights = c(3, 10, 10, 12, 2))
  
  # Y-axis labels
  y_labs <- c(expression(paste("solar radiation (", MJ/m^-2, ")")),
             "temperature (C)", "precipitation (mm)")
  
  # Calculate ranges
  range_solar <- c(
    round_to_multiple(min(
      min(weather_summary$solarRadiation.min),
      min(weather_summary_sim$solarRadiation.min)), 
      5, floor),
    round_to_multiple(max(
      max(weather_summary$solarRadiation.max),
      40),
      #max(weather_summary_sim$solarRadiation.max)), 
      ## an outlier in Sao Paulo brings it to c. 46 and does not show with the polygon
      5, ceiling)
  )
  range_temp <- c(
    round_to_multiple(min(
      min(weather_summary$minTemperature.min),
      min(weather_summary_sim$minTemperature.min)), 
      5, floor),
    round_to_multiple(max(
      max(weather_summary$maxTemperature.max),
      max(weather_summary_sim$maxTemperature.max)),
      5, ceiling)
  )
  range_precip <- c(
    round_to_multiple(min(
      min(weather_summary$precipitation.min),
      min(weather_summary_sim$precipitation.min)), 
      5, floor),
    round_to_multiple(max(
      max(weather_summary$precipitation.max),
      max(weather_summary_sim$precipitation.max)),
      5, ceiling)
  )
  
  # Plot settings
  par(cex = graphic_scale, cex.axis = graphic_scale * (font_rescale + axis_text_rescale))
  
  # First column: y axis titles
  for (i in 1:4) {
    par(mar = c(0, 0, 0, 0.4))
    plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
    if (i > 1) {
      text(x = 0.5, y = 0.5, font = 4, 
           cex = graphic_scale * (0.78 + font_rescale + margin_text_rescale_left), 
           srt = 90,
           labels = y_labs[i-1])
    }
  }
  
  # Plot for each site
  for (site in sites) {
    weather_site <- weather[weather$Site == site,]
    weather_model_site <- weather_model_runs[[site]]$daily
    weather_summary_site <- weather_summary[weather_summary$Site == site,]
    weather_summary_site_sim <- weather_summary_sim[weather_summary_sim$Site == site,]
    
    left_plot_margin <- ifelse(site == sites[1], 2, 0.1)
    right_plot_margin <- ifelse(site == sites[length(sites)], 4, 0.1)
    
    # Site name + latitude
    par(mar = c(0.2, left_plot_margin, 0.1, right_plot_margin))
    plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
    text(x = 0.5, y = 0.5, font = 4, 
         cex = graphic_scale * (0.7 + font_rescale + margin_text_rescale_top),
         labels = paste(site, sites_latitude$Latitude[sites_latitude$Site == site], sep = "\n"))
    
    # Solar radiation
    
    # original data
    par(mar = c(0.1, left_plot_margin, 0.1, right_plot_margin))
    plot_weather_variable(1:year_length_max, weather_summary_site$solarRadiation.mean, 
                          range_solar, graphic_scale, 
                          col = adjustcolor(realDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, weather_summary_site$solarRadiation.mean, 
                            weather_summary_site$solarRadiation.error, 
                            adjustcolor(realDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site$solarRadiation.mean, 
                         weather_summary_site$solarRadiation.min, 
                         weather_summary_site$solarRadiation.max, 
                         adjustcolor(realDataColour, alpha.f = 0.5))
    
    # simulations
    lines(1:year_length_max, weather_summary_site_sim$solarRadiation.mean, 
          lwd = graphic_scale, 
          col = adjustcolor(simulatedDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site_sim$solarRadiation.mean, 
                            weather_summary_site_sim$solarRadiation.error, 
                            adjustcolor(simulatedDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site_sim$solarRadiation.mean, 
                         weather_summary_site_sim$solarRadiation.min, 
                         weather_summary_site_sim$solarRadiation.max, 
                         adjustcolor(simulatedDataColour, alpha.f = 0.5))
    
    # solstices and axes
    #lines(1:year_length_max, weather_summary_site$solarRadiationTop.mean, lty = 2, lwd = graphic_scale)
    
    abline(v = c(SOLSTICE_SUMMER, SOLSTICE_WINTER), lty = 3, lwd = graphic_scale)
    
    if (site == sites[1]) {
      axis(2, at = seq(range_solar[1], range_solar[2], 5))
    }
  
    # Temperature
    
    # original data
    plot_weather_variable(1:year_length_max, weather_summary_site$temperature.mean, 
                          range_temp, graphic_scale,
                          col = adjustcolor(realDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site$temperature.mean, 
                            weather_summary_site$temperature.error,
                            adjustcolor(realDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site$temperature.mean, 
                         weather_summary_site$temperature.min, 
                         weather_summary_site$temperature.max, 
                         adjustcolor(realDataColour, alpha.f = 0.5))
    
    lines(1:year_length_max, weather_summary_site$maxTemperature.mean, 
          lwd = graphic_scale, 
          col = adjustcolor(realDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site$maxTemperature.mean, 
                            weather_summary_site$maxTemperature.error, 
                            col = adjustcolor(realDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site$maxTemperature.mean, 
                         weather_summary_site$maxTemperature.min, 
                         weather_summary_site$maxTemperature.max, 
                         adjustcolor(realDataColour, alpha.f = 0.5))
    
    lines(1:year_length_max, weather_summary_site$minTemperature.mean, 
          lwd = graphic_scale, 
          col = adjustcolor(realDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site$minTemperature.mean, 
                            weather_summary_site$minTemperature.error, 
                            adjustcolor(realDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site$minTemperature.mean, 
                         weather_summary_site$minTemperature.min, 
                         weather_summary_site$minTemperature.max, 
                         adjustcolor(realDataColour, alpha.f = 0.5))

    # simulations
    lines(1:year_length_max, weather_summary_site_sim$temperature.mean, 
          lwd = graphic_scale,
          col = adjustcolor(simulatedDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site_sim$temperature.mean, 
                            weather_summary_site_sim$temperature.error,
                            adjustcolor(simulatedDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site_sim$temperature.mean, 
                         weather_summary_site_sim$temperature.min, 
                         weather_summary_site_sim$temperature.max, 
                         adjustcolor(simulatedDataColour, alpha.f = 0.5))
    
    lines(1:year_length_max, weather_summary_site_sim$maxTemperature.mean, 
          lwd = graphic_scale, 
          col = adjustcolor(simulatedDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site_sim$maxTemperature.mean, 
                            weather_summary_site_sim$maxTemperature.error, 
                            col = adjustcolor(simulatedDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site_sim$maxTemperature.mean, 
                         weather_summary_site_sim$maxTemperature.min, 
                         weather_summary_site_sim$maxTemperature.max, 
                         adjustcolor(simulatedDataColour, alpha.f = 0.5))
    
    lines(1:year_length_max, weather_summary_site_sim$minTemperature.mean, 
          lwd = graphic_scale, 
          col = adjustcolor(simulatedDataColour, alpha.f = 1))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site_sim$minTemperature.mean, 
                            weather_summary_site_sim$minTemperature.error, 
                            adjustcolor(simulatedDataColour, alpha.f = 0.75))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site_sim$minTemperature.mean, 
                         weather_summary_site_sim$minTemperature.min, 
                         weather_summary_site_sim$minTemperature.max, 
                         adjustcolor(simulatedDataColour, alpha.f = 0.5))

    # solstices and axes
    abline(v = c(SOLSTICE_SUMMER, SOLSTICE_WINTER), lty = 3, lwd = graphic_scale)
    
    if (site == sites[1]) {
      axis(2, at = seq(range_temp[1], range_temp[2], 5))
    }

    # Precipitation
    par(mar = c(8, left_plot_margin, 0.1, right_plot_margin))
    
    # cumulative precipitation
    plot(c(1, year_length_max), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
    # original data
    for (year in years) {
      site_year_data <- weather_site$PRECTOT[weather_site$YEAR == year]
      lines(1:length(site_year_data), 
            get_cumulative_precipitation_of_year(site_year_data), 
            lwd = graphic_scale, 
            col = adjustcolor(realDataColour, alpha.f = 0.5))
    }
    # simulation
    for (year in 1:number_of_years) {
      site_year_data_sim <- weather_model_site$precipitation[weather_model_site$current_year == year]
      lines(1:length(site_year_data_sim), 
            get_cumulative_precipitation_of_year(site_year_data_sim), 
            lwd = graphic_scale, 
            col = adjustcolor(simulatedDataColour, alpha.f = 0.5))
    }
    
    if (site == sites[length(sites)]) {
      axis(4, at = seq(0, 1, 0.25))
      mtext("cumulative annual sum", 4, line = 2.5, cex = graphic_scale * (font_rescale + margin_text_rescale_right))
    }
    
    # daily precipitation
    par(new = TRUE, mar = c(3, left_plot_margin, 0.1, right_plot_margin))
    
    # original data
    plot_weather_variable(1:year_length_max, 
                          weather_summary_site$precipitation.mean, 
                          range_precip, 
                          graphic_scale, 
                          col = adjustcolor(realDataColour, alpha.f = 0.5))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site$precipitation.mean, 
                            weather_summary_site$precipitation.error, 
                            adjustcolor(realDataColour, alpha.f = 0.5))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site$precipitation.mean, 
                         weather_summary_site$precipitation.min, 
                         weather_summary_site$precipitation.max, 
                         adjustcolor(realDataColour, alpha.f = 0.5))
    
    # simulation
    lines(1:year_length_max, 
          weather_summary_site_sim$precipitation.mean, 
          lwd = graphic_scale, 
          col = adjustcolor(simulatedDataColour, alpha.f = 0.5))
    add_confidence_interval(1:year_length_max, 
                            weather_summary_site_sim$precipitation.mean, 
                            weather_summary_site_sim$precipitation.error, 
                            adjustcolor(simulatedDataColour, alpha.f = 0.5))
    add_min_max_interval(1:year_length_max, 
                         weather_summary_site_sim$precipitation.mean, 
                         weather_summary_site_sim$precipitation.min, 
                         weather_summary_site_sim$precipitation.max, 
                         adjustcolor(simulatedDataColour, alpha.f = 0.5))
    
    # solstices and axes
    abline(v = c(SOLSTICE_SUMMER, SOLSTICE_WINTER), lty = 3, lwd = graphic_scale)
    
    if (site == sites[1]) {
      axis(2, at = seq(range_precip[1], range_precip[2], 50))
    }
    
    axis(1, at = cumsum(c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)), las = 2)
  }
  
  # Bottom row: "day of year" label
  par(mar = c(0, 0, 0, 0))
  plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
  plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
  text(x = 0.5, y = 0.7, font = 4, 
       cex = graphic_scale * (0.8 + font_rescale + margin_text_rescale_bottom),
       labels = "day of year")
}

# Main execution
plot_name <- file.path(output_dir, paste0("Fig6-ValidationUsingExamples.", plot_file_format))

if (plot_file_format == "png") {
  graphic_scale <- 1
  font_rescale <- 0.3
  axis_text_rescale <- 0.5
  margin_text_rescale_top <- 0.4
  margin_text_rescale_left <- 0
  margin_text_rescale_right <- 1
  margin_text_rescale_bottom <- 0.7

  png(plot_name, width = number_of_sites * graphic_scale * 114, height = graphic_scale * 534)
} else if (plot_file_format == "eps") {
  graphic_scale = 1.2
  font_rescale = 0.1
  axis_text_rescale = 0.8
  margin_text_rescale_top <- 0.3
  margin_text_rescale_left <- 0.1
  margin_text_rescale_right <- 1.1
  margin_text_rescale_bottom <- 0.7

  extrafont::loadfonts(device = "postscript")
  grDevices::cairo_ps(filename = plot_name ,
                      pointsize = 12,
                      width = number_of_sites * graphic_scale * 1.5,
                      height = graphic_scale * 8,
                      onefile = FALSE,
                      family = "sans"
                      )
}
plot_weather_summary_comparison(weather_summary, sites, sites_latitude, weather)
dev.off()

svg 
  2

knitr::include_graphics(plot_name)