<- "output"
output_dir <- c("png", "eps")[1] # modify index number to change format plot_file_format
7 Calibration targeting weather examples
7.1 Preparation
Choose file format for generated figures:
Load source file containing the R implementation of the Weather model:
source("source/weatherModel.R")
source("source/estimate_hyperparameters_optim.R")
Set simulation constants:
<- 0
SEED <- 365 # ignoring leap year adjustment
YEAR_LENGTH <- 172 # June 21st (approx.)
SOLSTICE_SUMMER <- 355 # December 21st (approx.) SOLSTICE_WINTER
As a final part in this demonstration, we will extend the above process to deal with multiple instances of curves and parameter sets, generated by the same configuration of hyperparameters. We will then want to estimate those original hyperparameter values.
We use the data downloaded at NASA´s POWER access viewer (power.larc.nasa.gov/data-access-viewer/) selecting the user community ‘Agroclimatology’ and pin pointing the different locations between 01/01/1984 and 31/12/2007. The exact locations are:
- Rakhigarhi, Haryana, India (Latitude: 29.1687, Longitude: 76.0687)
- Irkutsk, Irkutsk Óblast, Russia (Latitude: 52.2891, Longitude: 104.2493)
- Hobart, Tasmania, Australia (Latitude: -42.8649, Longitude: 147.3441)
- Pearl Harbor, Hawaii, United States of America (Latitude: 21.376, Longitude: -157.9708)
- São Paulo, Brazil (Latitude: -23.5513, Longitude: -46.6344)
- Cambridge, United Kingdom (Latitude: 52.2027, Longitude: 0.122)
- Windhoek, Namibia (Latitude: -22.5718, Longitude: 17.0953)
We selected the ICASA Format’s parameters:
- Precipitation (PRECTOT)
- Wind speed at 2m (WS2M)
- Relative Humidity at 2m (RH2M)
- Dew/frost point at 2m (T2MDEW)
- Maximum temperature at 2m (T2M_MAX)
- Minimum temperature at 2m (T2M_MIN)
- All sky insolation incident on a horizontal surface (ALLSKY_SFC_SW_DWN)
- Temperature at 2m (T2M)
and from Solar Related Parameters:
- Top-of-atmosphere Insolation (ALLSKY_TOA_SW_DWN)
# Function to read and filter weather data
<- function(file_path) {
read_weather_data <- read.csv(file_path, skip = 18)
data $YEAR %in% 1984:2007, ]
data[data
}
# Get input file paths
<- list.files(path = "input", full.names = TRUE)
input_files
# Read and combine all weather data
<- do.call(rbind, lapply(input_files, read_weather_data))
weather
# Define site mapping
<- list(
site_mapping list(condition = function(x) floor(x$LAT) == 29, site = "Rakhigarhi"),
list(condition = function(x) floor(x$LON) == 104, site = "Irkutsk"),
list(condition = function(x) floor(x$LAT) == -43, site = "Hobart"),
list(condition = function(x) floor(x$LAT) == 21, site = "Pearl Harbor"),
list(condition = function(x) floor(x$LAT) == -24, site = "Sao Paulo"),
list(condition = function(x) floor(x$LON) == 0, site = "Cambridge"),
list(condition = function(x) floor(x$LAT) == -23, site = "Windhoek")
)
# Assign sites based on latitude and longitude
$Site <- NA
weatherfor (mapping in site_mapping) {
$Site[mapping$condition(weather)] <- mapping$site
weather
}
# Calculate summary statistics
<- unique(weather$YEAR)
years <- length(years)
number_of_years
# Calculate the yearly length in days
<- as.integer(table(weather$YEAR) / nlevels(factor(weather$Site)))
year_length_in_days
<- max(year_length_in_days) year_length_max
Prepare display order according to latitude:
# Create a function to format latitude
<- function(lat) {
format_latitude paste(abs(round(lat, 2)), ifelse(lat < 0, "S", "N"))
}
# Create and process sites_latitude data frame
<- data.frame(
sites_latitude Site = unique(weather$Site),
Latitude = as.numeric(unique(weather$LAT))
)
# Sort sites_latitude by descending latitude
<- sites_latitude[order(-sites_latitude$Latitude), ]
sites_latitude
# Format latitude values
$Latitude <- sapply(sites_latitude$Latitude, format_latitude)
sites_latitude
# calculate easy references to sites
<- sites_latitude$Site
sites <- length(sites) number_of_sites
Compute statistics for each site and day of year:
# Define summary statistics function
<- function(data, column) {
calculate_summary c(mean = mean(data[[column]], na.rm = TRUE),
sd = sd(data[[column]], na.rm = TRUE),
max = max(data[[column]], na.rm = TRUE),
min = min(data[[column]], na.rm = TRUE),
error = qt(0.975, length(data[[column]]) - 1) *
sd(data[[column]], na.rm = TRUE) /
sqrt(length(data[[column]])))
}
# Initialize weather_summary as a data frame
<- data.frame(
weather_summary Site = character(),
dayOfYear = integer(),
solarRadiation.mean = numeric(),
solarRadiation.sd = numeric(),
solarRadiation.max = numeric(),
solarRadiation.min = numeric(),
solarRadiation.error = numeric(),
solarRadiationTop.mean = numeric(),
temperature.mean = numeric(),
temperature.sd = numeric(),
temperature.max = numeric(),
temperature.min = numeric(),
temperature.error = numeric(),
maxTemperature.mean = numeric(),
maxTemperature.max = numeric(),
maxTemperature.min = numeric(),
maxTemperature.error = numeric(),
minTemperature.mean = numeric(),
minTemperature.max = numeric(),
minTemperature.min = numeric(),
minTemperature.error = numeric(),
temperature.lowerDeviation = numeric(),
temperature.lowerDeviation.error = numeric(),
temperature.upperDeviation = numeric(),
temperature.upperDeviation.error = numeric(),
precipitation.mean = numeric(),
precipitation.max = numeric(),
precipitation.min = numeric(),
precipitation.error = numeric()
)
# Pre-allocate the weather_summary data frame
<- length(sites) * 366
total_rows <- weather_summary[rep(1, total_rows), ]
weather_summary
# Main loop
<- 1
row_index for (site in sites) {
for (day in 1:366) {
<- weather[weather$Site == site & weather$DOY == day, ]
weather_site_day
if (nrow(weather_site_day) == 0) next
"Site"] <- site
weather_summary[row_index, "dayOfYear"] <- day
weather_summary[row_index,
# Solar radiation
<- calculate_summary(weather_site_day, "ALLSKY_SFC_SW_DWN")
solar_summary c("solarRadiation.mean", "solarRadiation.sd",
weather_summary[row_index, "solarRadiation.max", "solarRadiation.min",
"solarRadiation.error")] <- solar_summary
"solarRadiationTop.mean"] <- mean(weather_site_day$ALLSKY_TOA_SW_DWN, na.rm = TRUE)
weather_summary[row_index,
# Temperature
<- calculate_summary(weather_site_day, "T2M")
temp_summary c("temperature.mean", "temperature.sd",
weather_summary[row_index, "temperature.max", "temperature.min",
"temperature.error")] <- temp_summary
# Max temperature
<- calculate_summary(weather_site_day, "T2M_MAX")
max_temp_summary c("maxTemperature.mean", "maxTemperature.max",
weather_summary[row_index, "maxTemperature.min", "maxTemperature.error")] <- max_temp_summary[c("mean", "max", "min", "error")]
# Min temperature
<- calculate_summary(weather_site_day, "T2M_MIN")
min_temp_summary c("minTemperature.mean", "minTemperature.max",
weather_summary[row_index, "minTemperature.min", "minTemperature.error")] <- min_temp_summary[c("mean", "max", "min", "error")]
# Temperature deviations
<- weather_site_day$T2M - weather_site_day$T2M_MIN
lower_dev <- weather_site_day$T2M_MAX - weather_site_day$T2M
upper_dev
"temperature.lowerDeviation"] <- mean(lower_dev, na.rm = TRUE)
weather_summary[row_index, "temperature.lowerDeviation.error"] <- qt(0.975, length(lower_dev) - 1) *
weather_summary[row_index, sd(lower_dev, na.rm = TRUE) / sqrt(length(lower_dev))
"temperature.upperDeviation"] <- mean(upper_dev, na.rm = TRUE)
weather_summary[row_index, "temperature.upperDeviation.error"] <- qt(0.975, length(upper_dev) - 1) *
weather_summary[row_index, sd(upper_dev, na.rm = TRUE) / sqrt(length(upper_dev))
# Precipitation
<- calculate_summary(weather_site_day, "PRECTOT")
precip_summary c("precipitation.mean", "precipitation.max",
weather_summary[row_index, "precipitation.min", "precipitation.error")] <- precip_summary[c("mean", "max", "min", "error")]
<- row_index + 1
row_index
}
}
# Remove any unused rows
<- weather_summary[1:(row_index-1), ] weather_summary
7.2 Estimation of annual cumulative precipitation hyperparameters based on weather dataset
Declare auxiliary objects for estimating the precipitation cumulative curve with optim
:
# Define the objective function for optimization
<- function(params, observed_data) {
objective_function <- gen_cum_precipitation_of_year(
predicted_data plateau_value = params[1],
inflection1 = params[2], rate1 = params[3],
inflection2 = params[4], rate2 = params[5],
year_length = length(observed_data),
n_samples = params[6],
max_sample_size = params[7],
seed = SEED
)sum((observed_data - predicted_data)^2)
}
7.2.1 Test an isolated version of the estimation of cumulative precipitation hyperparameters using optim
Prepare data for Cambridge site:
<- subset(weather, Site == "Cambridge")
cambridge_data <- get_cumulative_precipitation(
cum_precip daily_precipitation = cambridge_data$PRECTOT,
years = cambridge_data$YEAR
)<- split(cum_precip, cambridge_data$YEAR) cambridge_curves
Choose a good initial guess:
<- c(0.5, 122, 0.005, 243, 0.005, 180, 15)
cambridge_initial_guess
<- gen_cum_precipitation_of_year(
cambridge_initial_guess_curve plateau_value = cambridge_initial_guess[1],
inflection1 = cambridge_initial_guess[2], rate1 = cambridge_initial_guess[3],
inflection2 = cambridge_initial_guess[4], rate2 = cambridge_initial_guess[5],
year_length = YEAR_LENGTH,
n_samples = cambridge_initial_guess[6],
max_sample_size = cambridge_initial_guess[7],
seed = SEED
)
Visually assess initial guess:
plot(cambridge_curves[[1]], type = 'l', col = 1, lwd = 3, ylab = 'curve')
for (i in 2:length(cambridge_curves))
{lines(cambridge_curves[[i]], col = i, lwd = 3)
}lines(cambridge_initial_guess_curve, col = "black", lwd = 4, lty = 2)
Perform parameter estimation with best initial guess:
<- estimate_hyperparameters_optim(
cambridge_estimation_result curves = cambridge_curves,
objective_function = objective_function,
method = "L-BFGS-B",
lower = c(0, 1, 0.01, 1, 0.01, 1, 3),
upper = c(1, 365, 0.9, 365, 0.9, 365, 30),
initial_guess = cambridge_initial_guess
)
Use parameter estimations to generate curves for each year:
<- list()
cambridge_best_estimation_curves
for (year in years)
{<- cambridge_estimation_result$curve_fits[[as.character(year)]]
fit_year
<- gen_cum_precipitation_of_year(
cambridge_best_estimation_curve plateau_value = fit_year$par[1],
inflection1 = fit_year$par[2], rate1 = fit_year$par[3],
inflection2 = fit_year$par[4], rate2 = fit_year$par[5],
year_length = YEAR_LENGTH,
n_samples = fit_year$par[6],
max_sample_size = fit_year$par[7],
seed = SEED
)
as.character(year)]] <- cambridge_best_estimation_curve
cambridge_best_estimation_curves[[ }
Visualise fit for the first year:
plot(cambridge_curves[[1]], type = 'l', col = "grey", lwd = 3, xaxt = 'n', yaxt = 'n')
lines(cambridge_best_estimation_curves[[1]],
col = "black",
lty = 2)
Visualise fit per year:
layout(matrix(1:length(cambridge_curves), nrow = 6, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))
for (year in years) {
plot(cambridge_curves[[as.character(year)]], type = 'l', col = as.character(year), lwd = 3, xaxt = 'n', yaxt = 'n')
lines(cambridge_best_estimation_curves[[as.character(year)]],
col = "black",#as.character(year),
lty = 2)
text(as.character(year), x = 20, y = 0.9)
}
7.2.2 Run estimation of cumulative precipitation hyperparameters for all sites
Prepare data for all sites:
<- setNames(lapply(sites, function(site){
cum_precip_per_site <- subset(weather, Site == site)
site_data <- get_cumulative_precipitation(
cum_precip daily_precipitation = site_data$PRECTOT,
years = site_data$YEAR
)<- split(cum_precip, site_data$YEAR)
site_curves }), sites)
Choose best initial guess per site:
<- setNames(lapply(sites, function(x) numeric(7)), sites)
initial_guesses
"Irkutsk"]] <- c(0.1, 60, 0.01, 200, 0.1, 180, 15)
initial_guesses[["Cambridge"]] <- c(0.5, 122, 0.005, 243, 0.005, 180, 15)
initial_guesses[["Rakhigarhi"]] <- c(0.2, 40, 0.1, 200, 0.1, 180, 15)
initial_guesses[["Pearl Harbor"]] <- c(0.8, 150, 0.005, 320, 0.1, 180, 15)
initial_guesses[["Windhoek"]] <- c(0.7, 80, 0.1, 330, 0.1, 180, 15)
initial_guesses[["Sao Paulo"]] <- c(0.6, 60, 0.1, 310, 0.1, 180, 15)
initial_guesses[["Hobart"]] <- c(0.5, 122, 0.005, 243, 0.005, 180, 15)
initial_guesses[[
<- lapply(initial_guesses, function(x) {
initial_guesses_curve gen_cum_precipitation_of_year(
plateau_value = x[1],
inflection1 = x[2], rate1 = x[3],
inflection2 = x[4], rate2 = x[5],
year_length = YEAR_LENGTH,
n_samples = x[6],
max_sample_size = x[7],
seed = SEED
) })
Visually assess initial guess:
layout(matrix(1:(length(sites)+1), nrow = 2, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))
for (site in sites) {
plot(cum_precip_per_site[[site]][[1]], type = 'l', col = 1, lwd = 3, xaxt = 'n', yaxt = 'n')
for (i in 2:length(cum_precip_per_site[[site]]))
{lines(cum_precip_per_site[[site]][[i]], col = i, lwd = 3)
}lines(initial_guesses_curve[[site]], col = "black", lwd = 4, lty = 2)
text(site, x = 340, y = 0.05, adj = 1)
}plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
Perform parameter estimation for each site and year with best initial guess:
# Initialize an empty list to store results
<- list()
estimation_results
# Iterate over all sites
for (site in sites)
{<- subset(weather, Site == site)
site_data
<- get_cumulative_precipitation(
cum_precip daily_precipitation = site_data$PRECTOT,
years = site_data$YEAR
)
<- split(cum_precip, site_data$YEAR)
curves
<- estimate_hyperparameters_optim(
estimation_results[[site]] curves = curves,
objective_function = objective_function,
method = "L-BFGS-B",
lower = c(0, 1, 0.01, 1, 0.01, 1, 3),
upper = c(1, 365, 0.9, 365, 0.9, 365, 30),
initial_guess = initial_guesses[[site]]
) }
Use parameter estimations to generate curves for each site:
<- list()
best_estimation_curves
for (site in sites)
{for (year in years)
{<- estimation_results[[site]]$curve_fits[[as.character(year)]]
fit_year
<- gen_cum_precipitation_of_year(
best_estimation_curve plateau_value = fit_year$par[1],
inflection1 = fit_year$par[2], rate1 = fit_year$par[3],
inflection2 = fit_year$par[4], rate2 = fit_year$par[5],
year_length = YEAR_LENGTH,
n_samples = fit_year$par[6],
max_sample_size = fit_year$par[7],
seed = SEED
)
as.character(year)]] <- best_estimation_curve
best_estimation_curves[[site]][[
} }
Visually assess fit of multiple years per site:
layout(matrix(1:(length(sites)+1), nrow = 2, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))
for (site in sites) {
plot(c(0, max(year_length_in_days)), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
for (year in years)
{lines(cum_precip_per_site[[site]][[as.character(year)]], col = year, lwd = 3)
lines(best_estimation_curves[[site]][[as.character(year)]], col = year, lwd = 3, lty = 2)
}text(site, x = 340, y = 0.05, adj = 1)
}plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
Estimate a single parameter setting per site by averaging values per year:
<- list()
best_estimation_fits_mean
for (site in sites)
{<- list()
parameters_per_year for (year in years)
{as.character(year)]] <- estimation_results[[site]]$curve_fits[[as.character(year)]]$par
parameters_per_year[[
}$mean <- apply(data.frame(parameters_per_year), 1, mean)
best_estimation_fits_mean[[site]]$sd <- apply(data.frame(parameters_per_year), 1, sd)
best_estimation_fits_mean[[site]] }
Use mean parameter estimations to generate curves for each site:
<- list()
best_estimation_curves_mean
for (site in sites)
{<- best_estimation_fits_mean[[site]]$mean
fit_site
<- gen_cum_precipitation_of_year(
best_estimation_curve_mean plateau_value = fit_site[1],
inflection1 = fit_site[2], rate1 = fit_site[3],
inflection2 = fit_site[4], rate2 = fit_site[5],
year_length = YEAR_LENGTH,
n_samples = fit_site[6],
max_sample_size = fit_site[7],
seed = SEED
)
<- best_estimation_curve_mean
best_estimation_curves_mean[[site]] }
Visually assess fit of the single estimation per site:
layout(matrix(1:(length(sites)+1), nrow = 2, ncol = 4, byrow = TRUE))
par(mar = c(0.1, 0.1, 0.1, 0.1))
for (site in sites) {
plot(c(0, max(year_length_in_days)), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
for (year in years)
{lines(cum_precip_per_site[[site]][[as.character(year)]], col = year, lwd = 3)
}lines(best_estimation_curves_mean[[site]], col = "black", lwd = 4, lty = 2)
text(site, x = 340, y = 0.05, adj = 1)
}plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
<- list()
best_estimation_fits_mean_table
for (site in sites)
{<- paste0(
best_estimation_fits_mean_table[[site]] round(best_estimation_fits_mean[[site]]$mean, digits = 4),
" (±", round(best_estimation_fits_mean[[site]]$sd, digits = 4), ")")
}<- as.data.frame(best_estimation_fits_mean_table,
best_estimation_fits_mean_table row.names = c("plateau value", "inflection1", "rate1", "inflection2", "rate2", "n_samples", "max_sample_size"))
::kable(best_estimation_fits_mean_table) knitr
Irkutsk | Cambridge | Rakhigarhi | Pearl.Harbor | Windhoek | Sao.Paulo | Hobart | |
---|---|---|---|---|---|---|---|
plateau value | 0.0935 (±0.1015) | 0.3902 (±0.2624) | 0.2267 (±0.1762) | 0.783 (±0.19) | 0.814 (±0.1738) | 0.5705 (±0.1077) | 0.3987 (±0.281) |
inflection1 | 60.909 (±4.4393) | 116.5566 (±24.6686) | 34.8833 (±11.7029) | 69.9943 (±69.6337) | 27.9262 (±26.9712) | 43.2431 (±23.0397) | 122.0008 (±0.0077) |
rate1 | 0.1268 (±0.2551) | 0.0474 (±0.1816) | 0.5323 (±0.3584) | 0.1046 (±0.2554) | 0.3787 (±0.3929) | 0.3253 (±0.3593) | 0.01 (±0) |
inflection2 | 204.3544 (±9.2733) | 262.4333 (±44.5082) | 210.5208 (±11.3796) | 325.012 (±12.8186) | 333.2413 (±11.3875) | 319.3132 (±14.9556) | 253.1712 (±34.4429) |
rate2 | 0.0352 (±0.0279) | 0.01 (±0) | 0.3347 (±0.4006) | 0.3834 (±0.4011) | 0.4163 (±0.3817) | 0.0292 (±0.0095) | 0.0106 (±0.0012) |
n_samples | 178.7062 (±3.5977) | 181.7599 (±1.9596) | 178.646 (±10.3818) | 179.9609 (±9.3249) | 176.6509 (±19.3775) | 182.5667 (±3.5898) | 181.4085 (±4.7813) |
max_sample_size | 15.0996 (±3.9922) | 13.0235 (±2.2968) | 17.2914 (±4.5612) | 13.2541 (±6.3016) | 17.0167 (±5.7825) | 16.5374 (±1.8574) | 13.6198 (±2.5149) |
This approach seems not to work well on rate1
and rate2
standard deviations, which are estimated too high within the relative scale of a logistic rate. For example, Windhoek gets 0.3787 (±0.3929), according to which a normal probability distribution would cover most of the 0-1 range. A similar problem occurs with the Pearl Harbor’s and Windhoek’s inflection1
standard deviation.
Since the purpose is to test the potential fit of the Weather model, not the optimisation approach, we proceed to divide by a third all instances of standard deviations.
<- 0.3
sd_adjustment
for (site in sites)
{$sd[3] <- best_estimation_fits_mean[[site]]$sd[3] * sd_adjustment
best_estimation_fits_mean[[site]]$sd[5] <- best_estimation_fits_mean[[site]]$sd[5] * sd_adjustment
best_estimation_fits_mean[[site]]
}"Pearl Harbor"]]$sd[2] <- best_estimation_fits_mean[["Pearl Harbor"]]$sd[2] * sd_adjustment
best_estimation_fits_mean[["Windhoek"]]$sd[2] <- best_estimation_fits_mean[["Windhoek"]]$sd[2] * sd_adjustment best_estimation_fits_mean[[
<- list()
best_estimation_fits_mean_table
for (site in sites)
{<- paste0(
best_estimation_fits_mean_table[[site]] round(best_estimation_fits_mean[[site]]$mean, digits = 4),
" (±", round(best_estimation_fits_mean[[site]]$sd, digits = 4), ")")
}<- as.data.frame(best_estimation_fits_mean_table,
best_estimation_fits_mean_table row.names = c("plateau value", "inflection1", "rate1", "inflection2", "rate2", "n_samples", "max_sample_size"))
::kable(best_estimation_fits_mean_table) knitr
Irkutsk | Cambridge | Rakhigarhi | Pearl.Harbor | Windhoek | Sao.Paulo | Hobart | |
---|---|---|---|---|---|---|---|
plateau value | 0.0935 (±0.1015) | 0.3902 (±0.2624) | 0.2267 (±0.1762) | 0.783 (±0.19) | 0.814 (±0.1738) | 0.5705 (±0.1077) | 0.3987 (±0.281) |
inflection1 | 60.909 (±4.4393) | 116.5566 (±24.6686) | 34.8833 (±11.7029) | 69.9943 (±20.8901) | 27.9262 (±8.0914) | 43.2431 (±23.0397) | 122.0008 (±0.0077) |
rate1 | 0.1268 (±0.0765) | 0.0474 (±0.0545) | 0.5323 (±0.1075) | 0.1046 (±0.0766) | 0.3787 (±0.1179) | 0.3253 (±0.1078) | 0.01 (±0) |
inflection2 | 204.3544 (±9.2733) | 262.4333 (±44.5082) | 210.5208 (±11.3796) | 325.012 (±12.8186) | 333.2413 (±11.3875) | 319.3132 (±14.9556) | 253.1712 (±34.4429) |
rate2 | 0.0352 (±0.0084) | 0.01 (±0) | 0.3347 (±0.1202) | 0.3834 (±0.1203) | 0.4163 (±0.1145) | 0.0292 (±0.0028) | 0.0106 (±4e-04) |
n_samples | 178.7062 (±3.5977) | 181.7599 (±1.9596) | 178.646 (±10.3818) | 179.9609 (±9.3249) | 176.6509 (±19.3775) | 182.5667 (±3.5898) | 181.4085 (±4.7813) |
max_sample_size | 15.0996 (±3.9922) | 13.0235 (±2.2968) | 17.2914 (±4.5612) | 13.2541 (±6.3016) | 17.0167 (±5.7825) | 16.5374 (±1.8574) | 13.6198 (±2.5149) |
7.3 Running the entire Weather model using all estimated parameters
Calculate yearly summary statistics matching parameter inputs for each example location:
# Define summary function for a single site
<- function(site_data) {
calculate_site_summary # Daily aggregated statistics
<- aggregate(site_data$T2M, by = list(site_data$DOY), FUN = mean)
daily_temp_mean <- aggregate(site_data$T2M, by = list(site_data$DOY), FUN = sd)
daily_temp_sd <- aggregate(site_data$ALLSKY_SFC_SW_DWN, by = list(site_data$DOY), FUN = mean)
daily_solar_mean <- aggregate(site_data$ALLSKY_SFC_SW_DWN, by = list(site_data$DOY), FUN = sd)
daily_solar_sd
# Yearly precipitation aggregation
<- aggregate(site_data$PRECTOT, by = list(site_data$YEAR), FUN = sum)
annual_sum
# Return computed values as a named list
list(
temp_annual_max = max(daily_temp_mean$x, na.rm = TRUE),
temp_annual_min = min(daily_temp_mean$x, na.rm = TRUE),
temp_daily_fluctuation = mean(daily_temp_sd$x, na.rm = TRUE),
temp_daily_lower_dev = mean(site_data$T2M - site_data$T2M_MIN, na.rm = TRUE),
temp_daily_upper_dev = mean(site_data$T2M_MAX - site_data$T2M, na.rm = TRUE),
solar_annual_max = max(daily_solar_mean$x, na.rm = TRUE),
solar_annual_min = min(daily_solar_mean$x, na.rm = TRUE),
solar_daily_fluctuation = mean(daily_solar_sd$x, na.rm = TRUE),
precip_annual_sum_mean = mean(annual_sum$x, na.rm = TRUE),
precip_annual_sum_sd = sd(annual_sum$x, na.rm = TRUE)
)
}
# Apply the function across sites
<- lapply(split(weather, weather$Site), calculate_site_summary)
annual_weather_summary
# Convert the list of summaries into a data frame
<- do.call(rbind, annual_weather_summary)
annual_weather_summary_df #annual_weather_summary_df <- cbind(Site = names(annual_weather_summary), annual_weather_summary_df)
# Ensure the data frame structure is consistent
<- as.data.frame(annual_weather_summary_df)
annual_weather_summary_df #rownames(annual_weather_summary_df) <- NULL
Initialise experiments per site using annual summary statistics and estimated yearly cumulative precipitation parameters of example locations as parameter inputs:
<- list()
weather_model_runs
for (site in sites)
{<- best_estimation_fits_mean[[site]]
estimation_optim
<- initialise_weather_model(
weather_model_runs[[site]] year_length = year_length_in_days,
seed = SEED,
albedo = 0.4,
is_southern_hemisphere = weather[weather$Site == site,"LAT"][1] < 0,
temp_annual_max = annual_weather_summary_df$temp_annual_max[[site]],
temp_annual_min = annual_weather_summary_df$temp_annual_min[[site]],
temp_daily_fluctuation = annual_weather_summary_df$temp_daily_fluctuation[[site]],
temp_daily_lower_dev = annual_weather_summary_df$temp_daily_lower_dev[[site]],
temp_daily_upper_dev = annual_weather_summary_df$temp_daily_upper_dev[[site]],
solar_annual_max = annual_weather_summary_df$solar_annual_max[[site]],
solar_annual_min = annual_weather_summary_df$solar_annual_min[[site]],
solar_daily_fluctuation = annual_weather_summary_df$solar_daily_fluctuation[[site]],
precip_annual_sum_mean = annual_weather_summary_df$precip_annual_sum_mean[[site]],
precip_annual_sum_sd = annual_weather_summary_df$precip_annual_sum_sd[[site]],
precip_plateau_value_mean = estimation_optim$mean[1],
precip_plateau_value_sd = estimation_optim$sd[1],
precip_inflection1_mean = estimation_optim$mean[2],
precip_inflection1_sd = estimation_optim$sd[2],
precip_rate1_mean = estimation_optim$mean[3],
precip_rate1_sd = estimation_optim$sd[3],
precip_inflection2_mean = estimation_optim$mean[4],
precip_inflection2_sd = estimation_optim$sd[4],
precip_rate2_mean = estimation_optim$mean[5],
precip_rate2_sd = estimation_optim$sd[5],
precip_n_samples_mean = estimation_optim$mean[6],
precip_n_samples_sd = estimation_optim$sd[6],
precip_max_sample_size_mean = estimation_optim$mean[7],
precip_max_sample_size_sd = estimation_optim$sd[7]
) }
Run experiments:
for (site in sites)
{<-
weather_model_runs[[site]] run_weather_model(weather_model_runs[[site]], number_of_years)
}
Create a data frame containing the daily summary statistics of simulations comparable to the one for the real data:
# Function to calculate summary statistics for a single day's data
<- function(day_data) {
calculate_daily_summary # Solar radiation
<- mean(day_data$solar_radiation, na.rm = TRUE)
solar_mean <- sd(day_data$solar_radiation, na.rm = TRUE)
solar_sd <- max(day_data$solar_radiation, na.rm = TRUE)
solar_max <- min(day_data$solar_radiation, na.rm = TRUE)
solar_min <- qt(0.975, df = max(length(day_data$solar_radiation) - 1, 1)) *
solar_error / sqrt(length(day_data$solar_radiation))
solar_sd
# Temperature
<- mean(day_data$temperature, na.rm = TRUE)
temp_mean <- sd(day_data$temperature, na.rm = TRUE)
temp_sd <- max(day_data$temperature, na.rm = TRUE)
temp_max <- min(day_data$temperature, na.rm = TRUE)
temp_min <- qt(0.975, df = max(length(day_data$temperature) - 1, 1)) *
temp_error / sqrt(length(day_data$temperature))
temp_sd
# Max temperature
<- mean(day_data$temperature_max, na.rm = TRUE)
max_temp_mean <- max(day_data$temperature_max, na.rm = TRUE)
max_temp_max <- min(day_data$temperature_max, na.rm = TRUE)
max_temp_min <- qt(0.975, df = max(length(day_data$temperature_max) - 1, 1)) *
max_temp_error sd(day_data$temperature_max, na.rm = TRUE) /
sqrt(length(day_data$temperature_max))
# Min temperature
<- mean(day_data$temperature_min, na.rm = TRUE)
min_temp_mean <- max(day_data$temperature_min, na.rm = TRUE)
min_temp_max <- min(day_data$temperature_min, na.rm = TRUE)
min_temp_min <- qt(0.975, df = max(length(day_data$temperature_min) - 1, 1)) *
min_temp_error sd(day_data$temperature_min, na.rm = TRUE) /
sqrt(length(day_data$temperature_min))
# Deviations
<- mean(day_data$temperature - day_data$temperature_min, na.rm = TRUE)
lower_dev <- qt(0.975, df = max(length(day_data$temperature_min) - 1, 1)) *
lower_dev_error sd(day_data$temperature - day_data$temperature_min, na.rm = TRUE) /
sqrt(length(day_data$temperature_min))
<- mean(day_data$temperature_max - day_data$temperature, na.rm = TRUE)
upper_dev <- qt(0.975, df = max(length(day_data$temperature_max) - 1, 1)) *
upper_dev_error sd(day_data$temperature_max - day_data$temperature, na.rm = TRUE) /
sqrt(length(day_data$temperature_max))
# Precipitation
<- mean(day_data$precipitation, na.rm = TRUE)
precip_mean <- max(day_data$precipitation, na.rm = TRUE)
precip_max <- min(day_data$precipitation, na.rm = TRUE)
precip_min <- qt(0.975, df = max(length(day_data$precipitation) - 1, 1)) *
precip_error sd(day_data$precipitation, na.rm = TRUE) /
sqrt(length(day_data$precipitation))
# Combine results into a named list
list(
solarRadiation.mean = solar_mean,
solarRadiation.sd = solar_sd,
solarRadiation.max = solar_max,
solarRadiation.min = solar_min,
solarRadiation.error = solar_error,
temperature.mean = temp_mean,
temperature.sd = temp_sd,
temperature.max = temp_max,
temperature.min = temp_min,
temperature.error = temp_error,
maxTemperature.mean = max_temp_mean,
maxTemperature.max = max_temp_max,
maxTemperature.min = max_temp_min,
maxTemperature.error = max_temp_error,
minTemperature.mean = min_temp_mean,
minTemperature.max = min_temp_max,
minTemperature.min = min_temp_min,
minTemperature.error = min_temp_error,
temperature.lowerDeviation = lower_dev,
temperature.lowerDeviation.error = lower_dev_error,
temperature.upperDeviation = upper_dev,
temperature.upperDeviation.error = upper_dev_error,
precipitation.mean = precip_mean,
precipitation.max = precip_max,
precipitation.min = precip_min,
precipitation.error = precip_error
)
}
# Process data for all sites and days
<- do.call(rbind, lapply(sites, function(site) {
weather_summary_sim <- as.data.frame(weather_model_runs[[site]]$daily)
site_data do.call(rbind, lapply(1:max(year_length_in_days), function(day) {
<- site_data[site_data$current_day_of_year == day,]
day_data as.data.frame(list(
Site = site,
day_of_year = day,
calculate_daily_summary(day_data)
))
}))
}))
# Convert to a data frame
<- as.data.frame(weather_summary_sim) weather_summary_sim
7.4 Creating figure
Set colours for real and simulated data:
= hsv(200/360, 62/100, 63/100) # teal
realDataColour
= hsv(24/360, 79/100, 89/100) # orange simulatedDataColour
Create figure:
# Helper functions
<- function(x, base, round_fn = round) {
round_to_multiple round_fn(x / base) * base
}
<- function(x, y1, y2, alpha = 0.5, col = "black") {
create_polygon polygon(c(x, rev(x)), c(y1, rev(y2)), col = adjustcolor(col, alpha = alpha), border = NA)
}
<- function(x, y, ylim, lwd, col = "black", lty = 1) {
plot_weather_variable plot(x, y, axes = FALSE, ylim = ylim, type = "l", lwd = lwd, col = col, lty = lty)
}
<- function(x, y_mean, error, col, alpha = 0.5) {
add_confidence_interval create_polygon(x, y_mean + error, y_mean, alpha, col)
create_polygon(x, y_mean - error, y_mean, alpha, col)
}
<- function(x, y_mean, y_min, y_max, col, alpha = 0.3) {
add_min_max_interval create_polygon(x, y_max, y_mean, alpha, col)
create_polygon(x, y_min, y_mean, alpha, col)
}
# Main plotting function
<- function(weather_summary, sites, sites_latitude, weather) {
plot_weather_summary_comparison # Setup plot
<- length(sites) + 1
num_columns <- 4
num_rows_except_bottom
<- rbind(
layout_matrix matrix(1:(num_columns * num_rows_except_bottom), nrow = num_rows_except_bottom, ncol = num_columns, byrow = FALSE),
c((num_columns * num_rows_except_bottom) + 1, rep((num_columns * num_rows_except_bottom) + 2, length(sites)))
)
layout(layout_matrix,
widths = c(3, 12, rep(10, length(sites) - 2), 14),
heights = c(3, 10, 10, 12, 2))
# Y-axis labels
<- c(expression(paste("solar radiation (", MJ/m^-2, ")")),
y_labs "temperature (C)", "precipitation (mm)")
# Calculate ranges
<- c(
range_solar round_to_multiple(min(
min(weather_summary$solarRadiation.min),
min(weather_summary_sim$solarRadiation.min)),
5, floor),
round_to_multiple(max(
max(weather_summary$solarRadiation.max),
40),
#max(weather_summary_sim$solarRadiation.max)),
## an outlier in Sao Paulo brings it to c. 46 and does not show with the polygon
5, ceiling)
)<- c(
range_temp round_to_multiple(min(
min(weather_summary$minTemperature.min),
min(weather_summary_sim$minTemperature.min)),
5, floor),
round_to_multiple(max(
max(weather_summary$maxTemperature.max),
max(weather_summary_sim$maxTemperature.max)),
5, ceiling)
)<- c(
range_precip round_to_multiple(min(
min(weather_summary$precipitation.min),
min(weather_summary_sim$precipitation.min)),
5, floor),
round_to_multiple(max(
max(weather_summary$precipitation.max),
max(weather_summary_sim$precipitation.max)),
5, ceiling)
)
# Plot settings
par(cex = graphic_scale, cex.axis = graphic_scale * (font_rescale + axis_text_rescale))
# First column: y axis titles
for (i in 1:4) {
par(mar = c(0, 0, 0, 0.4))
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
if (i > 1) {
text(x = 0.5, y = 0.5, font = 4,
cex = graphic_scale * (0.78 + font_rescale + margin_text_rescale_left),
srt = 90,
labels = y_labs[i-1])
}
}
# Plot for each site
for (site in sites) {
<- weather[weather$Site == site,]
weather_site <- weather_model_runs[[site]]$daily
weather_model_site <- weather_summary[weather_summary$Site == site,]
weather_summary_site <- weather_summary_sim[weather_summary_sim$Site == site,]
weather_summary_site_sim
<- ifelse(site == sites[1], 2, 0.1)
left_plot_margin <- ifelse(site == sites[length(sites)], 4, 0.1)
right_plot_margin
# Site name + latitude
par(mar = c(0.2, left_plot_margin, 0.1, right_plot_margin))
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
text(x = 0.5, y = 0.5, font = 4,
cex = graphic_scale * (0.7 + font_rescale + margin_text_rescale_top),
labels = paste(site, sites_latitude$Latitude[sites_latitude$Site == site], sep = "\n"))
# Solar radiation
# original data
par(mar = c(0.1, left_plot_margin, 0.1, right_plot_margin))
plot_weather_variable(1:year_length_max, weather_summary_site$solarRadiation.mean,
range_solar, graphic_scale, col = adjustcolor(realDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max, weather_summary_site$solarRadiation.mean,
$solarRadiation.error,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$solarRadiation.mean,
weather_summary_site$solarRadiation.min,
weather_summary_site$solarRadiation.max,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.5))
# simulations
lines(1:year_length_max, weather_summary_site_sim$solarRadiation.mean,
lwd = graphic_scale,
col = adjustcolor(simulatedDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$solarRadiation.mean,
weather_summary_site_sim$solarRadiation.error,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$solarRadiation.mean,
weather_summary_site_sim$solarRadiation.min,
weather_summary_site_sim$solarRadiation.max,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.5))
# solstices and axes
#lines(1:year_length_max, weather_summary_site$solarRadiationTop.mean, lty = 2, lwd = graphic_scale)
abline(v = c(SOLSTICE_SUMMER, SOLSTICE_WINTER), lty = 3, lwd = graphic_scale)
if (site == sites[1]) {
axis(2, at = seq(range_solar[1], range_solar[2], 5))
}
# Temperature
# original data
plot_weather_variable(1:year_length_max, weather_summary_site$temperature.mean,
range_temp, graphic_scale,col = adjustcolor(realDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$temperature.mean,
weather_summary_site$temperature.error,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$temperature.mean,
weather_summary_site$temperature.min,
weather_summary_site$temperature.max,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.5))
lines(1:year_length_max, weather_summary_site$maxTemperature.mean,
lwd = graphic_scale,
col = adjustcolor(realDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$maxTemperature.mean,
weather_summary_site$maxTemperature.error,
weather_summary_sitecol = adjustcolor(realDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$maxTemperature.mean,
weather_summary_site$maxTemperature.min,
weather_summary_site$maxTemperature.max,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.5))
lines(1:year_length_max, weather_summary_site$minTemperature.mean,
lwd = graphic_scale,
col = adjustcolor(realDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$minTemperature.mean,
weather_summary_site$minTemperature.error,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$minTemperature.mean,
weather_summary_site$minTemperature.min,
weather_summary_site$minTemperature.max,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.5))
# simulations
lines(1:year_length_max, weather_summary_site_sim$temperature.mean,
lwd = graphic_scale,
col = adjustcolor(simulatedDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$temperature.mean,
weather_summary_site_sim$temperature.error,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$temperature.mean,
weather_summary_site_sim$temperature.min,
weather_summary_site_sim$temperature.max,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.5))
lines(1:year_length_max, weather_summary_site_sim$maxTemperature.mean,
lwd = graphic_scale,
col = adjustcolor(simulatedDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$maxTemperature.mean,
weather_summary_site_sim$maxTemperature.error,
weather_summary_site_simcol = adjustcolor(simulatedDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$maxTemperature.mean,
weather_summary_site_sim$maxTemperature.min,
weather_summary_site_sim$maxTemperature.max,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.5))
lines(1:year_length_max, weather_summary_site_sim$minTemperature.mean,
lwd = graphic_scale,
col = adjustcolor(simulatedDataColour, alpha.f = 1))
add_confidence_interval(1:year_length_max,
$minTemperature.mean,
weather_summary_site_sim$minTemperature.error,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.75))
add_min_max_interval(1:year_length_max,
$minTemperature.mean,
weather_summary_site_sim$minTemperature.min,
weather_summary_site_sim$minTemperature.max,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.5))
# solstices and axes
abline(v = c(SOLSTICE_SUMMER, SOLSTICE_WINTER), lty = 3, lwd = graphic_scale)
if (site == sites[1]) {
axis(2, at = seq(range_temp[1], range_temp[2], 5))
}
# Precipitation
par(mar = c(8, left_plot_margin, 0.1, right_plot_margin))
# cumulative precipitation
plot(c(1, year_length_max), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
# original data
for (year in years) {
<- weather_site$PRECTOT[weather_site$YEAR == year]
site_year_data lines(1:length(site_year_data),
get_cumulative_precipitation_of_year(site_year_data),
lwd = graphic_scale,
col = adjustcolor(realDataColour, alpha.f = 0.5))
}# simulation
for (year in 1:number_of_years) {
<- weather_model_site$precipitation[weather_model_site$current_year == year]
site_year_data_sim lines(1:length(site_year_data_sim),
get_cumulative_precipitation_of_year(site_year_data_sim),
lwd = graphic_scale,
col = adjustcolor(simulatedDataColour, alpha.f = 0.5))
}
if (site == sites[length(sites)]) {
axis(4, at = seq(0, 1, 0.25))
mtext("cumulative annual sum", 4, line = 2.5, cex = graphic_scale * (font_rescale + margin_text_rescale_right))
}
# daily precipitation
par(new = TRUE, mar = c(3, left_plot_margin, 0.1, right_plot_margin))
# original data
plot_weather_variable(1:year_length_max,
$precipitation.mean,
weather_summary_site
range_precip,
graphic_scale, col = adjustcolor(realDataColour, alpha.f = 0.5))
add_confidence_interval(1:year_length_max,
$precipitation.mean,
weather_summary_site$precipitation.error,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.5))
add_min_max_interval(1:year_length_max,
$precipitation.mean,
weather_summary_site$precipitation.min,
weather_summary_site$precipitation.max,
weather_summary_siteadjustcolor(realDataColour, alpha.f = 0.5))
# simulation
lines(1:year_length_max,
$precipitation.mean,
weather_summary_site_simlwd = graphic_scale,
col = adjustcolor(simulatedDataColour, alpha.f = 0.5))
add_confidence_interval(1:year_length_max,
$precipitation.mean,
weather_summary_site_sim$precipitation.error,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.5))
add_min_max_interval(1:year_length_max,
$precipitation.mean,
weather_summary_site_sim$precipitation.min,
weather_summary_site_sim$precipitation.max,
weather_summary_site_simadjustcolor(simulatedDataColour, alpha.f = 0.5))
# solstices and axes
abline(v = c(SOLSTICE_SUMMER, SOLSTICE_WINTER), lty = 3, lwd = graphic_scale)
if (site == sites[1]) {
axis(2, at = seq(range_precip[1], range_precip[2], 50))
}
axis(1, at = cumsum(c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)), las = 2)
}
# Bottom row: "day of year" label
par(mar = c(0, 0, 0, 0))
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
text(x = 0.5, y = 0.7, font = 4,
cex = graphic_scale * (0.8 + font_rescale + margin_text_rescale_bottom),
labels = "day of year")
}
# Main execution
<- file.path(output_dir, paste0("Fig6-ValidationUsingExamples.", plot_file_format))
plot_name
if (plot_file_format == "png") {
<- 1
graphic_scale <- 0.3
font_rescale <- 0.5
axis_text_rescale <- 0.4
margin_text_rescale_top <- 0
margin_text_rescale_left <- 1
margin_text_rescale_right <- 0.7
margin_text_rescale_bottom
png(plot_name, width = number_of_sites * graphic_scale * 114, height = graphic_scale * 534)
else if (plot_file_format == "eps") {
} = 1.2
graphic_scale = 0.1
font_rescale = 0.8
axis_text_rescale <- 0.3
margin_text_rescale_top <- 0.1
margin_text_rescale_left <- 1.1
margin_text_rescale_right <- 0.7
margin_text_rescale_bottom
::loadfonts(device = "postscript")
extrafont::cairo_ps(filename = plot_name ,
grDevicespointsize = 12,
width = number_of_sites * graphic_scale * 1.5,
height = graphic_scale * 8,
onefile = FALSE,
family = "sans"
)
}plot_weather_summary_comparison(weather_summary, sites, sites_latitude, weather)
dev.off()
svg
2
::include_graphics(plot_name) knitr