+
Skip to content

Add benchmark diagnostic sanity check table #356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to GCPy will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased] - TBD
### Added
- Added `create_benchmark_sanity_check_table` routine to `gcpy/benchmark/modules/benchmark_funcs.py` to test if diagnostic variables are all zero
or all NaN

## [1.6.1] - 2025-03-24
### Added
- Added GCPy environment file `docs/environment_files/gcpy_environment_py313.yml`, which is based on Python 3.13
Expand Down
1 change: 1 addition & 0 deletions gcpy/benchmark/cloud/template.1hr_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ options:
ste_table: True # GCC only
timing_table: True
summary_table: True
sanity_check_table: True
plot_options:
by_spc_cat: True
by_hco_cat: True
Expand Down
1 change: 1 addition & 0 deletions gcpy/benchmark/cloud/template.1mo_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ options:
ste_table: True # GCC only
timing_table: True
summary_table: True
sanity_check_table: True
plot_options:
by_spc_cat: True
by_hco_cat: True
Expand Down
1 change: 1 addition & 0 deletions gcpy/benchmark/config/1mo_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ options:
ste_table: True # GCC only
timing_table: True
summary_table: True
sanity_check_table: True
plot_options:
by_spc_cat: True
by_hco_cat: True
Expand Down
135 changes: 134 additions & 1 deletion gcpy/benchmark/modules/benchmark_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from gcpy.grid import get_troposphere_mask
from gcpy.util import replace_whitespace
from gcpy.units import convert_units
from gcpy.constants import COL_WIDTH, MW_AIR_g, skip_these_vars, TABLE_WIDTH
from gcpy.constants import \
COL_WIDTH, ENCODING, MW_AIR_g, skip_these_vars, TABLE_WIDTH
from gcpy.plot.compare_single_level import compare_single_level
from gcpy.plot.compare_zonal_mean import compare_zonal_mean
from gcpy.benchmark.modules.benchmark_utils import \
Expand Down Expand Up @@ -5358,3 +5359,135 @@ def diff_of_diffs_toprow_title(config, model):
)

return title


def _all_zero_or_nan(data):
    """
    Returns True if every element of an array is zero, or if every
    element is NaN.  Returns False otherwise.
    """
    data = np.asarray(data)

    # Zero test (valid for any dtype)
    if np.all(data == 0):
        return True

    # Only floating-point and complex data can hold NaN.  Skipping other
    # dtypes also avoids the TypeError that np.isnan raises for
    # string/object arrays.
    if data.dtype.kind not in "fc":
        return False

    # NaN never compares equal to anything (not even to itself), so
    # "data == np.nan" is always False; np.isnan must be used instead.
    return bool(np.all(np.isnan(data)))


def create_benchmark_sanity_check_table(
    devpath,
    devstr,
    devdate,
    collections,
    dst="./benchmark",
    is_gchp=False,
    overwrite=False,
    outfilename="Diagnostic_Sanity_Check.txt",
    verbose=False,
):
    """
    Creates a diagnostic sanity check table that shows which diagnostic
    variables are zero or NaN everywhere.  This can help to identify
    bugs in diagnostic output.

    Args:
        devpath: str
            Path to the data set to be examined (aka "Dev").
        devstr: str
            A string that can be used to identify the data set specified
            by devpath (e.g. a model version number or other identifier).
        devdate: np.datetime64
            Date/time stamp used by the "Dev" data files.
        collections: list of strings
            List of diagnostic collections to examine.

    Keyword Args (optional):
        dst: str
            A string denoting the destination folder where the file
            containing the sanity check table will be written.
            Default value: "./benchmark"
        is_gchp : bool
            Set this flag to true to denote if the data is from GCHP.
            Default value: False
        overwrite: bool
            Set this flag to True to overwrite files in the
            destination folder (specified by the dst argument).
            Default value: False
        outfilename: str
            Name of the text file which will contain the sanity
            check table.
            Default value: "Diagnostic_Sanity_Check.txt"
        verbose: bool
            Set this switch to True if you wish to print out extra
            informational messages.
            Default value: False

    Remarks:
        This method is mainly intended for model benchmarking purposes,
        rather than as a general-purpose tool.
    """

    # ==================================================================
    # Initial preparations
    # ==================================================================

    # Replace whitespace in the dev label
    devstr = replace_whitespace(devstr)

    # Create the directory for output (if necessary)
    util.make_directory(dst, overwrite)
    outfilename = os.path.join(dst, outfilename)

    # Pick the proper function to read the data
    reader = util.dataset_reader(
        multi_files=False,
        verbose=verbose
    )

    # Variables to skip.  Build a new list: list.append() returns None
    # and would also modify the shared skip_these_vars list in place.
    skip_vars = [*skip_these_vars, "AREA"]

    # ==================================================================
    # Open output file and write header
    # ==================================================================
    with open(outfilename, "w", encoding=ENCODING) as ofile:

        # Title strings
        title1 = "### Benchmark diagnostic sanity check table"
        title2 = f"### Dev = {devstr}"

        # Print header to file
        print("#" * 80, file=ofile)
        print(f"{title1 : <77}{'###'}", file=ofile)
        print(f"{'###'  : <77}{'###'}", file=ofile)
        print(f"{title2 : <77}{'###'}", file=ofile)
        print("#" * 80, file=ofile)

        # ==============================================================
        # Loop over diagnostic collections and scan files
        # ==============================================================
        for col in collections:

            # Read data into an xr.DataSet object
            file_name = util.get_filepath(
                devpath,
                col,
                devdate,
                is_gchp=is_gchp,
            )
            dset = reader(
                file_name,
                drop_variables=skip_vars
            ).load()

            # Determine which variables are all zeroes or all NaN
            all_zeros_or_nans = [
                var for var in dset.data_vars
                if _all_zero_or_nan(dset[var].values)
            ]

            # ===========================================================
            # Print results for each collection
            # ===========================================================
            print("", file=ofile)
            print("="*80, file=ofile)
            print(f"{os.path.basename(file_name)}", file=ofile)
            print("="*80, file=ofile)
            print("", file=ofile)

            if not all_zeros_or_nans:
                print("No variables were all zero or all NaN", file=ofile)
            else:
                print("These variables were all zero or all NaN:", file=ofile)
                for var in all_zeros_or_nans:
                    print(f"   {var}", file=ofile)
62 changes: 60 additions & 2 deletions gcpy/benchmark/run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@
make_benchmark_emis_tables, make_benchmark_jvalue_plots, \
make_benchmark_aod_plots, make_benchmark_mass_tables, \
make_benchmark_mass_accumulation_tables, \
make_benchmark_operations_budget, create_benchmark_summary_table
make_benchmark_operations_budget, create_benchmark_summary_table, \
create_benchmark_sanity_check_table
from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table
from gcpy.benchmark.modules.oh_metrics import make_benchmark_oh_metrics
from gcpy.benchmark.modules.run_1yr_fullchem_benchmark \
Expand Down Expand Up @@ -685,6 +686,34 @@ def run_benchmark_default(config):
verbose=False,
)

# ==================================================================
# GCC vs. GCC diagnostic sanity check table
# ==================================================================
if config["options"]["outputs"]["sanity_check_table"]:
print("\n%%% Creating GCC vs. GCC sanity_check table %%%")

# Write a table listing the Dev diagnostic variables that are
# all zero or all NaN in each collection.
create_benchmark_sanity_check_table(
gcc_vs_gcc_devdir,
config["data"]["dev"]["gcc"]["version"],
gcc_dev_date,
collections = [
'AerosolMass',
'Aerosols',
'DryDep',
'Emissions',
'JValues',
'Metrics',
'SpeciesConc',
'StateMet'
],
dst=gcc_vs_gcc_tablesdir,
outfilename="Diagnostic_Sanity_Check.txt",
overwrite=True,
verbose=False,
)


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Create GCHP vs GCC benchmark plots and tables
Expand Down Expand Up @@ -1113,7 +1142,7 @@ def run_benchmark_default(config):
verbose=False,
dev_gchp=True
)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Create GCHP vs GCHP benchmark plots and tables
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down Expand Up @@ -1623,6 +1652,35 @@ def run_benchmark_default(config):
dev_gchp=True,
)

# ==================================================================
# GCHP vs. GCHP diagnostic sanity check table
# ==================================================================
if config["options"]["outputs"]["sanity_check_table"]:
print("\n%%% Creating GCHP vs. GCHP sanity check table %%%")

# Write a table listing the Dev diagnostic variables that are
# all zero or all NaN in each collection.
create_benchmark_sanity_check_table(
gchp_vs_gchp_devdir,
config["data"]["dev"]["gchp"]["version"],
gchp_dev_date,
collections=[
'AerosolMass',
'Aerosols',
'DryDep',
'Emissions',
'JValues',
'Metrics',
'SpeciesConc',
'StateMet',
],
dst=gchp_vs_gchp_tablesdir,
is_gchp=True,
outfilename="Diagnostic_Sanity_Check.txt",
overwrite=True,
verbose=False,
)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Create GCHP vs GCC difference of differences benchmark plots
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down
点击 这是indexloc提供的php浏览器服务,不要输入任何密码和下载