geoschem · yantosca · Apr 10, 2025 · Apr 10, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to GCPy will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased] - TBD
+### Added
+- Added `create_benchmark_sanity_check_table` routine to `gcpy/benchmark/modules/benchmark_funcs.py` to test if variables are all zero
+  or all NaN
+
 ## [1.6.1] - 2025-03-24
 ### Added
 - Added GCPy environment file `docs/environment_files/gcpy_environment_py313.yml`, which is based on Python 3.13

diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
@@ -127,6 +127,7 @@ options:
     ste_table: True # GCC only
     timing_table: True
     summary_table: True
+    sanity_check_table: True
     plot_options:
       by_spc_cat: True
       by_hco_cat: True

diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
@@ -127,6 +127,7 @@ options:
     ste_table: True # GCC only
     timing_table: True
     summary_table: True
+    sanity_check_table: True
     plot_options:
       by_spc_cat: True
       by_hco_cat: True

diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml
@@ -127,6 +127,7 @@ options:
     ste_table: True # GCC only
     timing_table: True
     summary_table: True
+    sanity_check_table: True
     plot_options:
       by_spc_cat: True
       by_hco_cat: True

diff --git a/gcpy/benchmark/modules/benchmark_funcs.py b/gcpy/benchmark/modules/benchmark_funcs.py
@@ -18,7 +18,8 @@
 from gcpy.grid import get_troposphere_mask
 from gcpy.util import replace_whitespace
 from gcpy.units import convert_units
-from gcpy.constants import COL_WIDTH, MW_AIR_g, skip_these_vars, TABLE_WIDTH
+from gcpy.constants import \
+    COL_WIDTH, ENCODING, MW_AIR_g, skip_these_vars, TABLE_WIDTH
 from gcpy.plot.compare_single_level import compare_single_level
 from gcpy.plot.compare_zonal_mean import compare_zonal_mean
 from gcpy.benchmark.modules.benchmark_utils import \
@@ -5358,3 +5359,135 @@ def diff_of_diffs_toprow_title(config, model):
         )
 
     return title
+
+
+def create_benchmark_sanity_check_table(
+        devpath,
+        devstr,
+        devdate,
+        collections,
+        dst="./benchmark",
+        is_gchp=False,
+        overwrite=False,
+        outfilename="Diagnostic_Sanity_Check.txt",
+        verbose=False,
+):
+    """
+    Creates a diagnostic sanity check table that shows which diagnostic
+    variables are zero everywhere or NaN everywhere.  This can help to
+    identify bugs in diagnostic output.
+
+    Args:
+        devpath: str
+            Path to the data set to be checked (aka "Dev").
+        devstr: str
+            A string that can be used to identify the data set specified
+            by devpath (e.g. a model version number or other identifier).
+        devdate: np.datetime64
+            Date/time stamp used by the "Dev" data files.
+        collections: list of strings
+            List of diagnostic collections to examine.
+
+    Keyword Args (optional):
+        dst: str
+            A string denoting the destination folder where the file
+            containing the sanity check table will be written.
+            Default value: "./benchmark"
+        is_gchp : bool
+           Set this flag to True to denote if the data is from GCHP.
+           Default value: False
+        overwrite: bool
+            Set this flag to True to overwrite files in the
+            destination folder (specified by the dst argument).
+            Default value: False
+        outfilename: str
+            Name of the text file which will contain the sanity
+            check table.
+            Default value: "Diagnostic_Sanity_Check.txt"
+        verbose: bool
+            Set this switch to True if you wish to print out extra
+            informational messages.
+            Default value: False
+
+    Remarks:
+        This method is mainly intended for model benchmarking purposes,
+        rather than as a general-purpose tool.
+    """
+
+    # ==================================================================
+    # Initial preparations
+    # ==================================================================
+
+    # Replace whitespace in the dev label
+    devstr = replace_whitespace(devstr)
+
+    # Create the directory for output (if necessary)
+    util.make_directory(dst, overwrite)
+    outfilename = os.path.join(dst, outfilename)
+
+    # Pick the proper function to read the data
+    reader = util.dataset_reader(multi_files=False, verbose=verbose)
+
+    # Variables to skip.  Build a new list, because list.append()
+    # returns None and would modify the shared skip_these_vars in place.
+    skip_vars = [*skip_these_vars, "AREA"]
+
+    # ==================================================================
+    # Open output file and write header
+    # ==================================================================
+    with open(outfilename, "w", encoding=ENCODING) as ofile:
+
+        # Title strings
+        title1 = "### Benchmark diagnostic sanity check table"
+        title2 = f"### Dev = {devstr}"
+
+        # Print header to file
+        print("#" * 80, file=ofile)
+        print(f"{title1 : <77}{'###'}", file=ofile)
+        print(f"{'###'  : <77}{'###'}", file=ofile)
+        print(f"{title2 : <77}{'###'}", file=ofile)
+        print("#" * 80, file=ofile)
+
+        # ==============================================================
+        # Loop over diagnostic collections and scan files
+        # ==============================================================
+        for col in collections:
+
+            # Read data into an xr.DataSet object
+            file_name = util.get_filepath(
+                devpath,
+                col,
+                devdate,
+                is_gchp=is_gchp,
+            )
+            dset = reader(
+                file_name,
+                drop_variables=skip_vars
+            ).load()
+
+            # Determine which variables are all zeroes or all NaN.
+            # NaN never compares equal to anything (not even itself), so
+            # use np.isnan, applied only to float/complex (inexact) dtypes.
+            all_zeros_or_nans = []
+            for var in dset.data_vars:
+                data = dset[var].values
+                all_nan = np.issubdtype(data.dtype, np.inexact) \
+                    and np.all(np.isnan(data))
+                if np.all(data == 0) or all_nan:
+                    all_zeros_or_nans.append(var)
+
+            # ===========================================================
+            # Print results for each collection
+            # ===========================================================
+            print("", file=ofile)
+            print("="*80, file=ofile)
+            print(f"{os.path.basename(file_name)}", file=ofile)
+            print("="*80, file=ofile)
+            print("", file=ofile)
+
+            if not all_zeros_or_nans:
+                print("No variables were all zero or all NaN", file=ofile)
+            else:
+                print("These variables were all zero or all NaN:", file=ofile)
+                for var in all_zeros_or_nans:
+                    print(f"   {var}", file=ofile)
diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py
@@ -56,7 +56,8 @@
     make_benchmark_emis_tables, make_benchmark_jvalue_plots, \
     make_benchmark_aod_plots, make_benchmark_mass_tables, \
     make_benchmark_mass_accumulation_tables, \
-    make_benchmark_operations_budget, create_benchmark_summary_table
+    make_benchmark_operations_budget, create_benchmark_summary_table, \
+    create_benchmark_sanity_check_table
 from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table
 from gcpy.benchmark.modules.oh_metrics import make_benchmark_oh_metrics
 from gcpy.benchmark.modules.run_1yr_fullchem_benchmark \
@@ -685,6 +686,34 @@ def run_benchmark_default(config):
                 verbose=False,
             )
 
+        # ==================================================================
+        # GCC vs. GCC diagnostic sanity check table
+        # ==================================================================
+        if config["options"]["outputs"]["sanity_check_table"]:
+            print("\n%%% Creating GCC vs. GCC sanity_check table %%%")
+
+            # Write a table listing the Dev diagnostic variables
+            # that are all zero or all NaN in each collection.
+            create_benchmark_sanity_check_table(
+                gcc_vs_gcc_devdir,
+                config["data"]["dev"]["gcc"]["version"],
+                gcc_dev_date,
+                collections = [
+                    'AerosolMass',
+                    'Aerosols',
+                    'DryDep',
+                    'Emissions',
+                    'JValues',
+                    'Metrics',
+                    'SpeciesConc',
+                    'StateMet'
+                ],
+                dst=gcc_vs_gcc_tablesdir,
+                outfilename="Diagnostic_Sanity_Check.txt",
+                overwrite=True,
+                verbose=False,
+            )
+
 
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     # Create GCHP vs GCC benchmark plots and tables
@@ -1113,7 +1142,7 @@ def run_benchmark_default(config):
                 verbose=False,
                 dev_gchp=True
             )
-
+            
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     # Create GCHP vs GCHP benchmark plots and tables
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1623,6 +1652,35 @@ def run_benchmark_default(config):
                 dev_gchp=True,
             )
 
+        # ==================================================================
+        # GCHP vs. GCHP diagnostic sanity check table
+        # ==================================================================
+        if config["options"]["outputs"]["sanity_check_table"]:
+            print("\n%%% Creating GCHP vs. GCHP sanity check table %%%")
+
+            # Write a table listing the Dev diagnostic variables
+            # that are all zero or all NaN in each collection.
+            create_benchmark_sanity_check_table(
+                gchp_vs_gchp_devdir,
+                config["data"]["dev"]["gchp"]["version"],
+                gchp_dev_date,
+                collections=[
+                    'AerosolMass',
+                    'Aerosols',
+                    'DryDep',
+                    'Emissions',
+                    'JValues',
+                    'Metrics',
+                    'SpeciesConc',
+                    'StateMet',
+                ],
+                dst=gchp_vs_gchp_tablesdir,
+                is_gchp=True,
+                outfilename="Diagnostic_Sanity_Check.txt",
+                overwrite=True,
+                verbose=False,
+            )
+
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     # Create GCHP vs GCC difference of differences benchmark plots
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%