From 87b093def6b4fb483c55cc8498c8a4a8e8002f90 Mon Sep 17 00:00:00 2001 From: Melissa Sulprizio Date: Fri, 21 Oct 2022 14:32:15 -0400 Subject: [PATCH 01/54] Add "CH4Benchmark" as a benchmark option type Also pass bmk_type to additional routines where needed. Signed-off-by: Melissa Sulprizio --- benchmark/modules/run_1yr_fullchem_benchmark.py | 2 ++ benchmark/run_benchmark.py | 9 +++++++-- gcpy/benchmark.py | 7 ++++++- gcpy/benchmark_categories.yml | 4 ++++ gcpy/emission_species.yml | 1 + 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/benchmark/modules/run_1yr_fullchem_benchmark.py b/benchmark/modules/run_1yr_fullchem_benchmark.py index c3eaf3e7..0db5d579 100755 --- a/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -477,6 +477,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): "plot_options"]["by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"][ "plot_options"]["by_hco_cat"], + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, ) @@ -501,6 +502,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): "plot_options"]["by_spc_cat"], plot_by_hco_cat=config["options"]["outputs"][ "plot_options"]["by_hco_cat"], + benchmark_type=bmk_type, overwrite=True, spcdb_dir=spcdb_dir, ) diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 228256c6..82fbb741 100755 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -77,10 +77,12 @@ def choose_benchmark_type(config): if not ( config["options"]["bmk_type"] == "FullChemBenchmark" or config["options"]["bmk_type"] == "TransportTracersBenchmark" + or config["options"]["bmk_type"] == "CH4Benchmark" ): print( f"Error: invalid benchmark type {config['options']['bmk_type']}. " - + "Please enter FullChemBenchmark or TransportTracersBenchmark." + + "Please enter FullChemBenchmark, TransportTracersBenchmark, or " + + "CH4Benchmark." ) sys.exit() @@ -88,7 +90,10 @@ def choose_benchmark_type(config): end = np.datetime64(config["data"]["ref"]["gcc"]["bmk_end"]) # determine benchmark type and run relevant script if is_full_year(start, end): - if config["options"]["bmk_type"] == "FullChemBenchmark": + if ( + config["options"]["bmk_type"] == "FullChemBenchmark" + or config["options"]["bmk_type"] == "CH4Benchmark" + ): run_1yr_benchmark( config, str(start.astype(datetime).year), diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index bea5b916..19f6f1ed 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -1222,6 +1222,7 @@ def make_benchmark_emis_plots( subdst=None, plot_by_spc_cat=False, plot_by_hco_cat=False, + benchmark_type="FullChemBenchmark", cmpres=None, overwrite=False, verbose=False, @@ -1274,6 +1275,10 @@ def make_benchmark_emis_plots( according to HEMCO emissions categories (e.g. Anthro, Aircraft, Bioburn, etc.) Default value: False + benchmark_type: str + A string denoting the type of benchmark output to plot, + either FullChemBenchmark or TransportTracersBenchmark. + Default value: "FullChemBenchmark" cmpres: string Grid resolution at which to compare ref and dev data, e.g. 
'1x1.25'
         overwrite: bool
@@ -1529,7 +1534,7 @@ def createfile_hco_cat(c):

     # ==================================================================
     if plot_by_spc_cat:
-        catdict = util.get_species_categories()
+        catdict = util.get_species_categories(benchmark_type)
         # in case any emissions are skipped (for use in nested pdf bookmarks)
         warninglist = ([])
         # for checking if emissions species not defined in benchmark category
diff --git a/gcpy/benchmark_categories.yml b/gcpy/benchmark_categories.yml
index d12cb500..70626853 100644
--- a/gcpy/benchmark_categories.yml
+++ b/gcpy/benchmark_categories.yml
@@ -252,3 +252,7 @@ TransportTracersBenchmark:
       - Be7Strat
       - Be10
       - Be10Strat
+CH4Benchmark:
+  CH4:
+    CH4:
+      - CH4
diff --git a/gcpy/emission_species.yml b/gcpy/emission_species.yml
index 5fe0a303..cef7e411 100644
--- a/gcpy/emission_species.yml
+++ b/gcpy/emission_species.yml
@@ -8,6 +8,7 @@ C2H6: Tg
 C3H8: Tg
 CH2Br2: Tg
 CH2O: Tg
+CH4: Tg
 CHBr3: Tg
 CO: Tg
 DMS: Tg

From 3b69d1670a98c6289b7dff6208ef1d8704a23e47 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 5 Dec 2022 11:00:08 -0500
Subject: [PATCH 02/54] Update "regrid_restart_ll_to_cs.py" example

examples/working_with_files/regrid_restart_ll_to_cs.py
- Import numpy
- Update statements that replace restart file names
- Add coordinate arrays lon, lat, lev (which are not added by default)
- Trim trailing whitespace

Signed-off-by: Bob Yantosca
---
 .../working_with_files/regrid_restart_ll_to_cs.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 examples/working_with_files/regrid_restart_ll_to_cs.py

diff --git a/examples/working_with_files/regrid_restart_ll_to_cs.py b/examples/working_with_files/regrid_restart_ll_to_cs.py
old mode 100644
new mode 100755
index 3250ce60..7fd1f542
--- a/examples/working_with_files/regrid_restart_ll_to_cs.py
+++ b/examples/working_with_files/regrid_restart_ll_to_cs.py
@@ -5,6 +5,7 @@

 # Imports
 from os.path import join
+import numpy as np
 import xarray as xr
 import sparselt.esmf
 import sparselt.xr
@@ -39,7 +40,8 @@
 old_to_new_names = {}
 for v in ds_in.data_vars.keys():
     if "SpeciesRst_" in v:
-        old_to_new_names[v] = v.replace("SpeciesRst_", "SPC_")
+        new_name = v.replace("SpeciesRst_", "SPC_")
+        old_to_new_names[v] = new_name
 ds_in = ds_in.rename(old_to_new_names)

 # Loop over cubed-sphere grids
@@ -65,8 +67,17 @@
     # Regrid to cubed-sphere
     ds_out = sparselt.xr.apply(transform, ds_in)

+    # Redefine coordinate arrays to be consistent
+    # with GCHP restart file expectations
+    coords_dict = {
+        "lon": np.arange(1, cs_res+1, dtype=np.float64),
+        "lat": np.arange(1, 6*cs_res+1, dtype=np.float64),
+        "lev": np.arange(1, 73, dtype=np.float64),
+    }
+    ds_out = ds_out.assign_coords(coords_dict)
+
     # Write to output resolution
-    outfile = f"GEOSChem.Restart.{sim}.2019{mm}01_0000z.{cs}.nc4"
+    outfile = f"GEOSChem.Restart.{sim}.2015{mm}01_0000z.{cs}.nc4"
     print(f"Writing {outfile}")
     ds_out.to_netcdf(outfile)

From d3194fff25740f97dfb156e76c7da097c5c0eab2 Mon Sep 17 00:00:00 2001
From: Melissa Sulprizio
Date: Tue, 20 Dec 2022 12:51:07 -0500
Subject: [PATCH 03/54] Add new benchmark type 'CH4Benchmark' for evaluating
 CH4 simulations

Benchmark plots and tables for CH4 simulations can now be generated by
passing benchmark_type='CH4Benchmark'. This option currently utilizes
the existing routines for the FullChemBenchmark option. We may want to
consider removing "FullChem" and making those routines more generic in
the future.
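
As an illustration, the new benchmark type flows through the existing
plotting and table interfaces unchanged. A minimal sketch of a
hypothetical call (file paths, version labels, and the destination
folder are placeholders, not part of this commit; the argument order
follows the ref/dev pattern used throughout benchmark.py):

    from gcpy import benchmark as bmk

    bmk.make_benchmark_emis_tables(
        ["GCC_ref/OutputDir/GEOSChem.Emissions.20190101_0000z.nc4"],
        "GCC_ref",
        ["GCC_dev/OutputDir/GEOSChem.Emissions.20190101_0000z.nc4"],
        "GCC_dev",
        dst="./BenchmarkResults",
        # selects the CH4Benchmark blocks in emission_species.yml
        # and emission_inventories.yml
        benchmark_type="CH4Benchmark",
        overwrite=True,
    )
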
For now, just the concentration plots, emission plots, emission tables,
mass tables, and OH metrics work for the CH4Benchmark option.

Signed-off-by: Melissa Sulprizio
---
 benchmark/1yr_ch4_benchmark.yml                  | 115 ++++++++++++++++++
 .../modules/run_1yr_fullchem_benchmark.py        |   1 +
 benchmark/run_benchmark.py                       |   2 +-
 gcpy/benchmark.py                                |  29 +++--
 gcpy/emission_inventories.yml                    |  57 +++++----
 gcpy/emission_species.yml                        | 102 ++++++++--------
 6 files changed, 218 insertions(+), 88 deletions(-)
 create mode 100644 benchmark/1yr_ch4_benchmark.yml

diff --git a/benchmark/1yr_ch4_benchmark.yml b/benchmark/1yr_ch4_benchmark.yml
new file mode 100644
index 00000000..a83ba5e2
--- /dev/null
+++ b/benchmark/1yr_ch4_benchmark.yml
@@ -0,0 +1,115 @@
+&---
+# =====================================================================
+# Benchmark configuration file (**EDIT AS NEEDED**)
+# customize in the following manner:
+#    (1) Edit the path variables so that they point to folders w/ model data
+#    (2) Edit the version strings for each benchmark simulation
+#    (3) Edit the switches that turn on/off creating of plots and tables
+#    (4) If necessary, edit labels for the dev and ref versions
+# Note: When doing GCHP vs GCC comparisons gchp_dev will be compared
+# to gcc_dev (not gcc_ref!). This ensures consistency in version names
+# when doing GCHP vs GCC diff-of-diffs (mps, 6/27/19)
+# =====================================================================
+#
+# Configuration for 1yr CH4Benchmark
+#
+# paths:
+#   main_dir:    High-level directory containing ref & dev rundirs
+#   results_dir: Directory where plots/tables will be created
+#   weights_dir: Path to regridding weights
+#   spcdb_dir:   Folder in which the species_database.yml file is
+#                located. If set to "default", then will look for
+#                species_database.yml in one of the Dev rundirs.
+# +paths: + main_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/geos-chem/validation/gcpy_test_data/1yr_fullchem + results_dir: /path/to/BenchmarkResults + weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights + spcdb_dir: default +# +# data: Contains configurations for ref and dev runs +# version: Version string (must not contain spaces) +# dir: Path to run directory +# outputs_subdir: Subdirectory w/ GEOS-Chem diagnostic files +# restarts_subdir: Subdirectory w/ GEOS-Chem restarts +# bmk_start: Simulation start date (YYYY-MM-DDThh:mm:ss) +# bmk_end: Simulation end date (YYYY-MM-DDThh:mm:ss) +# resolution: GCHP resolution string +# +data: + ref: + gcc: + version: GCC_ref + dir: GCC_ref + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-01-01T00:00:00" + bmk_end: "2020-01-01T00:00:00" + gchp: + version: GCHP_ref + dir: GCHP_ref + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-01-01T00:00:00" + bmk_end: "2020-01-01T00:00:00" + is_pre_13.1: False # for gcpy_test_data, edit if needed + is_pre_14.0: False # for gcpy_test_data, edit if needed + resolution: c24 # for gcpy_test_data, edit if needed + dev: + gcc: + version: GCC_dev + dir: GCC_dev + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-01-01T00:00:00" + bmk_end: "2020-01-01T00:00:00" + gchp: + version: GCHP_dev + dir: GCHP_dev + outputs_subdir: OutputDir + restarts_subdir: Restarts + bmk_start: "2019-01-01T00:00:00" + bmk_end: "2020-01-01T00:00:00" + is_pre_13.1: False # for gcpy_test_data, edit if needed + is_pre_14.0: False # for gcpy_test_data, edit if needed + resolution: c24 # for gcpy_test_data, edit if needed +# +# options: Specify the types of comparisons to perform +# +options: + bmk_type: CH4Benchmark + gcpy_test: False # Specify if this is a gcpy test validation run + comparisons: + gcc_vs_gcc: + run: True # True to run this comparison + dir: GCC_version_comparison + tables_subdir: Tables + gchp_vs_gcc: + run: False + dir: GCHP_GCC_comparison + tables_subdir: Tables + gchp_vs_gchp: + run: False + dir: GCHP_version_comparison + tables_subdir: Tables + gchp_vs_gcc_diff_of_diffs: + run: False + dir: GCHP_GCC_diff_of_diffs +# +# outputs: Types of output to generate (plots/tables) +# + outputs: + plot_conc: True + plot_emis: True + emis_table: True + plot_jvalues: False + plot_aod: False + mass_table: True + ops_budget_table: False + aer_budget_table: False + Ox_budget_table: False + ste_table: False + OH_metrics: True + plot_options: + by_spc_cat: True + by_hco_cat: True diff --git a/benchmark/modules/run_1yr_fullchem_benchmark.py b/benchmark/modules/run_1yr_fullchem_benchmark.py index 0db5d579..47e7be05 100755 --- a/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -532,6 +532,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): dev, gcc_vs_gcc_devstr, dst=gcc_vs_gcc_resultsdir, + benchmark_type=bmk_type, ref_interval=sec_per_month_ref, dev_interval=sec_per_month_dev, overwrite=True, diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 82fbb741..09049a24 100755 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -527,7 +527,7 @@ def run_benchmark_default(config): # GCC vs. GCC tables of emission and inventory totals # ================================================================== if config["options"]["outputs"]["emis_table"]: - print("\n%%% Creating GCC vs. 
GCC emissions/inventory tables %%%") + print("\n%%% Creating GCC vs. GCC emissions & inventory tables %%%") # Filepaths ref = get_filepath(gcc_vs_gcc_refdir, "Emissions", gcc_ref_date) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 19f6f1ed..c6f68386 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -30,7 +30,6 @@ # YAML files aod_spc = "aod_species.yml" -spc_categories = "benchmark_categories.yml" emission_spc = "emission_species.yml" emission_inv = "emission_inventories.yml" @@ -616,8 +615,8 @@ def make_benchmark_conc_plots( Name of collection to use for plotting. Default value: "SpeciesConc" benchmark_type: str - A string denoting the type of benchmark output to plot, - either FullChemBenchmark or TransportTracersBenchmark. + A string denoting the type of benchmark output to plot, options are + FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark. Default value: "FullChemBenchmark" cmpres: string Grid resolution at which to compare ref and dev data, e.g. '1x1.25' @@ -897,7 +896,7 @@ def make_benchmark_conc_plots( # aerosol categories: Aerosols and Secondary Organic Aerosols. # ================================================================== - # FullChemBenchmark has lumped species (TransportTracers does not) + # FullChemBenchmark has lumped species (TransportTracers, CH4 do not) if "FullChem" in benchmark_type: print("\nComputing lumped species for full chemistry benchmark") print("-->Adding lumped species to ref dataset") @@ -1276,8 +1275,8 @@ def make_benchmark_emis_plots( Aircraft, Bioburn, etc.) Default value: False benchmark_type: str - A string denoting the type of benchmark output to plot, - either FullChemBenchmark or TransportTracersBenchmark. + A string denoting the type of benchmark output to plot, options are + FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark. Default value: "FullChemBenchmark" cmpres: string Grid resolution at which to compare ref and dev data, e.g. '1x1.25' @@ -1635,6 +1634,7 @@ def make_benchmark_emis_tables( devlist, devstr, dst="./benchmark", + benchmark_type="FullChemBenchmark", refmet=None, devmet=None, overwrite=False, @@ -1665,6 +1665,10 @@ def make_benchmark_emis_tables( A string denoting the destination folder where the file containing emissions totals will be written. Default value: ./benchmark + benchmark_type: str + A string denoting the type of benchmark output to plot, options are + FullChemBenchmark, TransportTracersBenchmark or CH4Benchmark. + Default value: "FullChemBenchmark" refmet: str Path name for ref meteorology Default value: None @@ -1752,14 +1756,16 @@ def make_benchmark_emis_tables( # ================================================================== # Emissions species dictionary - species = yaml.load( + spc_dict = yaml.load( open(os.path.join(os.path.dirname(__file__), emission_spc)), Loader=yaml.FullLoader ) - inventories = yaml.load( + species=spc_dict[benchmark_type] + inv_dict = yaml.load( open(os.path.join(os.path.dirname(__file__), emission_inv)), Loader=yaml.FullLoader ) + inventories=inv_dict[benchmark_type] # Destination files file_emis_totals = os.path.join(emisdir, "Emission_totals.txt") @@ -3063,8 +3069,8 @@ def make_benchmark_wetdep_plots( for writing plots Default value: None benchmark_type: str - A string denoting the type of benchmark output to plot, - either FullChemBenchmark or TransportTracersBenchmark. + A string denoting the type of benchmark output to plot, options are + FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark. 
Default value: "FullChemBenchmark" overwrite: bool Set this flag to True to overwrite files in the @@ -3620,7 +3626,8 @@ def make_benchmark_operations_budget( Keyword Args (optional): benchmark_type: str - "TransportTracersBenchmark" or "FullChemBenchmark". + A string denoting the type of benchmark output to plot, options are + FullChemBenchmark, TransportTracersBenchmark, or CH4Benchmark. Default value: None label: str Contains the date or date range for each dataframe title. diff --git a/gcpy/emission_inventories.yml b/gcpy/emission_inventories.yml index 757720f4..075d6488 100644 --- a/gcpy/emission_inventories.yml +++ b/gcpy/emission_inventories.yml @@ -1,26 +1,31 @@ -AEIC: Tg -AFCID: Tg -APEI: Tg -C2H62010: Tg -CEDS: Tg -CEDSship: Tg -DEAD: Tg -DICEAfrica: Tg -GEIAnatural: Tg -GFED: Tg -IODINE: Tg -LIANG: Tg -LIGHTNOX: Tg -MEGAN: Tg -MIX: Tg -NEI2011: Tg -ORDONEZ: Tg -PARANOX: Tg -PLANTDECAY: Tg -SEABIRDS: Tg -SeaFlux: Tg -SeaSalt: Tg -SOILNOX: Tg -VOLCANOdegas: Tg -VOLCANOerupt: Tg -XIAO: Tg +FullChemBenchmark: + AEIC: Tg + AFCID: Tg + APEI: Tg + C2H62010: Tg + CEDS: Tg + CEDSship: Tg + DEAD: Tg + DICEAfrica: Tg + GEIAnatural: Tg + GFED: Tg + IODINE: Tg + LIANG: Tg + LIGHTNOX: Tg + MEGAN: Tg + MIX: Tg + NEI2011: Tg + ORDONEZ: Tg + PARANOX: Tg + PLANTDECAY: Tg + SEABIRDS: Tg + SeaFlux: Tg + SeaSalt: Tg + SOILNOX: Tg + VOLCANOdegas: Tg + VOLCANOerupt: Tg + XIAO: Tg +CH4Benchmark: + GEPA: Tg + Scarpelli_Canada: Tg + Scarpelli_Mexico: Tg diff --git a/gcpy/emission_species.yml b/gcpy/emission_species.yml index cef7e411..64121960 100644 --- a/gcpy/emission_species.yml +++ b/gcpy/emission_species.yml @@ -1,50 +1,52 @@ -ACET: Tg -ALD2: Tg -ALK4: Tg -BCPI: Tg -BCPO: Tg -BENZ: Tg -C2H6: Tg -C3H8: Tg -CH2Br2: Tg -CH2O: Tg -CH4: Tg -CHBr3: Tg -CO: Tg -DMS: Tg -DST1: Tg -DST2: Tg -DST3: Tg -DST4: Tg -EOH: Tg -ETNO3: Tg -GLYC: Tg -GLYX: Tg -HAC: Tg -HCOOH: Tg -HNO2: Tg -HNO3: Tg -ISOP: Tg -LIMO: Tg -MACR: Tg -MEK: Tg -MENO3: Tg -MGLY: Tg -MTPA: Tg -MTPO: Tg -NH3: Tg -'NO': Tg -NO2: Tg -OCPI: Tg -OCPO: Tg -PRPE: Tg -RCHO: Tg -SALA: Tg -SALC: Tg -SO2: Tg -SO4: Tg -SOAP: Tg -SOAS: Tg -TOLU: Tg -XYLE: Tg -pFe: Tg +FullChemBenchmark: + ACET: Tg + ALD2: Tg + ALK4: Tg + BCPI: Tg + BCPO: Tg + BENZ: Tg + C2H6: Tg + C3H8: Tg + CH2Br2: Tg + CH2O: Tg + CHBr3: Tg + CO: Tg + DMS: Tg + DST1: Tg + DST2: Tg + DST3: Tg + DST4: Tg + EOH: Tg + ETNO3: Tg + GLYC: Tg + GLYX: Tg + HAC: Tg + HCOOH: Tg + HNO2: Tg + HNO3: Tg + ISOP: Tg + LIMO: Tg + MACR: Tg + MEK: Tg + MENO3: Tg + MGLY: Tg + MTPA: Tg + MTPO: Tg + NH3: Tg + 'NO': Tg + NO2: Tg + OCPI: Tg + OCPO: Tg + PRPE: Tg + RCHO: Tg + SALA: Tg + SALC: Tg + SO2: Tg + SO4: Tg + SOAP: Tg + SOAS: Tg + TOLU: Tg + XYLE: Tg + pFe: Tg +CH4Benchmark: + CH4: Tg From f2c01cd15570f81518d77d3abf7c2d5413cc18cd Mon Sep 17 00:00:00 2001 From: Melissa Sulprizio Date: Thu, 22 Dec 2022 13:03:28 -0500 Subject: [PATCH 04/54] Update SpeciesConc variable name to SpeciesConcVV for consistency with GEOS-Chem 14.1.0 updates In GEOS-Chem 14.1.0, the SpeciesConc collection may now be saved out with variable names SpeciesConcVV or SpeciesConcMND. To allow for comparison with output from earlier versions, we now rename SpeciesConc to SpeciesConcVV within benchmark.py. 
See corresponding GEOS-Chem PR https://github.com/geoschem/geos-chem/pull/1572 Signed-off-by: Melissa Sulprizio --- gcpy/benchmark.py | 47 ++++++++++++++++++++++++++++++++++++----------- gcpy/util.py | 8 ++++---- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index b4dc0287..97de737e 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -713,20 +713,31 @@ def make_benchmark_conc_plots( refds = reader(ref, drop_variables=gcon.skip_these_vars).load() devds = reader(dev, drop_variables=gcon.skip_these_vars).load() + # Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new + # naming introduced in GEOS-Chem 14.1.0 + for v in refds.data_vars.keys(): + if v.startswith('SpeciesConc_'): + spc = v.replace('SpeciesConc_', '') + refds = refds.rename({v: 'SpeciesConcVV_' + spc}) + for v in devds.data_vars.keys(): + if v.startswith('SpeciesConc_'): + spc = v.replace('Species_Conc', '') + devds = devds.rename({v: 'SpeciesConcVV_' + spc}) + + # ----------------------------------------------------------------- + # Kludge, rename wrong variable name + if "SpeciesConcVV_PFE" in refds.data_vars.keys(): + refds = refds.rename({"SpeciesConcVV_PFE": "SpeciesConcVV_pFe"}) + if "SpeciesConcVV_PFE" in devds.data_vars.keys(): + devds = devds.rename({"SpeciesConcVV_PFE": "SpeciesConcVV_pFe"}) + # ----------------------------------------------------------------- + if verbose: print('\nPrinting refds (comparison ref)\n') print(refds) print('\nPrinting devds (comparison dev)\n') print(devds) - # ----------------------------------------------------------------- - # Kludge, rename wrong variable name - if "SpeciesConc_PFE" in refds.data_vars.keys(): - refds = refds.rename({"SpeciesConc_PFE": "SpeciesConc_pFe"}) - if "SpeciesConc_PFE" in devds.data_vars.keys(): - devds = devds.rename({"SpeciesConc_PFE": "SpeciesConc_pFe"}) - # ----------------------------------------------------------------- - # Open met datasets if passed as arguments refmetds = None devmetds = None @@ -835,7 +846,7 @@ def make_benchmark_conc_plots( # ================================================================== if not plot_by_spc_cat: [refds, devds] = util.add_missing_variables(refds, devds) - var_prefix = 'SpeciesConc_' + var_prefix = 'SpeciesConcVV_' varlist = [k for k in refds.data_vars.keys() if var_prefix in k] varlist.sort() @@ -900,14 +911,18 @@ def make_benchmark_conc_plots( # FullChemBenchmark has lumped species (TransportTracers does not) if "FullChem" in benchmark_type: print("\nComputing lumped species for full chemistry benchmark") + print("-->Adding lumped species to ref dataset") refds = util.add_lumped_species_to_dataset(refds) + print("-->Adding lumped species to dev dataset") devds = util.add_lumped_species_to_dataset(devds) + if diff_of_diffs: print("-->Adding lumped species to dev datasets") second_refds = util.add_lumped_species_to_dataset(second_refds) second_devds = util.add_lumped_species_to_dataset(second_devds) + util.archive_lumped_species_definitions(dst) print("Lumped species computation complete.\n") @@ -925,7 +940,10 @@ def make_benchmark_conc_plots( [devds, second_devds] = util.add_missing_variables(devds, second_devds) # Collection prefix - coll_prefix = collection.strip() + "_" + if "SpeciesConc" in collection: + coll_prefix = "SpeciesConcVV_" + else: + coll_prefix = collection.strip() + "_" # ================================================================== # Create the plots! 
@@ -3402,6 +3420,13 @@ def make_benchmark_aerosol_tables( drop_variables=gcon.skip_these_vars) # , # combine="nested", concat_dim="time") + # Rename SpeciesConc_ to SpeciesConcVV_ for consistency with new + # naming introduced in GEOS-Chem 14.1.0 + for v in ds_spc.data_vars.keys(): + if v.startswith('SpeciesConc_'): + spc = v.replace('Species_Conc', '') + ds_spc = ds_spc.rename({v: 'SpeciesConcVV_' + spc}) + # Get troposphere mask tropmask = get_troposphere_mask(ds_met) @@ -3501,7 +3526,7 @@ def print_aerosol_metrics(data, species_list, filename, title, label): # Whole-atmosphere and trop-only quantities [g] # NOTE: DryDep is by nature trop-only - varname = "SpeciesConc_" + spc + varname = "SpeciesConcVV_" + spc q[spc + "_f"] = ds_spc[varname].values * vv_to_Tg[spc] q[spc + "_t"] = np.ma.masked_array(q[spc + "_f"], tropmask) diff --git a/gcpy/util.py b/gcpy/util.py index d33bfead..c94e6351 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -344,8 +344,8 @@ def add_bookmarks_to_pdf( remove_prefix: str Specifies a prefix to remove from each entry in varlist when creating bookmarks. For example, if varlist has - a variable name "SpeciesConc_NO", and you specify - remove_prefix="SpeciesConc_", then the bookmark for + a variable name "SpeciesConcVV_NO", and you specify + remove_prefix="SpeciesConcVV_", then the bookmark for that variable will be just "NO", etc. verbose: bool Set this flag to True to print extra informational output. @@ -1168,7 +1168,7 @@ def add_lumped_species_to_dataset( lspc_yaml="", verbose=False, overwrite=False, - prefix="SpeciesConc_", + prefix="SpeciesConcVV_", ): """ Function to calculate lumped species concentrations and add @@ -1204,7 +1204,7 @@ def add_lumped_species_to_dataset( also used to extract an existing dataarray in the dataset with the correct size and dimensions to use during initialization of new lumped species dataarrays. - Default value: "SpeciesConc_" + Default value: "SpeciesConcVV_" Returns: ds: xarray Dataset From ae12821fdb30aaf47d90d38c28079159a822f80b Mon Sep 17 00:00:00 2001 From: Melissa Sulprizio Date: Thu, 22 Dec 2022 13:29:49 -0500 Subject: [PATCH 05/54] Fix typos in references to 'SpeciesConc_' variable As @yantosca points out in his review, `Species_Conc` should be 'SpeciesConc_'. Signed-off-by: Melissa Sulprizio --- gcpy/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 97de737e..5710b555 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -721,7 +721,7 @@ def make_benchmark_conc_plots( refds = refds.rename({v: 'SpeciesConcVV_' + spc}) for v in devds.data_vars.keys(): if v.startswith('SpeciesConc_'): - spc = v.replace('Species_Conc', '') + spc = v.replace('SpeciesConc_', '') devds = devds.rename({v: 'SpeciesConcVV_' + spc}) # ----------------------------------------------------------------- @@ -3424,7 +3424,7 @@ def make_benchmark_aerosol_tables( # naming introduced in GEOS-Chem 14.1.0 for v in ds_spc.data_vars.keys(): if v.startswith('SpeciesConc_'): - spc = v.replace('Species_Conc', '') + spc = v.replace('SpeciesConc_', '') ds_spc = ds_spc.rename({v: 'SpeciesConcVV_' + spc}) # Get troposphere mask From d0530293e3baf8f787eff56b8bf803d48ba570eb Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Tue, 3 Jan 2023 14:57:22 +0000 Subject: [PATCH 06/54] Add stretched grid attributes to output restart Add stretched grid attributes to the output of regrid_restart_file, and improve the command line argument handling of the script. 
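
The new attributes are stamped onto the regridded dataset just before
it is written out, roughly as in this sketch (the numeric values are
placeholders for the user-supplied stretched-grid parameters):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset()  # stands in for the regridded restart dataset
    ds.attrs["STRETCH_FACTOR"] = np.float32(2.0)
    ds.attrs["TARGET_LAT"] = np.float32(32.0)
    ds.attrs["TARGET_LON"] = np.float32(-64.0)
    ds.to_netcdf("new_restart_file.nc")
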
--- gcpy/regrid_restart_file.py | 272 ++++++++++++++++++++++++++++-------- 1 file changed, 217 insertions(+), 55 deletions(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 9541f0fe..3490189c 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -10,6 +10,7 @@ # 4x5_to_c24_weights.nc \ # GCHP.Restart.fullchem.20190701_0000z.c24.nc4 +import argparse import os import sys import re @@ -23,7 +24,76 @@ import requests -temp_files=[] +temp_files = [] + + +def file_path(file_path): + if not os.path.isfile(file_path): + raise argparse.ArgumentTypeError + return file_path + + +def parse_command_line(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "file_to_regrid", + type=file_path, + metavar="file_to_regrid", + help="The GEOS-Chem restart file to be regridded", + ) + parser.add_argument( + "regridding_weights_file", + type=file_path, + metavar="regridding_weights_file", + help=( + "The regridding weights file for this regridding, generated ", + "by ESMF_RegridWeightGen", + ), + ) + parser.add_argument( + "template_file", + type=file_path, + metavar="template_file", + help=( + "The GEOS-Chem restart file to use as a template for regridding - ", + "attributes, dimensions, and variables for the output file will ", + "be taken from this template", + ), + ) + + parser.add_argument( + "--stretched-grid", + action="store_true", + help=( + "Create a stretched-grid restart file - you must also pass ", + "stretched-grid parameters!", + ), + ) + + parser.add_argument( + "--stretch-factor", + type=np.float32, + metavar="stretch_factor", + help="The stretch factor, if creating a stretched-grid restart file", + required=False, + ) + parser.add_argument( + "--target-latitude", + type=np.float32, + metavar="target_latitude", + help="The target latitude, if creating a stretched-grid restart file", + required=False, + ) + parser.add_argument( + "--target-longitude", + type=np.float32, + metavar="target_longitude", + help="The target longitude, if creating a stretched-grid restart file", + required=False, + ) + + return parser.parse_args() def cleanup_tempfile(): @@ -36,22 +106,24 @@ def cleanup_tempfile(): def is_gchp_restart_file(ds): - is_gchp_restart = 'SPC_O3' in ds.data_vars - is_gcclassic = 'SpeciesRst_O3' in ds.data_vars + is_gchp_restart = "SPC_O3" in ds.data_vars + is_gcclassic = "SpeciesRst_O3" in ds.data_vars if not any((is_gchp_restart, is_gcclassic)): - raise ValueError("Couldn't determine if the provided file is a GC-Classic or GCHP restart file.") + raise ValueError( + "Couldn't determine if the provided file is a GC-Classic or GCHP restart file." 
+ ) return is_gchp_restart def open_dataset(file_or_url, CHUNK_SIZE=8192): global temp_files - is_url = bool(re.match(r'https?://', file_or_url)) + is_url = bool(re.match(r"https?://", file_or_url)) if is_url: logging.debug(f"Downloading {file_or_url}") with requests.get(file_or_url, stream=True) as r: r.raise_for_status() # raise HTTPError tempfile_fd, tempfile_path = tempfile.mkstemp() - with open(tempfile_fd, 'wb') as f: + with open(tempfile_fd, "wb") as f: bytes_downloaded = 0 for chunk in r.iter_content(chunk_size=CHUNK_SIZE): bytes_downloaded += len(chunk) @@ -64,22 +136,29 @@ def open_dataset(file_or_url, CHUNK_SIZE=8192): def rename_variables(ds, to_gchp=True): to_gchp_re_sub = [ - (r'SpeciesRst_(.+)', r'SPC_\1'), - (r'Met_(.+)', r'\1'), - (r'Met_DELPDRY', r'DELP_DRY'), - (r'Chem_(WetDepNitrogen|DryDepNitrogen|H2O2AfterChem|SO2AfterChem|KPPHvalue)', r'\1'), + (r"SpeciesRst_(.+)", r"SPC_\1"), + (r"Met_(.+)", r"\1"), + (r"Met_DELPDRY", r"DELP_DRY"), + ( + r"Chem_(WetDepNitrogen|DryDepNitrogen|H2O2AfterChem|SO2AfterChem|KPPHvalue)", + r"\1", + ), ] to_gcclassic_re_sub = [ - (r'SPC_(.+)', r'SpeciesRst_\1'), - (r'(TropLev|BXHEIGHT)', r'Met_\1') + (r"SPC_(.+)", r"SpeciesRst_\1"), + (r"(TropLev|BXHEIGHT)", r"Met_\1"), ] re_sub_arg_list = to_gchp_re_sub if to_gchp else to_gcclassic_re_sub rename_dict = {} for re_sub_args in re_sub_arg_list: - rename_dict.update({ - name: re.sub(*re_sub_args, name) for name in ds.data_vars if re.match(re_sub_args[0], name) - }) + rename_dict.update( + { + name: re.sub(*re_sub_args, name) + for name in ds.data_vars + if re.match(re_sub_args[0], name) + } + ) logging.info(f"Renaming {len(rename_dict)} variables") return ds.rename(rename_dict) @@ -97,48 +176,74 @@ def drop_variables(ds, output_template): drop_vars = input_var_set - output_var_set missing_vars = output_var_set - input_var_set if len(drop_vars) > 0: - logging.info(f"Dropping {len(drop_vars)} variables from the input restart file that dont exist in the output template") - logging.debug(f"Variables being dropped from the input restart file: {drop_vars}") + logging.info( + f"Dropping {len(drop_vars)} variables from the input restart file that dont exist in the output template" + ) + logging.debug( + f"Variables being dropped from the input restart file: {drop_vars}" + ) ds = ds.drop(drop_vars) if len(missing_vars) > 0: - logging.warning(f"The input restart file is missing {len(missing_vars)} variables that exist in the output template") - logging.debug(f"Variables missing in the input restart file: {missing_vars}") + logging.warning( + f"The input restart file is missing {len(missing_vars)} variables that exist in the output template" + ) + logging.debug( + f"Variables missing in the input restart file: {missing_vars}" + ) output_template = output_template.drop(missing_vars) return ds, output_template def regrid(ds, output_template, weights_file): weights = open_dataset(weights_file) - input_dims = [('lat', 'lon'), (ds.dims['lat'], ds.dims['lon'])] + input_dims = [("lat", "lon"), (ds.dims["lat"], ds.dims["lon"])] - output_template_shape = (output_template.dims['lat'], output_template.dims['lon']) - resize_output_template = np.prod(output_template_shape) != weights.dst_grid_dims.item() + output_template_shape = ( + output_template.dims["lat"], + output_template.dims["lon"], + ) + resize_output_template = ( + np.prod(output_template_shape) != weights.dst_grid_dims.item() + ) if resize_output_template: if is_gchp_restart_file(output_template): - # This is useful for stretched-grid simulations because 
they usually don't have a "normal" grid size + # This is useful for stretched-grid simulations because they usually don't have a "normal" grid size cs_res = np.sqrt(weights.dst_grid_dims.item() / 6).astype(int) - logging.info(f"Reshaping the output restart file template to grid size C{cs_res}") + logging.info( + f"Reshaping the output restart file template to grid size C{cs_res}" + ) output_shape = (6 * cs_res, cs_res) - func = lambda *args, **kwargs: np.ones(output_shape)*np.nan - vfunc = np.vectorize(func, signature='(lat,lon)->(lat1,lon1)') + func = lambda *args, **kwargs: np.ones(output_shape) * np.nan + vfunc = np.vectorize(func, signature="(lat,lon)->(lat1,lon1)") new_output_template = xr.apply_ufunc( - vfunc, output_template, keep_attrs=True, - input_core_dims=[['lat', 'lon']], output_core_dims=[['lat1', 'lon1']], + vfunc, + output_template, + keep_attrs=True, + input_core_dims=[["lat", "lon"]], + output_core_dims=[["lat1", "lon1"]], ) - new_output_template = new_output_template.rename({'lat1': 'lat', 'lon1': 'lon'}) - new_output_template['lat'].attrs = output_template['lat'].attrs - new_output_template['lon'].attrs = output_template['lat'].attrs + new_output_template = new_output_template.rename( + {"lat1": "lat", "lon1": "lon"} + ) + new_output_template["lat"].attrs = output_template["lat"].attrs + new_output_template["lon"].attrs = output_template["lat"].attrs new_output_template = new_output_template.assign_coords( - lat=np.arange(new_output_template.dims['lat'], dtype=np.float64), - lon=np.arange(new_output_template.dims['lon'], dtype=np.float64), + lat=np.arange( + new_output_template.dims["lat"], dtype=np.float64 + ), + lon=np.arange( + new_output_template.dims["lon"], dtype=np.float64 + ), ) output_template = new_output_template else: - raise ValueError("GC-Classic restart resizing not implemented. Please provide a restart file template with the proper resolution.") + raise ValueError( + "GC-Classic restart resizing not implemented. Please provide a restart file template with the proper resolution." 
+ ) else: output_shape = output_template_shape - output_dims = [('lat', 'lon'), output_shape] + output_dims = [("lat", "lon"), output_shape] logging.info("Regridding the input restart file") transform = sparselt.esmf.load_weights(weights, input_dims, output_dims) ds = sparselt.xr.apply(transform, ds, output_template) @@ -148,9 +253,12 @@ def regrid(ds, output_template, weights_file): def update_encoding(ds): logging.info(f"Updating encoding") for name in ds.data_vars: - ds[name].encoding.update({'dtype': 'float32'}) - if 'missing_value' in ds[name].encoding and '_FillValue' in ds[name].encoding: - del ds[name].encoding['missing_value'] + ds[name].encoding.update({"dtype": "float32"}) + if ( + "missing_value" in ds[name].encoding + and "_FillValue" in ds[name].encoding + ): + del ds[name].encoding["missing_value"] return ds @@ -160,24 +268,37 @@ def check_for_nans(ds): if ds[name].isnull().any().item(): nan_vars.append(name) if len(nan_vars) > 0: - logging.warning(f"Dataset has {len(nan_vars)}/{len(ds.data_vars)} variables with NaN values") + logging.warning( + f"Dataset has {len(nan_vars)}/{len(ds.data_vars)} variables with NaN values" + ) logging.debug(f"Variables with NaN values: {nan_vars}") -def regrid_restart_file(input_restart, regrid_weights, output_restart_template): +def regrid_restart_file( + input_restart, + regrid_weights, + output_restart_template, + stretch_factor=None, + target_lat=None, + target_lon=None, +): logging.info(f"Input restart file: {input_restart}") logging.info(f"Regridding weights: {regrid_weights}") logging.info(f"Output template restart file: {output_restart_template}") - + ds = open_dataset(input_restart) check_for_nans(ds) output_template = open_dataset(output_restart_template) input_is_gchp_restart = is_gchp_restart_file(ds) output_is_gchp_restart = is_gchp_restart_file(output_template) - logging.info(f"Input restart file type is '{'GCHP' if input_is_gchp_restart else 'GC-Classic'}'") - logging.info(f"Output restart file type is '{'GCHP' if output_is_gchp_restart else 'GC-Classic'}'") - is_conversion = (input_is_gchp_restart != output_is_gchp_restart) + logging.info( + f"Input restart file type is '{'GCHP' if input_is_gchp_restart else 'GC-Classic'}'" + ) + logging.info( + f"Output restart file type is '{'GCHP' if output_is_gchp_restart else 'GC-Classic'}'" + ) + is_conversion = input_is_gchp_restart != output_is_gchp_restart if is_conversion: to_gchp = output_is_gchp_restart ds = rename_variables(ds, to_gchp) @@ -187,17 +308,58 @@ def regrid_restart_file(input_restart, regrid_weights, output_restart_template): ds = regrid(ds, output_template, weights_file=regrid_weights) ds = update_encoding(ds) check_for_nans(ds) - ds.to_netcdf('new_restart_file.nc') - logging.info(f"Wrote 'new_restart_file.nc' with {len(ds.data_vars)} variables") + + if stretch_factor and target_lat and target_lon: + try: + ds.attrs["STRETCH_FACTOR"] = np.float32(stretch_factor) + ds.attrs["TARGET_LAT"] = np.float32(target_lat) + ds.attrs["TARGET_LON"] = np.float32(target_lon) + except Exception as e: + raise Exception( + "Error when processing your stretched-grid parameters - are they correct?" 
+ ) from e + + ds.to_netcdf("new_restart_file.nc") + logging.info( + f"Wrote 'new_restart_file.nc' with {len(ds.data_vars)} variables" + ) cleanup_tempfile() -if __name__ == '__main__': - logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO').upper()) - if len(sys.argv) != 4: - logging.error("This program has 3 required arguments: input_restart regrid_weights output_restart_template") - exit(1) - input_restart = sys.argv[1] - regrid_weights = sys.argv[2] - output_restart_template = sys.argv[3] - regrid_restart_file(input_restart, regrid_weights, output_restart_template) +if __name__ == "__main__": + logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper()) + COMMAND_LINE = parse_command_line() + input_restart = COMMAND_LINE.file_to_regrid + regrid_weights = COMMAND_LINE.regridding_weights_file + output_restart_template = COMMAND_LINE.template_file + + if COMMAND_LINE.stretched_grid: + logging.info("Creating a stretched-grid restart file") + + if ( + (not COMMAND_LINE.stretch_factor) + or (not COMMAND_LINE.target_latitude) + or (not COMMAND_LINE.target_longitude) + ): + error_message = ( + "--stretched-grid was set but not all stretched-", + "grid parameters were passed!", + ) + raise RuntimeError(error_message) + else: + stretch_factor = COMMAND_LINE.stretch_factor + target_latitude = COMMAND_LINE.target_latitude + target_longitude = COMMAND_LINE.target_longitude + + regrid_restart_file( + input_restart, + regrid_weights, + output_restart_template, + stretch_factor=stretch_factor, + target_lat=target_latitude, + target_lon=target_longitude, + ) + else: + regrid_restart_file( + input_restart, regrid_weights, output_restart_template + ) From 0ae2d8cf219ca85834f99f40e1ca70e22c9791dd Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Tue, 3 Jan 2023 17:44:50 +0000 Subject: [PATCH 07/54] Working through linter issues --- gcpy/regrid_restart_file.py | 412 ++++++++++++++++++++++++------------ 1 file changed, 281 insertions(+), 131 deletions(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 3490189c..950e5435 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -1,39 +1,70 @@ -# Example usage: -# -# regrid_restart_file \ -# GEOSChem.Restart.fullchem.20190701_0000z.nc4 \ -# 4x5_to_c24_weights.nc \ -# GCHP.Restart.fullchem.20190701_0000z.c24.nc4 -# -# regrid_restart_file \ -# http://ftp.as.harvard.edu/gcgrid/geos-chem/10yr_benchmarks/13.0.0/GCClassic/restarts/GEOSChem.Restart.20161101_0000z.nc4 \ -# 4x5_to_c24_weights.nc \ -# GCHP.Restart.fullchem.20190701_0000z.c24.nc4 - +""" +This module takes a restart file, regridding weights generated by +ESMF_RegridWeightGen, a template file, and optional stretched-grid parameters, +to produce a regridded GCHP restart file. 
+ +Example: + First create source and target grid specifications using `gridspec` + (https://github.com/liambindle/gridspec), then create regridding weights + from source to target grid using ESMF_RegridWeightGen from ESMF, then: + + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.fullchem.20190701_0000z.nc4 \ + 4x5_to_c24_weights.nc \ + GCHP.Restart.fullchem.20190701_0000z.c24.nc4 + + Or, for a stretched-grid: + + $ python -m gcpy.regrid_restart_file \ + --stretched-grid \ + --stretch-factor=2.0 \ + --target-latitude=32.0 \ + --target-longitude=-64.0 \ + GEOSChem.Restart.fullchem.20190701_0000z.nc4 \ + 4x5_to_c24_weights.nc \ + GCHP.Restart.fullchem.20190701_0000z.c24.nc4 + +""" import argparse +import logging import os -import sys +from pathlib import Path import re -import logging import tempfile import xarray as xr import numpy as np import sparselt.esmf import sparselt.xr -from pathlib import Path import requests -temp_files = [] +TEMP_FILES = [] + +def file_path(path): + """ + Checks whether or not a regular file exists at the passed path. -def file_path(file_path): - if not os.path.isfile(file_path): + Args: + file_path (str): A path to a file. + + Returns: + bool: True if a regular file exists at `file_path`. + + """ + if not os.path.isfile(path): raise argparse.ArgumentTypeError - return file_path + return path def parse_command_line(): + """ + Parses command line arguments and options into a useful data structure. + + Returns: + argparse.Namespace: A dict-like object containing command line + argument and option values. + """ parser = argparse.ArgumentParser() parser.add_argument( @@ -97,17 +128,26 @@ def parse_command_line(): def cleanup_tempfile(): - global temp_files - if len(temp_files) > 0: - logging.debug(f"Deleting {len(temp_files)} temp files") - for filepath in temp_files: + """ + Clean up temporary files created as part of the regridding. + """ + global TEMP_FILES + if len(TEMP_FILES) > 0: + logging.debug("Deleting %d temp files", len(TEMP_FILES)) + for filepath in TEMP_FILES: Path(filepath).unlink(missing_ok=True) - temp_files = [] + TEMP_FILES = [] -def is_gchp_restart_file(ds): - is_gchp_restart = "SPC_O3" in ds.data_vars - is_gcclassic = "SpeciesRst_O3" in ds.data_vars +def is_gchp_restart_file(dataset): + """ + Checks whether or not an xarray dataset represents a GCHP restart file. + + Returns: + bool: True if `dataset` represents a GCHP restart file. + """ + is_gchp_restart = "SPC_O3" in dataset.data_vars + is_gcclassic = "SpeciesRst_O3" in dataset.data_vars if not any((is_gchp_restart, is_gcclassic)): raise ValueError( "Couldn't determine if the provided file is a GC-Classic or GCHP restart file." @@ -115,26 +155,46 @@ def is_gchp_restart_file(ds): return is_gchp_restart -def open_dataset(file_or_url, CHUNK_SIZE=8192): - global temp_files +def open_dataset(file_or_url, chunk_size=8192): + """ + Open a NetCDF-4 dataset from either file path or URL. + + Args: + file_or_url (str): A file path on the local system or URL + chunk_size (int): Size of chunks to stream from remote dataset to + the local system. + + Returns: + xarray.Dataset: An xarray dataset. 
+ """ + global TEMP_FILES is_url = bool(re.match(r"https?://", file_or_url)) if is_url: - logging.debug(f"Downloading {file_or_url}") - with requests.get(file_or_url, stream=True) as r: - r.raise_for_status() # raise HTTPError + logging.debug("Downloading %s", file_or_url) + with requests.get(file_or_url, stream=True, timeout=30.0) as request: + request.raise_for_status() # raise HTTPError tempfile_fd, tempfile_path = tempfile.mkstemp() - with open(tempfile_fd, "wb") as f: + with open(tempfile_fd, "wb") as outfile: bytes_downloaded = 0 - for chunk in r.iter_content(chunk_size=CHUNK_SIZE): + for chunk in request.iter_content(chunk_size=chunk_size): bytes_downloaded += len(chunk) - f.write(chunk) - temp_files.append(tempfile_path) - file_path = tempfile_path if is_url else file_or_url - logging.debug(f"Opening {file_path}") - return xr.open_dataset(file_path) - - -def rename_variables(ds, to_gchp=True): + outfile.write(chunk) + TEMP_FILES.append(tempfile_path) + dataset_file_path = tempfile_path if is_url else file_or_url + logging.debug("Opening %s", dataset_file_path) + return xr.open_dataset(dataset_file_path) + + +def rename_variables(dataset, to_gchp=True): + """ + Rename variables in passed dataset to match either GC-Classic or GCHP + naming conventions. + + Args: + datase (xarray.Dataset): The dataset to have its variables renamed. + to_gchp (bool) : True if converting to GCHP naming convention, + False if converting to GC-Classic. + """ to_gchp_re_sub = [ (r"SpeciesRst_(.+)", r"SPC_\1"), (r"Met_(.+)", r"\1"), @@ -155,48 +215,93 @@ def rename_variables(ds, to_gchp=True): rename_dict.update( { name: re.sub(*re_sub_args, name) - for name in ds.data_vars + for name in dataset.data_vars if re.match(re_sub_args[0], name) } ) - logging.info(f"Renaming {len(rename_dict)} variables") - return ds.rename(rename_dict) + logging.info("Renaming %d variables", len(rename_dict)) + return dataset.rename(rename_dict) + + +def reverse_lev(dataset): + """ + Reverse the level index of the passed dataset. + + Args: + dataset (xarray.Dataset): The dataset to have its level index reversed. + + Returns: + xarray.Dataset: The input dataset with a reversed level index. + """ + logging.info("Reversing coordinate 'lev'") + dataset = dataset.reindex(lev=dataset.lev[::-1]) + dataset = dataset.assign_coords(lev=dataset.lev.values[::-1]) + return dataset -def reverse_lev(ds): - logging.info(f"Reversing coordinate 'lev'") - ds = ds.reindex(lev=ds.lev[::-1]) - ds = ds.assign_coords(lev=ds.lev.values[::-1]) - return ds +def drop_variables(dataset, output_template): + """ + Drop variables in the passed dataset which aren't present in the regridding + output template. + Args: + dataset (xarray.Dataset): The dataset from which to drop + variables. + output_template (xarray.Dataset): The template from which to determine + variables to drop. -def drop_variables(ds, output_template): - input_var_set = set(ds.data_vars) + Returns: + xarray.Dataset: The input dataset with variables dropped. 
+ """ + input_var_set = set(dataset.data_vars) output_var_set = set(output_template.data_vars) drop_vars = input_var_set - output_var_set missing_vars = output_var_set - input_var_set if len(drop_vars) > 0: - logging.info( - f"Dropping {len(drop_vars)} variables from the input restart file that dont exist in the output template" + info_message = ( + "Dropping %d variables from the input restart file ", + "that don't exist in the output template", ) - logging.debug( - f"Variables being dropped from the input restart file: {drop_vars}" + logging.info(info_message, len(drop_vars)) + + debug_message = ( + "Variables being dropped from the input restart file:", + " %s", ) - ds = ds.drop(drop_vars) + logging.debug(debug_message, drop_vars) + + dataset = dataset.drop(drop_vars) if len(missing_vars) > 0: - logging.warning( - f"The input restart file is missing {len(missing_vars)} variables that exist in the output template" - ) - logging.debug( - f"Variables missing in the input restart file: {missing_vars}" + warning_message = ( + "The input restart file is missing %d variables ", + "that exist in the output template", ) + logging.warning(warning_message, len(missing_vars)) + + debug_message = "Variables missing in the input restart file: %s" + logging.debug(debug_message, missing_vars) + output_template = output_template.drop(missing_vars) - return ds, output_template + return dataset, output_template + +def regrid(dataset, output_template, weights_file): + """ + Calculate and apply the regridding, based on passed regridding weights + and input dataset attributes. -def regrid(ds, output_template, weights_file): + Args: + dataset (xarray.Dataset): The dataset to be regridded. + output_template (xarray.Dataset): The template file for the regridded + output. + weights_file (xarray.Dataset): The precalculated regridding weights, + generated by ESMF_RegridWeightGen. + + Returns: + xarray.Dataset: The regridded dataset. + """ weights = open_dataset(weights_file) - input_dims = [("lat", "lon"), (ds.dims["lat"], ds.dims["lon"])] + input_dims = [("lat", "lon"), (dataset.dims["lat"], dataset.dims["lon"])] output_template_shape = ( output_template.dims["lat"], @@ -207,11 +312,16 @@ def regrid(ds, output_template, weights_file): ) if resize_output_template: if is_gchp_restart_file(output_template): - # This is useful for stretched-grid simulations because they usually don't have a "normal" grid size + # This is useful for stretched-grid simulations because they usually + # don't have a "normal" grid size cs_res = np.sqrt(weights.dst_grid_dims.item() / 6).astype(int) - logging.info( - f"Reshaping the output restart file template to grid size C{cs_res}" + + info_message = ( + "Reshaping the output restart file template to ", + "grid size C%f", ) + logging.info(info_message, cs_res) + output_shape = (6 * cs_res, cs_res) func = lambda *args, **kwargs: np.ones(output_shape) * np.nan vfunc = np.vectorize(func, signature="(lat,lon)->(lat1,lon1)") @@ -237,41 +347,61 @@ def regrid(ds, output_template, weights_file): ) output_template = new_output_template else: - raise ValueError( - "GC-Classic restart resizing not implemented. Please provide a restart file template with the proper resolution." + error_message = ( + "GC-Classic restart resizing not implemented. 
", + "Please provide a restart file template with ", + "the proper resolution.", ) + raise ValueError(error_message) else: output_shape = output_template_shape output_dims = [("lat", "lon"), output_shape] logging.info("Regridding the input restart file") transform = sparselt.esmf.load_weights(weights, input_dims, output_dims) - ds = sparselt.xr.apply(transform, ds, output_template) - return ds + dataset = sparselt.xr.apply(transform, dataset, output_template) + return dataset + + +def update_encoding(dataset): + """ + Ensure dataset variables are encoded as float32. + Args: + dataset (xarray.Dataset): The dataset to have its encoding variable + encoding checked and updated. -def update_encoding(ds): - logging.info(f"Updating encoding") - for name in ds.data_vars: - ds[name].encoding.update({"dtype": "float32"}) + Returns: + xarray.Dataset: The input dataset with float32 variable encoding + applied. + """ + logging.info("Updating encoding") + for name in dataset.data_vars: + dataset[name].encoding.update({"dtype": "float32"}) if ( - "missing_value" in ds[name].encoding - and "_FillValue" in ds[name].encoding + "missing_value" in dataset[name].encoding + and "_FillValue" in dataset[name].encoding ): - del ds[name].encoding["missing_value"] - return ds + del dataset[name].encoding["missing_value"] + return dataset -def check_for_nans(ds): +def check_for_nans(dataset): + """ + Check for the presence of NaN values in the passed dataset. + + Args: + dataset (xarray.Dataset): The dataset to check for NaNs. + """ nan_vars = [] - for name in ds.data_vars: - if ds[name].isnull().any().item(): + for name in dataset.data_vars: + if dataset[name].isnull().any().item(): nan_vars.append(name) if len(nan_vars) > 0: - logging.warning( - f"Dataset has {len(nan_vars)}/{len(ds.data_vars)} variables with NaN values" - ) - logging.debug(f"Variables with NaN values: {nan_vars}") + warning_message = "Dataset has %f variables with NaN values" + logging.warning(warning_message, len(nan_vars) / len(dataset.data_vars)) + + logging.debug("Variables with NaN values: %s", nan_vars) def regrid_restart_file( @@ -282,47 +412,67 @@ def regrid_restart_file( target_lat=None, target_lon=None, ): - logging.info(f"Input restart file: {input_restart}") - logging.info(f"Regridding weights: {regrid_weights}") - logging.info(f"Output template restart file: {output_restart_template}") - - ds = open_dataset(input_restart) - check_for_nans(ds) + """ + Perform and end-to-end regridding from reading input gridded data and + regridding weights to writing out the regridded data. + + Args: + input_restart (str) : The path to the restart file that will + be regridded. + regrid_weights (str) : The path to the regridding weights, + generated by ESMF_RegridWeightGen. + output_restart_template (str) : The path to the regridding output + template file. + stretch_factor (float): An optional stretch factor, for use + with stretched-regridding. + target_lat (float): An optional target latitude, for use + with stretched-regridding. + target_lon (float): An optional target longitude, for use + with stretched-regridding. 
+ """ + logging.info("Input restart file: %s", input_restart) + logging.info("Regridding weights: %s", regrid_weights) + logging.info("Output template restart file: %s", output_restart_template) + + dataset = open_dataset(input_restart) + check_for_nans(dataset) output_template = open_dataset(output_restart_template) - input_is_gchp_restart = is_gchp_restart_file(ds) - output_is_gchp_restart = is_gchp_restart_file(output_template) - logging.info( - f"Input restart file type is '{'GCHP' if input_is_gchp_restart else 'GC-Classic'}'" - ) - logging.info( - f"Output restart file type is '{'GCHP' if output_is_gchp_restart else 'GC-Classic'}'" - ) - is_conversion = input_is_gchp_restart != output_is_gchp_restart + input_is_gchp = is_gchp_restart_file(dataset) + output_is_gchp = is_gchp_restart_file(output_template) + + info_message = "Input restart file type is %s" + logging.info(info_message, "GCHP" if input_is_gchp else "GC-Classic") + + info_message = "Output restart file type is %s" + logging.info(info_message, "GCHP" if output_is_gchp else "GC-Classic") + + is_conversion = input_is_gchp != output_is_gchp if is_conversion: - to_gchp = output_is_gchp_restart - ds = rename_variables(ds, to_gchp) - ds = reverse_lev(ds) + to_gchp = output_is_gchp + dataset = rename_variables(dataset, to_gchp) + dataset = reverse_lev(dataset) - ds, output_template = drop_variables(ds, output_template) - ds = regrid(ds, output_template, weights_file=regrid_weights) - ds = update_encoding(ds) - check_for_nans(ds) + dataset, output_template = drop_variables(dataset, output_template) + dataset = regrid(dataset, output_template, weights_file=regrid_weights) + dataset = update_encoding(dataset) + check_for_nans(dataset) if stretch_factor and target_lat and target_lon: try: - ds.attrs["STRETCH_FACTOR"] = np.float32(stretch_factor) - ds.attrs["TARGET_LAT"] = np.float32(target_lat) - ds.attrs["TARGET_LON"] = np.float32(target_lon) - except Exception as e: + dataset.attrs["STRETCH_FACTOR"] = np.float32(stretch_factor) + dataset.attrs["TARGET_LAT"] = np.float32(target_lat) + dataset.attrs["TARGET_LON"] = np.float32(target_lon) + except Exception as exception: raise Exception( "Error when processing your stretched-grid parameters - are they correct?" 
- ) from e + ) from exception + + dataset.to_netcdf("new_restart_file.nc") + + info_message = "Wrote 'new_restart_file.nc' with %d variables" + logging.info(info_message, len(dataset.data_vars)) - ds.to_netcdf("new_restart_file.nc") - logging.info( - f"Wrote 'new_restart_file.nc' with {len(ds.data_vars)} variables" - ) cleanup_tempfile() @@ -346,19 +496,19 @@ def regrid_restart_file( "grid parameters were passed!", ) raise RuntimeError(error_message) - else: - stretch_factor = COMMAND_LINE.stretch_factor - target_latitude = COMMAND_LINE.target_latitude - target_longitude = COMMAND_LINE.target_longitude - - regrid_restart_file( - input_restart, - regrid_weights, - output_restart_template, - stretch_factor=stretch_factor, - target_lat=target_latitude, - target_lon=target_longitude, - ) + + stretch_factor = COMMAND_LINE.stretch_factor + target_latitude = COMMAND_LINE.target_latitude + target_longitude = COMMAND_LINE.target_longitude + + regrid_restart_file( + input_restart, + regrid_weights, + output_restart_template, + stretch_factor=stretch_factor, + target_lat=target_latitude, + target_lon=target_longitude, + ) else: regrid_restart_file( input_restart, regrid_weights, output_restart_template From 5bedf0b988ebcdab17c6b27e26baf4c371aa0da1 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Wed, 4 Jan 2023 10:48:38 +0000 Subject: [PATCH 08/54] Further linting --- gcpy/regrid_restart_file.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 950e5435..209b45d4 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -40,7 +40,6 @@ TEMP_FILES = [] - def file_path(path): """ Checks whether or not a regular file exists at the passed path. @@ -479,9 +478,9 @@ def regrid_restart_file( if __name__ == "__main__": logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper()) COMMAND_LINE = parse_command_line() - input_restart = COMMAND_LINE.file_to_regrid - regrid_weights = COMMAND_LINE.regridding_weights_file - output_restart_template = COMMAND_LINE.template_file + file_to_regrid = COMMAND_LINE.file_to_regrid + regridding_weights_file = COMMAND_LINE.regridding_weights_file + template_file = COMMAND_LINE.template_file if COMMAND_LINE.stretched_grid: logging.info("Creating a stretched-grid restart file") @@ -502,14 +501,14 @@ def regrid_restart_file( target_longitude = COMMAND_LINE.target_longitude regrid_restart_file( - input_restart, - regrid_weights, - output_restart_template, + file_to_regrid, + regridding_weights_file, + template_file, stretch_factor=stretch_factor, target_lat=target_latitude, target_lon=target_longitude, ) else: regrid_restart_file( - input_restart, regrid_weights, output_restart_template + file_to_regrid, regridding_weights_file, template_file ) From 659eceab03ed455eeceb57fd671cefb5fa347334 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Wed, 4 Jan 2023 10:50:36 +0000 Subject: [PATCH 09/54] Auto-formatting --- gcpy/regrid_restart_file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 209b45d4..5e150503 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -40,6 +40,7 @@ TEMP_FILES = [] + def file_path(path): """ Checks whether or not a regular file exists at the passed path. 
From 4e5e326a866785396429c733ca8bd6b02d4b17a7 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Wed, 4 Jan 2023 11:08:19 +0000 Subject: [PATCH 10/54] Change multi-line strings from tuples to implicit concat --- gcpy/regrid_restart_file.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 5e150503..128650b8 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -78,7 +78,7 @@ def parse_command_line(): type=file_path, metavar="regridding_weights_file", help=( - "The regridding weights file for this regridding, generated ", + "The regridding weights file for this regridding, generated " "by ESMF_RegridWeightGen", ), ) @@ -87,8 +87,8 @@ def parse_command_line(): type=file_path, metavar="template_file", help=( - "The GEOS-Chem restart file to use as a template for regridding - ", - "attributes, dimensions, and variables for the output file will ", + "The GEOS-Chem restart file to use as a template for regridding - " + "attributes, dimensions, and variables for the output file will " "be taken from this template", ), ) @@ -97,8 +97,8 @@ def parse_command_line(): "--stretched-grid", action="store_true", help=( - "Create a stretched-grid restart file - you must also pass ", - "stretched-grid parameters!", + "Create a stretched-grid restart file - you must also pass " + "stretched-grid parameters!" ), ) @@ -150,7 +150,8 @@ def is_gchp_restart_file(dataset): is_gcclassic = "SpeciesRst_O3" in dataset.data_vars if not any((is_gchp_restart, is_gcclassic)): raise ValueError( - "Couldn't determine if the provided file is a GC-Classic or GCHP restart file." + "Couldn't determine if the provided file is a GC-Classic or GCHP " + "restart file." ) return is_gchp_restart @@ -259,22 +260,21 @@ def drop_variables(dataset, output_template): missing_vars = output_var_set - input_var_set if len(drop_vars) > 0: info_message = ( - "Dropping %d variables from the input restart file ", - "that don't exist in the output template", + "Dropping %d variables from the input restart file " + "that don't exist in the output template" ) logging.info(info_message, len(drop_vars)) debug_message = ( - "Variables being dropped from the input restart file:", - " %s", + "Variables being dropped from the input restart file:" " %s" ) logging.debug(debug_message, drop_vars) dataset = dataset.drop(drop_vars) if len(missing_vars) > 0: warning_message = ( - "The input restart file is missing %d variables ", - "that exist in the output template", + "The input restart file is missing %d variables " + "that exist in the output template" ) logging.warning(warning_message, len(missing_vars)) @@ -317,8 +317,7 @@ def regrid(dataset, output_template, weights_file): cs_res = np.sqrt(weights.dst_grid_dims.item() / 6).astype(int) info_message = ( - "Reshaping the output restart file template to ", - "grid size C%f", + "Reshaping the output restart file template to " "grid size C%f" ) logging.info(info_message, cs_res) @@ -348,9 +347,9 @@ def regrid(dataset, output_template, weights_file): output_template = new_output_template else: error_message = ( - "GC-Classic restart resizing not implemented. ", - "Please provide a restart file template with ", - "the proper resolution.", + "GC-Classic restart resizing not implemented. " + "Please provide a restart file template with " + "the proper resolution." 
) raise ValueError(error_message) else: @@ -492,8 +491,8 @@ def regrid_restart_file( or (not COMMAND_LINE.target_longitude) ): error_message = ( - "--stretched-grid was set but not all stretched-", - "grid parameters were passed!", + "--stretched-grid was set but not all stretched-" + "grid parameters were passed!" ) raise RuntimeError(error_message) From 6f1ae9fc2c2ad7c8855be8a75a869df718f34ed9 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Wed, 4 Jan 2023 11:15:19 +0000 Subject: [PATCH 11/54] Remove erroneous comma --- gcpy/regrid_restart_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 128650b8..28e996fe 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -89,7 +89,7 @@ def parse_command_line(): help=( "The GEOS-Chem restart file to use as a template for regridding - " "attributes, dimensions, and variables for the output file will " - "be taken from this template", + "be taken from this template" ), ) From 89b597f7a7575baffcd5bfe4de84ecd452665848 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Wed, 4 Jan 2023 11:22:21 +0000 Subject: [PATCH 12/54] Remove another erroneous comma --- gcpy/regrid_restart_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 28e996fe..967741ea 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -79,7 +79,7 @@ def parse_command_line(): metavar="regridding_weights_file", help=( "The regridding weights file for this regridding, generated " - "by ESMF_RegridWeightGen", + "by ESMF_RegridWeightGen" ), ) parser.add_argument( From 6bb1d443fb3f4476b7a20a4cb8f8dee07c746127 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Fri, 6 Jan 2023 11:19:59 +0000 Subject: [PATCH 13/54] Add description and example usage text to help --- gcpy/regrid_restart_file.py | 79 ++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 967741ea..0cd167cf 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -65,7 +65,50 @@ def parse_command_line(): argparse.Namespace: A dict-like object containing command line argument and option values. """ - parser = argparse.ArgumentParser() + description_text = ( + "regrid_restart_file - Regrid GCHP restart files" + "\n\n" + "regrid_restart_file is a tool for regridding GCHP restart " + "files. You can resize restart files from their original " + "resolution to a new resolution, stretch an unstretched restart " + "file, unstretch a stretched restart file, and re-stretch a " + "stretched restart file." + "\n\n" + "To use this tool, you must first generate regridding weights for " + "the regridding you would like to carry out, using " + "ESMF_RegridWeightGen." + "\n\n" + "NOTE: GC-Classic regridding is not currently supported by this" + "tool. 
If this is something you would like to be supported, please " + "raise an issue via " + "https://github.com/geoschem/gcpy/issues/new/choose" + ) + + epilog_text = ( + "Example usage (unstretched grid resizing): " + "\n\n" + "python -m gcpy.regrid_restart_file \\ " + "\n\tGEOSChem.Restart.20190701_0000z.c90.nc4 \\ " + "\n\tC90_to_C48_weights.nc \\ " + "\n\tGEOSChem.Restart.20190701_0000z.c90.nc4" + "\n\n" + "Example usage (stretching a grid): " + "\n\n" + "python -m gcpy.regrid_restart_file \\ " + "\n\t--stretched-grid \\ " + "\n\t--stretch-factor 2.0 \\ " + "\n\t--target-latitude 32.0 \\ " + "\n\t--target-longitude -64.0 \\ " + "\n\tGEOSChem.Restart.20190701_0000z.c90.nc4 \\ " + "\n\tC90_to_C48_weights.nc \\ " + "\n\tGEOSChem.Restart.20190701_0000z.c90.nc4" + ) + + parser = argparse.ArgumentParser( + description=description_text, + epilog=epilog_text, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) parser.add_argument( "file_to_regrid", @@ -266,7 +309,7 @@ def drop_variables(dataset, output_template): logging.info(info_message, len(drop_vars)) debug_message = ( - "Variables being dropped from the input restart file:" " %s" + "Variables being dropped from the input restart file: %s" ) logging.debug(debug_message, drop_vars) @@ -317,7 +360,7 @@ def regrid(dataset, output_template, weights_file): cs_res = np.sqrt(weights.dst_grid_dims.item() / 6).astype(int) info_message = ( - "Reshaping the output restart file template to " "grid size C%f" + "Reshaping the output restart file template to grid size C%f" ) logging.info(info_message, cs_res) @@ -478,9 +521,9 @@ def regrid_restart_file( if __name__ == "__main__": logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper()) COMMAND_LINE = parse_command_line() - file_to_regrid = COMMAND_LINE.file_to_regrid - regridding_weights_file = COMMAND_LINE.regridding_weights_file - template_file = COMMAND_LINE.template_file + FILE_TO_REGRID = COMMAND_LINE.file_to_regrid + REGRIDDING_WEIGHTS_FILE = COMMAND_LINE.regridding_weights_file + TEMPLATE_FILE = COMMAND_LINE.template_file if COMMAND_LINE.stretched_grid: logging.info("Creating a stretched-grid restart file") @@ -490,25 +533,25 @@ def regrid_restart_file( or (not COMMAND_LINE.target_latitude) or (not COMMAND_LINE.target_longitude) ): - error_message = ( + ERROR_MESSAGE = ( "--stretched-grid was set but not all stretched-" "grid parameters were passed!" 
) - raise RuntimeError(error_message) + raise RuntimeError(ERROR_MESSAGE) - stretch_factor = COMMAND_LINE.stretch_factor - target_latitude = COMMAND_LINE.target_latitude - target_longitude = COMMAND_LINE.target_longitude + STRETCH_FACTOR = COMMAND_LINE.stretch_factor + TARGET_LATITUDE = COMMAND_LINE.target_latitude + TARGET_LONGITUDE = COMMAND_LINE.target_longitude regrid_restart_file( - file_to_regrid, - regridding_weights_file, - template_file, - stretch_factor=stretch_factor, - target_lat=target_latitude, - target_lon=target_longitude, + FILE_TO_REGRID, + REGRIDDING_WEIGHTS_FILE, + TEMPLATE_FILE, + stretch_factor=STRETCH_FACTOR, + target_lat=TARGET_LATITUDE, + target_lon=TARGET_LONGITUDE, ) else: regrid_restart_file( - file_to_regrid, regridding_weights_file, template_file + FILE_TO_REGRID, REGRIDDING_WEIGHTS_FILE, TEMPLATE_FILE ) From 789d5b4a71d4fead97a4be3d2ddd6c311391d3ca Mon Sep 17 00:00:00 2001 From: Lizzie Lundgren Date: Tue, 10 Jan 2023 11:51:34 -0500 Subject: [PATCH 14/54] Fix bug where GCHP emissions levels not flipped in transport tracer budget Signed-off-by: Lizzie Lundgren --- CHANGELOG.md | 7 ++++--- gcpy/budget_tt.py | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4892ac97..9d414db8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,11 +8,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [1.3.2] -- 2022-10-25 -### Changed -- Bug fix: Fixed malformed version declaration for cartopy (use `==` +### Fixes +- Fixed malformed version declaration for cartopy (use `==` instead of `=`) in setup.py. This was preventing upload to conda-forge. - +- Vertically flip GCHP emissions when computing transport tracers budget + ## [1.3.1] -- 2022-10-25 ### Changed diff --git a/gcpy/budget_tt.py b/gcpy/budget_tt.py index 0d0d4a0d..14a317f4 100644 --- a/gcpy/budget_tt.py +++ b/gcpy/budget_tt.py @@ -559,17 +559,17 @@ def annual_average_sources(globvars): raise ValueException(msg) # Convert Be7 and Be10 sources from kg/m2/s to g/day - # NOTE: This is a kludgey way to do it but it works and - # preserves the shape of the data as (time,lev,lat,lon). 
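As an aside, the level-flip index arithmetic used in the fix that follows is easy to check in isolation. A minimal sketch with a dummy 4-level array (variable names here are illustrative, not part of the patch):

    import numpy as np

    n_levs = 4
    top_down = np.arange(n_levs)        # levels ordered top-of-atmosphere first
    bottom_up = np.empty_like(top_down)
    for k in range(n_levs):
        kf = n_levs - k - 1             # same flip arithmetic as the patch
        bottom_up[k] = top_down[kf]
    # flipping index-by-index is equivalent to reversing the level axis
    assert (bottom_up == top_down[::-1]).all()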
+      # If GCHP data, must vertically flip the emissions diagnostic
       for t in range(globvars.N_MONTHS):
           for k in range(n_levs):
               if globvars.is_gchp:
+                  kf = n_levs - k - 1
                   q["Be7_f"][t, k, :, :, :] = \
-                      globvars.ds_hco["EmisBe7_Cosmic"].isel(time=t, lev=k) * \
+                      globvars.ds_hco["EmisBe7_Cosmic"].isel(time=t, lev=kf) * \
                       globvars.ds_met[area_var].isel(time=t) * \
                       globvars.kg_s_to_g_d["Be7"]
                   q["Be10_f"][t, k, :, :, :] = \
-                      globvars.ds_hco["EmisBe10_Cosmic"].isel(time=t, lev=k) * \
+                      globvars.ds_hco["EmisBe10_Cosmic"].isel(time=t, lev=kf) * \
                       globvars.ds_met[area_var].isel(time=t) * \
                       globvars.kg_s_to_g_d["Be10"]
               else:

From cc769f026c25c0ddb257cad83e2286a1c38a112d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 19 Jan 2023 11:13:46 -0500
Subject: [PATCH 15/54] Cast to DataArray before using isel in
 slice_by_lev_and_time function

gcpy/util.py
- In routine slice_by_lev_and_time:
  - Add an error check to make sure ds is of type xarray.Dataset
  - Create dr=ds[varname] and then use isel on dr
  - Add fliplev = len(dr['lev']) - 1 - ilev as the flipped level
  - Updated comments

Signed-off-by: Bob Yantosca
---
 gcpy/util.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/gcpy/util.py b/gcpy/util.py
index c94e6351..368009d0 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -676,7 +676,7 @@ def slice_by_lev_and_time(
         flip
 ):
     """
-    Slice a DataArray by desired time and level.
+    Given a Dataset, returns a DataArray sliced by desired time and level.

     Args:
         ds: xarray Dataset
@@ -691,24 +691,32 @@ def slice_by_lev_and_time(
             Whether to flip ilev to be indexed from ground or top of atmosphere

     Returns:
-        ds[varname]: xarray DataArray
+        dr: xarray DataArray
             DataArray of data variable sliced according to ilev and itime
     """
     # used in compare_single_level and compare_zonal_mean to get dataset slices
+    if not isinstance(ds, xr.Dataset):
+        msg="ds is not of type xarray.Dataset!"
+        raise TypeError(msg)
+
+    # NOTE: isel no longer seems to work on a Dataset, so
+    # first create the DataArray object, then use isel on it.
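The cast-then-slice pattern described in the comment above can be checked on a small dummy Dataset; a minimal sketch (illustrative data, not part of the patch):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"SpeciesConc_O3": (("time", "lev"), np.arange(6.0).reshape(2, 3))},
        coords={"time": [0, 1], "lev": [1.0, 2.0, 3.0]},
    )
    dr = ds["SpeciesConc_O3"]           # cast to DataArray first ...
    fliplev = len(dr["lev"]) - 1 - 0    # ... then flip level index ilev=0
    print(dr.isel(time=0, lev=fliplev).values)   # prints 2.0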
+    # -- Bob Yantosca (19 Jan 2023)
+    dr = ds[varname]
+    vdims = dr.dims
     if "time" in vdims and "lev" in vdims:
         if flip:
-            maxlev_i = len(ds['lev'])-1
-            return ds[varname].isel(time=itime, lev=maxlev_i - ilev)
-        return ds[varname].isel(time=itime, lev=ilev)
+            fliplev=len(dr['lev'])-1 - ilev
+            return dr.isel(time=itime, lev=fliplev)
+        return dr.isel(time=itime, lev=ilev)
     if ("time" not in vdims or itime == -1) and "lev" in vdims:
         if flip:
-            maxlev_i = len(ds['lev'])-1
-            return ds[varname].isel(lev=maxlev_i - ilev)
-        return ds[varname].isel(lev=ilev)
+            fliplev= len(dr['lev'])-1 - ilev
+            return dr.isel(lev=fliplev)
+        return dr.isel(lev=ilev)
     if "time" in vdims and "lev" not in vdims and itime != -1:
-        return ds[varname].isel(time=itime)
-    return ds[varname]
+        return dr.isel(time=itime)
+    return dr


 def rename_and_flip_gchp_rst_vars(

From 94903820431b423daa5d7d82df5529f5c46ba09e Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 19 Jan 2023 11:26:36 -0500
Subject: [PATCH 16/54] Add more error checks to slice_by_lev_and_time function

gcpy/util.py
- In slice_by_lev_and_time:
  - Raise a ValueError if varname is not found in the dataset
  - Also make sure the time dimension is not zero

Signed-off-by: Bob Yantosca
---
 gcpy/util.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcpy/util.py b/gcpy/util.py
index 368009d0..ea63d11e 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -698,23 +698,27 @@ def slice_by_lev_and_time(
     if not isinstance(ds, xr.Dataset):
         msg="ds is not of type xarray.Dataset!"
         raise TypeError(msg)
+    if not varname in ds.data_vars.keys():
+        msg="Could not find 'varname' in ds!"
+        raise ValueError(msg)

     # NOTE: isel no longer seems to work on a Dataset, so
     # first create the DataArray object, then use isel on it.
     # -- Bob Yantosca (19 Jan 2023)
     dr = ds[varname]
     vdims = dr.dims
-    if "time" in vdims and "lev" in vdims:
+    if ("time" in vdims and dr.time.size > 0) and "lev" in vdims:
         if flip:
-            fliplev=len(dr['lev'])-1 - ilev
+            fliplev=len(dr['lev']) - 1 - ilev
             return dr.isel(time=itime, lev=fliplev)
         return dr.isel(time=itime, lev=ilev)
     if ("time" not in vdims or itime == -1) and "lev" in vdims:
         if flip:
-            fliplev= len(dr['lev'])-1 - ilev
+            fliplev= len(dr['lev']) - 1 - ilev
             return dr.isel(lev=fliplev)
         return dr.isel(lev=ilev)
-    if "time" in vdims and "lev" not in vdims and itime != -1:
+    if ("time" in vdims and dr.time.size > 0 and itime != -1) and \
+        "lev" not in vdims:
         return dr.isel(time=itime)
     return dr

From a319945a74b399b645afa2d90cb92eaea2ac967d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 19 Jan 2023 13:08:28 -0500
Subject: [PATCH 17/54] Request cf_xarray==0.7.4 in environment.yml

docs/environment_files/environment.yml
- Request cf_xarray==0.7.4, in order to avoid issues with dropping
  the time dimension when subtracting 2 DataArrays

Signed-off-by: Bob Yantosca
---
 docs/environment_files/environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/environment_files/environment.yml b/docs/environment_files/environment.yml
index 5d483d24..84b03572 100644
--- a/docs/environment_files/environment.yml
+++ b/docs/environment_files/environment.yml
@@ -14,6 +14,7 @@ dependencies:
     - xesmf==0.5.1                 # Universal regridder
     - pip:
       - awscli>=1.22.83            # Utilities for AWS cloud
+      - cf_xarray==0.7.4           # CF conventions for xarray
       - dask==2021.7.1             # Parallel library; backend for xarray
       - docutils==0.16             # Convert text to other formats
       - h5netcdf==0.11.0           # Python interface to netCDF4/HDF5

From 1cef122fbc857cdb7decc0a148c698260325bef1 Mon Sep 17 00:00:00
2001 From: laestrada Date: Thu, 19 Jan 2023 13:46:20 -0500 Subject: [PATCH 18/54] fix for updated time coordinate --- gcpy/util.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gcpy/util.py b/gcpy/util.py index ea63d11e..26319f0e 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -629,6 +629,11 @@ def get_diff_of_diffs( ref = ref[varlist] dev = dev[varlist] if 'nf' not in ref.dims and 'nf' not in dev.dims: + # if the coords do not align then set time dimensions equal + try: + xr.align(dev, ref, join='exact') + except: + ref.coords["time"] = dev.coords["time"] with xr.set_options(keep_attrs=True): absdiffs = dev - ref fracdiffs = dev / ref From 7e05db365d0039b60a0bff323bcfea3196195423 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Feb 2023 14:34:57 -0500 Subject: [PATCH 19/54] Add zero-diff column in print_totals gcpy/benchmark.py - Now use f-strings to simplify print statements - Use numpy.array_equal to test if the Ref and Dev numpy arrays (after masking, if necessary) are equal. - Print a column for zero-diff results Signed-off-by: Bob Yantosca --- gcpy/util.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/gcpy/util.py b/gcpy/util.py index 26319f0e..53c3325a 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -227,34 +227,34 @@ def print_totals( # ================================================================== # Sum the Ref array (or set to NaN if missing) # ================================================================== + refarr = ref.values if ref_is_all_nan: total_ref = np.nan else: - if masks is None: - total_ref = np.sum(ref.values) - else: - arr = np.ma.masked_array(ref.values, masks["Ref_TropMask"]) - total_ref = np.sum(arr) + if masks is not None: + refarr = np.ma.masked_array(refarr, masks["Ref_TropMask"]) + total_ref = np.sum(refarr) # ================================================================== # Sum the Dev array (or set to NaN if missing) # ================================================================== + devarr = dev.values if dev_is_all_nan: total_dev = np.nan else: - if masks is None: - total_dev = np.sum(dev.values) - else: - arr = np.ma.masked_array(dev.values, masks["Dev_TropMask"]) - total_dev = np.sum(arr) + if masks is not None: + devarr = np.ma.masked_array(devarr, masks["Dev_TropMask"]) + total_dev = np.sum(devarr) # ================================================================== # Compute differences (or set to NaN if missing) # ================================================================== if ref_is_all_nan or dev_is_all_nan: diff = np.nan + zero_diff = False else: diff = total_dev - total_ref + zero_diff = np.array_equal(refarr, devarr) # ================================================================== # Compute % differences (or set to NaN if missing) @@ -270,10 +270,7 @@ def print_totals( # ================================================================== # Write output to file # ================================================================== - print( - f"{display_name.ljust(18)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f}", file=f - ) - + print(f"{display_name.ljust(14)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) def get_species_categories( benchmark_type="FullChemBenchmark" @@ -2119,3 +2116,20 @@ def read_config_file(config_file, quiet=False): raise Exception(msg) from err return config + + +def is_zero_diff(refdr, devdr): + """ + Returns True if two DataArray objects contain identical 
data, + or False otherwise + + Args: + ----- + refdr (xarray DataArray) + The "Ref" DataArray object to be tested. + devdr (xarray DataArray) + The "Dev" DataArray object to be tested + """ + if not np.array_equal(refdr.values, devdr.values): + return False + return True From d1aa8bf28d70d1e94434797578ae23dfa00d1e5a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Feb 2023 15:15:01 -0500 Subject: [PATCH 20/54] Now print if Dev and Ref are identical at top of mass table file gcpy/benchmark.py - Update try/catch statement for file open - Use xr.Dataset.equals to test if the Ref and Dev datasets are identical to each other or not. (NOTE: This will likely fail when testing GCC vs GCHP, since the coordinates and data variables are different). This was the best way to proceed because in routine print_totals, the test for zero-diff is done per variable. - Now print out identicality or difference of Dev vs Ref in the table header gcpy/util.py - Restored the display name to a width of 19, as this is also used by the emissions tables. Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 39 ++++++++++++++++++--------------------- gcpy/util.py | 3 ++- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 851ebc1a..d1f7597b 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -433,32 +433,29 @@ def create_global_mass_table( # Create file try: f = open(outfilename, "w") - except FileNotFoundError: - msg = "Could not open {} for writing!".format(outfilename) + except (IOError, OSError, FileNotFoundError) as e: + msg = f"Could not open {outfilename} for writing!" raise FileNotFoundError(msg) + # Determine if the two data sets are identical + diff_str="### Dev differs from Ref" + if xr.Dataset.equals(refdata, devdata): + diff_str="### Dev is identical to Ref" + # Title strings + title1 = f"### Global mass (Gg) {label} (Trop + Strat)" if trop_only: - title1 = "### Global mass (Gg) {} (Trop only)".format(label) - else: - title1 = "### Global mass (Gg) {} (Trop + Strat)".format(label) - title2 = "### Ref = {}; Dev = {}".format(refstr, devstr) + title1 = f"### Global mass (Gg) {label} (Trop only)" + title2 = f"### Ref = {refstr}; Dev = {devstr}" # Print header to file print("#" * 83, file=f) - print("{}{}".format(title1.ljust(80), "###"), file=f) - print("{}{}".format(title2.ljust(80), "###"), file=f) + print(f"{title1 : <80}{'###'}", file=f) + print(f"{title2 : <80}{'###'}", file=f) + print(f"{'###' : <80}{'###'}", file=f) + print(f"{diff_str : <80}{'###'}", file=f) print("#" * 83, file=f) - print( - "{}{}{}{}{}".format( - " ".ljust(19), - "Ref".rjust(20), - "Dev".rjust(20), - "Dev - Ref".rjust(14), - "% diff".rjust(10), - ), - file=f, - ) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) # ================================================================== # Print global masses for all species @@ -521,7 +518,7 @@ def create_global_mass_table( delta_p=met_and_masks["Dev_Delta_P"], box_height=met_and_masks["Dev_BxHeight"], ) - + # ============================================================== # Print global masses for Ref and Dev # (we will mask out tropospheric boxes in util.print_totals) @@ -531,13 +528,13 @@ def create_global_mass_table( refarray, devarray, f, - masks=met_and_masks, + masks=met_and_masks ) else: util.print_totals( refarray, devarray, - f, + f ) # ================================================================== diff --git a/gcpy/util.py b/gcpy/util.py index 
53c3325a..aa46b313 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -270,7 +270,8 @@ def print_totals(
     # ==================================================================
     # Write output to file
     # ==================================================================
-    print(f"{display_name.ljust(14)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f)
+    print(f"{display_name.ljust(19)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f)
+

 def get_species_categories(
         benchmark_type="FullChemBenchmark"

From d3d5568b17d4cd4f975f93e27144dbcaa6e6204b Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 6 Feb 2023 16:40:40 -0500
Subject: [PATCH 21/54] Display if Dev and Ref are identical in emissions &
 inventory tables

gcpy/benchmark.py
- In create_benchmark_emission_tables:
  - Make sure columns and headers line up
  - Now compute if each diagnostic for an emissions species or
    inventory is identical, and display that information in the header
  - Improve error trapping when opening the file for output
- In create_benchmark_mass_tables:
  - Make sure columns and headers line up

gcpy/util.py
- In routine print_totals, make sure the header line with Ref & Dev
  matches up with the output of create_benchmark_emissions_tables
  and create_benchmark_mass_tables
- Removed is_zero_diff function, we will add another function
  like it later.

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark.py | 63 ++++++++++++++++++++++++++---------------------
 gcpy/util.py      | 21 ++--------------
 2 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index d1f7597b..532caac3 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -176,10 +176,11 @@ def create_total_emissions_table(
     # =================================================================
     # Open the file for output
     # =================================================================
+    # Create file
     try:
         f = open(outfilename, "w")
-    except FileNotFoundError:
-        msg = "Could not open {} for writing!".format(outfilename)
+    except (IOError, OSError, FileNotFoundError) as e:
+        msg = f"Could not open {outfilename} for writing!"
         raise FileNotFoundError(msg)

     # =================================================================
@@ -220,29 +221,35 @@ def create_total_emissions_table(

         # Title strings
         if "Inv" in template:
-            print("Computing inventory totals for {}".format(species_name))
-            title1 = "### Emissions totals for inventory {} [Tg]".format(
-                species_name)
+            print(f"Computing inventory totals for {species_name}")
+            title0 = f"for inventory {species_name}"
+            title1 = f"### Emissions totals {title0} [Tg]"
         else:
-            print("Computing emissions totals for {}".format(species_name))
-            title1 = "### Emissions totals for species {} [Tg]".format(species_name)
-
-        title2 = "### Ref = {}; Dev = {}".format(refstr, devstr)
+            print(f"Computing emissions totals for {species_name}")
+            title0 = f"for species {species_name}"
+            title1 = f"### Emissions totals {title0} [Tg]"
+
+        title2 = f"### Ref = {refstr}; Dev = {devstr}"
+
+        # Determine if all DataArrays for a given species or inventory
+        # have identical data, and define a display string.
+ diagnames = [v for v in varnames if species_name in v] + diff_ct = 0 + for v in diagnames: + if np.array_equal(refdata[v].values, devdata[v].values): + diff_ct += 1 + diff_str = f"### Dev differs from Ref {title0}" + if diff_ct == len(diagnames): + diff_str = f"### Dev is identical to Ref {title0}" # Print header to file - print("#" * 83, file=f) - print("{}{}".format(title1.ljust(80), "###"), file=f) - print("{}{}".format(title2.ljust(80), "###"), file=f) - print("#" * 83, file=f) - print( - "{}{}{}{}{}".format( - " ".ljust(19), - "Ref".rjust(20), - "Dev".rjust(20), - "Dev - Ref".rjust(14), - "% diff".rjust(10), - ), - file=f) + print("#" * 91, file=f) + print(f"{title1 : <88}{'###'}", file=f) + print(f"{title2 : <88}{'###'}", file=f) + print(f"{'###' : <88}{'###'}", file=f) + print(f"{diff_str : <88}{'###'}", file=f) + print("#" * 91, file=f) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) # ============================================================= # Loop over all emissions variables corresponding to this @@ -449,12 +456,12 @@ def create_global_mass_table( title2 = f"### Ref = {refstr}; Dev = {devstr}" # Print header to file - print("#" * 83, file=f) - print(f"{title1 : <80}{'###'}", file=f) - print(f"{title2 : <80}{'###'}", file=f) - print(f"{'###' : <80}{'###'}", file=f) - print(f"{diff_str : <80}{'###'}", file=f) - print("#" * 83, file=f) + print("#" * 91, file=f) + print(f"{title1 : <88}{'###'}", file=f) + print(f"{title2 : <88}{'###'}", file=f) + print(f"{'###' : <88}{'###'}", file=f) + print(f"{diff_str : <88}{'###'}", file=f) + print("#" * 91, file=f) print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) # ================================================================== diff --git a/gcpy/util.py b/gcpy/util.py index aa46b313..0232722b 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -222,7 +222,7 @@ def print_totals( # Special handling for totals if "_TOTAL" in diagnostic_name.upper(): - print("-"*83, file=f) + print("-"*90, file=f) # ================================================================== # Sum the Ref array (or set to NaN if missing) @@ -270,7 +270,7 @@ def print_totals( # ================================================================== # Write output to file # ================================================================== - print(f"{display_name.ljust(19)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) + print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) def get_species_categories( @@ -2117,20 +2117,3 @@ def read_config_file(config_file, quiet=False): raise Exception(msg) from err return config - - -def is_zero_diff(refdr, devdr): - """ - Returns True if two DataArray objects contain identical data, - or False otherwise - - Args: - ----- - refdr (xarray DataArray) - The "Ref" DataArray object to be tested. - devdr (xarray DataArray) - The "Dev" DataArray object to be tested - """ - if not np.array_equal(refdr.values, devdr.values): - return False - return True From 118ec1a3d0ebcd613a798ac9066b3708529809b5 Mon Sep 17 00:00:00 2001 From: Lizzie Lundgren Date: Tue, 7 Feb 2023 13:22:57 -0500 Subject: [PATCH 22/54] Initial edits to GCPy install ReadTheDocs page The new guide instructs users to clone GCPy, add to python path, and create a python environment using the environment.yml file in the repo. 
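A quick way to confirm that a clone added to PYTHONPATH is the one Python actually imports is to query the import machinery; a small sketch using only the standard library (names illustrative):

    import importlib.util

    spec = importlib.util.find_spec("gcpy")
    if spec is None:
        print("gcpy is not on PYTHONPATH")
    else:
        print(f"gcpy will be imported from: {spec.origin}")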
We are abandoning installation of GCPy using conda, at least for now,
since it is broken. That being said, the environment.yml file in this
commit does not work due to packages not found on conda-forge. This
needs to be addressed separately.

Signed-off-by: Lizzie Lundgren
---
 docs/source/Getting-Started-with-GCPy.rst | 249 ++++++++++++----------
 1 file changed, 138 insertions(+), 111 deletions(-)

diff --git a/docs/source/Getting-Started-with-GCPy.rst b/docs/source/Getting-Started-with-GCPy.rst
index fc0b725e..5ad75a81 100644
--- a/docs/source/Getting-Started-with-GCPy.rst
+++ b/docs/source/Getting-Started-with-GCPy.rst
@@ -4,130 +4,157 @@ Installing GCPy
 ###############

-GCPy and its dependencies can be installed using `Conda
-`__ in either standard user mode
-(allow Conda to handle installation without Git support) or
-development mode using Conda and `Conda-build
-`__ (install from a Git
-clone). You can also manually install GCPy using a clone of the source
-code, but this option requires you to add the package to your
-:envvar:`PYTHONPATH` manually and to install properly versioned
-dependencies on your own.
+============
+Requirements
+============

-.. _install-reqs:
+GCPy is currently supported for Linux and MacOS operating systems. Due
+to a reliance on several packages without Windows support, **GCPy is
+not currently supported for Windows**. You will receive an error
+message if you attempt to use GCPy on Windows.

-=====================
-Requirements for GCPy
-=====================
+The only essential software you need before installing GCPy is a
+distribution of the Conda package manager. This is used to create a
+python environment for GCPy containing all of its software dependencies,
+including what version of python you use. We recommend using GCPy with
+python version 3.9.

-.. _install-reqs-software:
+You can check if you already have conda installed by running the
+following command:

-Software Prerequisites
-----------------------
+.. code:: console

-GCPy is currently supported for Linux and MacOS operating systems. Due
-to a reliance on several packages without Windows support, **GCPy is
-not currently supported for Windows**. You will receive an error
-message if you attempt to install GCPy through Conda on Windows.
-
-The only essential software package you need before installing GCPy is a
-distribution of the Conda package manager, which is used to install GCPy
-and its dependencies. It is also highly recommended that you create an
-environment in Conda for working with GCPy. Steps to setup Conda are
-described below:
-
-#. Install `Miniconda or Anaconda `__.
-   Miniconda is much more lightweight and functions perfectly well for
-   GCPy purposes, while Anaconda automatically includes many extra
-   packages that are not directly relevant to GCPy.
-#. After installing Miniconda or Anaconda, create a Conda environment
-   for using GCPy. The basic usage (also found on the `Conda Github
-   hompeage `__) is:
-
-   ..
code-block:: bash - - # Navigate to the top-level GCPy folder - cd /path/to/gcpy - - # Create a Conda environment for working with GCPy - conda env create -n gcpy_env --file=environment.yml - - # Activate (enter) your new Conda environment - $ conda activate gcpy_env - - # Deactivate (exit) your Conda environment - $ conda deactivate - - From within your Conda environment, you can follow the instructions - on `Installing normally through Conda (if you don't plan on - modifying GCPy source code) <#installing-gcpy-for-non-developers-using-conda>`__ or `Installing in development - mode through Conda-build (for developers) <#install_dev>`__. - -.. _install-reqs-pydeps: - -Python dependencies -------------------- - -Conda handles the installation of all dependencies for GCPy -automatically. Most dependencies have minimum version -requirements. We recommend using GCPy with Python 3.9. The list of -dependencies (not including sub-dependencies) that are installed by -Conda includes: - -- `Python 3.9 `_ -- `cartopy `_ -- `matplotlib `_ -- `numpy `_ -- `scipy `_ -- `xarray `_ -- `xesmf `_ -- `esmpy `_ -- `pypdf2 `_ -- `joblib `_ -- `xbpch `_ -- `pandas `_ -- `sparselt >= 0.1.3 `_ - -A full list of package version requirements may be found in -:file:`docs/source/environment.yml`. There is also a symbolic link to -this file from the top-level gcpy folder. - -.. _install-non-devs: - -============================================== -Installing GCPy for non-developers using Conda -============================================== - -GCPy is available through the :code:`conda-forge` channel under the -name :code:`geoschem-gcpy`. Installing GCPy in your Conda environment -requires two commands: + $ conda --version + +If conda is not already installed then we recommend using Miniconda to +install it. Miniconda is a minimal installer for conda that generally +includes many fewer packages in the base environment than are available +for download. This provides a lightweight conda install from which you +can create custom python environments with whatever python packages you +wish to use, including an environment with GCPy dependencies. To install +Miniconda follow instructions in the `Miniconda docs `__. We recommend using Python 3.9. + +========================================== +Steps to install GCPy and its dependencies +========================================== + +#. Step 0: Install conda if not already installed. + +See the Requirements section above. + +#. Step 1: Download GCPy + +Create and go to the directory in which you would like to store GCPy. In +this example we will store GCPy in a python/packages subdirectory in the +home directory, but you can store it wherever you wish. You can also name +the GCPy download whatever you want. In this example the GCPy directory +is called GCPy. .. code:: console - $ conda config --add channels conda-forge - $ conda install geoschem-gcpy + $ cd $HOME/python/packages + $ git clone https://github.com/geoschem/gcpy.git GCPy + $ cd GCPy + +#. Step 2: Create new python virtual environment for GCPy + +A python virtual environment is a named set of python installs, +e.g. packages, that are independent of other virtual environments. +Using an environment dedicated to GCPy is useful to maintain a set +of package dependencies compatible with GCPy without interfering with +python packages you use for other work. You can create a python virtual +environment from anywhere on your system. It will be stored in your +conda install rather than the directory from which you create it. 
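When an environment is active, conda records its name in the CONDA_DEFAULT_ENV environment variable, which gives a quick sanity check; a short sketch (assuming a standard conda setup):

    import os

    env = os.environ.get("CONDA_DEFAULT_ENV", "none")
    print(f"Active conda environment: {env}")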
-
+You can create a python virtual environment using a file that lists
+all packages and their versions to be included in the environment.
+GCPy includes such a file, environment.yml, located in the top-level
+directory of the package.

-.. _install-devs:
+Run the following command at the command prompt to create a virtual
+environment for use with GCPy. You can name the environment whatever
+you wish. This example names it gcpy_env.

-==============================
-Installing GCPy for developers
-==============================
+.. code:: console
+
+    $ conda env create -n gcpy_env --file=environment.yml

-If you wish to make changes to the GCPy source code with the goal of
-contributing to GCPy development, you will need to install GCPy from a
-clone of the GCPy Git repository:
+Once successfully created you can load the environment by running the
+following command, specifying the name of your environment.

 .. code:: console

-   $ git clone https://github.com/geoschem/gcpy.git
-   $ cd gcpy
-   $ conda config --add channels conda-forge
-   $ conda install geoschem-gcpy --only-deps
-   $ pip install -e .
+    $ conda activate gcpy_env
+
+To exit the environment do the following:
+
+.. code:: console
+
+    $ conda deactivate
+
+#. Step 3: Add GCPy to python path
+
+The environment variable PYTHONPATH specifies the locations of python
+libraries on your system that are not included in your conda environment.
+If GCPy is included in PYTHONPATH then python will recognize its
+existence when you try to use it. Add the following line to your startup
+script, e.g. .bashrc, and edit the path to where you are storing GCPy.
+
+.. code:: console
+
+    PYTHONPATH=$PYTHONPATH:$HOME/python/packages/GCPy
+
+#. Step 4: Perform a simple test
+
+Run the following commands in your terminal to check if the
+installation was successful.
+
+.. code:: console
+
+    $ source $HOME/.bashrc # Alternatively close and reopen your terminal
+    $ echo $PYTHONPATH # Check it contains path to your GCPy clone
+    $ conda activate gcpy_env
+    $ conda list # Check it contains contents of gcpy env file
+    $ python
+    >>> import gcpy
+
+If no errors were encountered then you successfully installed GCPy and
+its dependencies.
+
+=======================
+Upgrading GCPy versions
+=======================
+
+Sometimes the GCPy dependency list changes with a new GCPy version,
+either through the addition of new packages or a change in the minimum
+version. You can always update to the latest GCPy version from within
+your GCPy clone, and then update your virtual environment using the
+environment.yml file included in the package.
+
+Run the following commands to update your GCPy version to the
+latest available.
+
+.. code:: console
+
+    $ cd $HOME/python/packages/GCPy
+    $ git fetch -p
+    $ git checkout main
+    $ git pull
+
+You can also check out an older version by doing the following:
+
+.. code:: console
+
+    $ cd $HOME/python/packages/GCPy
+    $ git fetch -p
+    $ git tag
+    $ git checkout tags/version_you_want
+
+Once you have the version you wish to use, run the following
+commands to update your virtual environment:
+
+.. code:: console

-Conda will handle the installation of dependencies when you install
-from this clone, and pip will point all GCPy links to this directory.
+    $ source activate gcpy_env
+    $ cd $HOME/python/packages/GCPy
+    $ conda env update --file environment.yml --prune

From 53b75ad17b61ee907bd56145551aa6901d43f02d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Tue, 7 Feb 2023 15:53:42 -0500
Subject: [PATCH 23/54] Add list of species that differ to top of emissions,
 mass tables

gcpy/benchmark.py
- In routines create_benchmark_mass_tables and
  create_benchmark_emission_tables
  - Add placeholder text for the list of species diffs
  - Define "diff_list" list to hold the list of species names
    for species w/ nonzero diffs
  - Remove references to np.array_equal and xr.dataset.equal
  - Header lines are now 89 chars wide
  - Now pass diff_list to util.print_totals
  - Trim trailing whitespace
  - Comment out code in create_benchmark_summary_table for now,
    we will add this functionality back in soon.

gcpy/util.py
- Now import textwrap.wrap function
- In function "print_totals"
  - Now accepts and returns "diff_list"
  - Add more error checks
  - Compute total_ref and total_dev in 64-bit floating point
- Added function "unique_values"
- Added function "insert_text_into_file"

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark.py | 206 +++++++++++++++++++++++++++++++++++++---------
 gcpy/util.py      | 131 ++++++++++++++++++++++++++---
 2 files changed, 288 insertions(+), 49 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index 532caac3..71ca17d2 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -1,4 +1,4 @@
-"""
+6"""
 Specific utilities for creating plots from GEOS-Chem
 benchmark simulations.
 """
@@ -176,8 +176,16 @@ def create_total_emissions_table(
     try:
         f = open(outfilename, "w")
     except (IOError, OSError, FileNotFoundError) as e:
-        msg = f"Could not open {outfilename} for writing!"
-        raise FileNotFoundError(msg)
+        raise FileNotFoundError(f"Could not open {outfilename} for writing!") from e
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+    print(f"Species that differ between {refstr} and {devstr}", file=f)
+    print(f"{placeholder}\n\n", file=f)
+
+    # Define a list for differences
+    diff_list = []

     # =================================================================
     # Loop through all of the species are in species_dict
@@ -231,25 +239,12 @@ def create_total_emissions_table(

         title2 = f"### Ref = {refstr}; Dev = {devstr}"

-        # Determine if all DataArrays for a given species or inventory
-        # have identical data, and define a display string.
- diagnames = [v for v in varnames if species_name in v] - diff_ct = 0 - for v in diagnames: - if np.array_equal(refdata[v].values, devdata[v].values): - diff_ct += 1 - diff_str = f"### Dev differs from Ref {title0}" - if diff_ct == len(diagnames): - diff_str = f"### Dev is identical to Ref {title0}" - # Print header to file - print("#" * 91, file=f) - print(f"{title1 : <88}{'###'}", file=f) - print(f"{title2 : <88}{'###'}", file=f) - print(f"{'###' : <88}{'###'}", file=f) - print(f"{diff_str : <88}{'###'}", file=f) - print("#" * 91, file=f) - print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) + print("#" * 89, file=f) + print(f"{title1 : <86}{'###'}", file=f) + print(f"{title2 : <86}{'###'}", file=f) + print("#" * 89, file=f) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) # ============================================================= # Loop over all emissions variables corresponding to this @@ -335,17 +330,30 @@ def create_total_emissions_table( # ========================================================== # Print emission totals for Ref and Dev # ========================================================== - util.print_totals(refarray, devarray, f) + util.print_totals( + refarray, + devarray, + f, + diff_list + ) # Add newlines before going to the next species print(file=f) print(file=f) # ================================================================= - # Close file + # Cleanup and quit # ================================================================= + + # Close file f.close() + # Reopen file and insert list of species with nonzero diffs + util.insert_text_into_file( + filename=outfilename, + search_text=placeholder, + replace_text=util.unique_values(diff_list, drop=[None]) + ) def create_global_mass_table( refdata, @@ -444,10 +452,8 @@ def create_global_mass_table( msg = f"Could not open {outfilename} for writing!" 
raise FileNotFoundError(msg)

-    # Determine if the two data sets are identical
-    diff_str="### Dev differs from Ref"
-    if xr.Dataset.equals(refdata, devdata):
-        diff_str="### Dev is identical to Ref"
+    # Define a list for differences
+    diff_list = []

     # Title strings
     title1 = f"### Global mass (Gg) {label} (Trop + Strat)"
@@ -456,18 +462,25 @@
     title2 = f"### Ref = {refstr}; Dev = {devstr}"

     # Print header to file
-    print("#" * 91, file=f)
-    print(f"{title1 : <88}{'###'}", file=f)
-    print(f"{title2 : <88}{'###'}", file=f)
-    print(f"{'###' : <88}{'###'}", file=f)
-    print(f"{diff_str : <88}{'###'}", file=f)
-    print("#" * 91, file=f)
-    print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f)
+    print("#" * 89, file=f)
+    print(f"{title1 : <86}{'###'}", file=f)
+    print(f"{title2 : <86}{'###'}", file=f)
+    print("#" * 89, file=f)
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+    print(f"\nSpecies that differ between {refstr} and {devstr}", file=f)
+    print(f"{placeholder}\n\n", file=f)
+
+    # Column headers
+    print(f"{title2}", file=f)
+    print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f)

     # ==================================================================
     # Print global masses for all species
     #
-    # NOTE: By this point, all species will be in both Ref and Dev'
+    # NOTE: By this point, all species will be in both Ref and Dev'
     # because we have added them in the calling routine
     # ==================================================================
     for v in varlist:
@@ -525,7 +538,7 @@
             delta_p=met_and_masks["Dev_Delta_P"],
             box_height=met_and_masks["Dev_BxHeight"],
         )
-
+
         # ==============================================================
         # Print global masses for Ref and Dev
         # (we will mask out tropospheric boxes in util.print_totals)
@@ -535,13 +548,14 @@
             refarray,
             devarray,
             f,
+            diff_list,
             masks=met_and_masks
         )
     else:
         util.print_totals(
             refarray,
             devarray,
-            f
+            f,
+            diff_list
         )

     # ==================================================================
-    # Close files
+    # Cleanup and quit
     # ==================================================================
+
+    # Close file
     f.close()

+    # Reopen file and insert list of species with nonzero diffs
+    util.insert_text_into_file(
+        filename=outfilename,
+        search_text=placeholder,
+        replace_text=util.unique_values(diff_list, drop=[None])
+    )
+

 def make_benchmark_conc_plots(
     ref,
@@ -4367,3 +4391,109 @@ def get_species_database_dir(config):
     else:
         msg = f"Could not find the {spcdb_dir}/species_database.yml file!"
         raise FileNotFoundError(msg)
+
+
+def create_benchmark_summary_table(
+    refpath,
+    refstr,
+    devpath,
+    devstr,
+    dst="./benchmark",
+    overwrite=False,
+    outfilename="Summary.txt",
+    verbose=False,
+    spcdb_dir=os.path.dirname(__file__)
+):
+    """
+    Creates a table of global masses for a list of species contained in
+    two data sets. The data sets, which typically represent output from two
+    different model versions, are usually contained in netCDF data files.
+
+    Args:
+        refdata: xarray Dataset
+            The first data set to be compared (aka "Reference").
+        refstr: str
+            A string that can be used to identify refdata
+            (e.g. a model version number or other identifier).
+        devdata: xarray Dataset
+            The second data set to be compared (aka "Development").
+        devstr: str
+            A string that can be used to identify the data set specified
+            by devfile (e.g. a model version number or other identifier).
+        varlist: list of strings
+            List of species concentration variable names to include
+            in the list of global totals.
+        met_and_masks: dict of xarray DataArray
+            Dictionary containing the meteorological variables and
+            masks for the Ref and Dev datasets.
+        label: str
+            Label to go in the header string. Can be used to
+            pass the month & year.
+
+    Keyword Args (optional):
+        trop_only: bool
+            Set this switch to True if you wish to print totals
+            only for the troposphere.
+            Default value: False (i.e. print whole-atmosphere totals).
+        outfilename: str
+            Name of the text file which will contain the table of
+            emissions totals.
+            Default value: "GlobalMass_TropStrat.txt"
+        verbose: bool
+            Set this switch to True if you wish to print out extra
+            informational messages.
+            Default value: False
+        spcdb_dir: str
+            Directory of species_database.yml file
+            Default value: Directory of GCPy code repository
+
+    Remarks:
+        This method is mainly intended for model benchmarking purposes,
+        rather than as a general-purpose tool.
+
+        Species properties (such as molecular weights) are read from a
+        YAML file called "species_database.yml".
+    """
+
+    # ==================================================================
+    # Initialization and data read
+    # ==================================================================
+    if os.path.isdir(dst) and not overwrite:
+        msg = "Directory {} exists. Pass overwrite=True to overwrite " \
+            + "files in that directory, if any."
+        msg = msg.format(dst)
+        raise ValueError(msg)
+    if not os.path.isdir(dst):
+        os.mkdir(dst)
+
+#    # ==================================================================
+#    # Get lists of files in the Ref and Dev paths
+#    # ==================================================================
+#
+#    # Get a list of files in the Ref path
+#    ref_files = []
+#    for (path, names, files) in os.walk(refpath):
+#        for rf in files:
+#            ref_files.append(os.path.join(path, rf))
+#
+#    # Get a list of files in the Dev path
+#    dev_files = []
+#    for (path, names, files) in os.walk(devpath):
+#        for df in files:
+#            dev_files.append(os.path.join(path, df))
+#
+#    # ==================================================================
+#    # Open file for output
+#    # ==================================================================
+#
+#    # Create file
+#    try:
+#        f = open(os.path.join(dst, outfilename), "w")
+#    except (IOError, OSError, FileNotFoundError) as e:
+#        msg = f"Could not open {outfilename} for writing!"
+# raise e(msg) +# +# # ================================================================== +# # Close files +# # ================================================================== +# f.close() diff --git a/gcpy/util.py b/gcpy/util.py index 0232722b..ea1ebba9 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -10,6 +10,7 @@ import numpy as np import xarray as xr from PyPDF2 import PdfFileWriter, PdfFileReader +from textwrap import wrap def convert_lon( data, @@ -161,7 +162,8 @@ def print_totals( ref, dev, f, - masks=None + diff_list, + masks=None, ): """ Computes and prints Ref and Dev totals (as well as the difference @@ -194,9 +196,11 @@ def print_totals( # Make sure that both Ref and Dev are xarray DataArray objects if not isinstance(ref, xr.DataArray): - raise TypeError("The ref argument must be an xarray DataArray!") + raise TypeError("The 'ref' argument must be an xarray DataArray!") if not isinstance(dev, xr.DataArray): - raise TypeError("The dev argument must be an xarray DataArray!") + raise TypeError("The 'dev' argument must be an xarray DataArray!") + if not isinstance(diff_list, list): + raise TypeError("The 'diff_list' argument must be a list!") # Determine if either Ref or Dev have all NaN values: ref_is_all_nan = np.isnan(ref.values).all() @@ -220,6 +224,12 @@ def print_totals( # Create the display name by editing the diagnostic name display_name = create_display_name(diagnostic_name) + # Get the species name from the display name + species_name = display_name + c = species_name.find(" ") + if c > 0: + species_name = display_name[0:c] + # Special handling for totals if "_TOTAL" in diagnostic_name.upper(): print("-"*90, file=f) @@ -233,7 +243,7 @@ def print_totals( else: if masks is not None: refarr = np.ma.masked_array(refarr, masks["Ref_TropMask"]) - total_ref = np.sum(refarr) + total_ref = np.sum(refarr, dtype=np.float64) # ================================================================== # Sum the Dev array (or set to NaN if missing) @@ -244,17 +254,25 @@ def print_totals( else: if masks is not None: devarr = np.ma.masked_array(devarr, masks["Dev_TropMask"]) - total_dev = np.sum(devarr) + total_dev = np.sum(devarr, dtype=np.float64) # ================================================================== # Compute differences (or set to NaN if missing) # ================================================================== if ref_is_all_nan or dev_is_all_nan: diff = np.nan - zero_diff = False else: diff = total_dev - total_ref - zero_diff = np.array_equal(refarr, devarr) + has_diffs = abs(diff) > np.float64(0.0) + + # Append to the list of differences. If no differences then append + # None. Duplicates can be stripped out in the calling routine. 
+    if has_diffs:
+        diff_str = " * "
+        diff_list.append(species_name)
+    else:
+        diff_str = ""
+        diff_list.append(None)

     # ==================================================================
     # Compute % differences (or set to NaN if missing)
@@ -268,9 +286,11 @@
         pctdiff = np.nan

     # ==================================================================
-    # Write output to file
+    # Write output to file and return
     # ==================================================================
-    print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f)
+    print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {diff_str}", file=f)
+
+    return diff_list


 def get_species_categories(
@@ -706,9 +726,9 @@ def slice_by_lev_and_time(
         raise ValueError(msg)

     # NOTE: isel no longer seems to work on a Dataset, so
-    # first create the DataArray object, then use isel on it.
+    # first create the DataArray object, then use isel on it.
     # -- Bob Yantosca (19 Jan 2023)
-    dr = ds[varname]
+    dr = ds[varname]
     vdims = dr.dims
@@ -2117,3 +2137,92 @@ def read_config_file(config_file, quiet=False):
         raise Exception(msg) from err

     return config
+
+
+def unique_values(
+    this_list,
+    drop=None,
+):
+    """
+    Given a list, returns a sorted list of unique values.
+
+    Args:
+    -----
+    this_list : list
+        Input list (may contain duplicate values)
+
+    drop: list of str
+        List of variable names to exclude
+
+    Returns:
+    --------
+    unique: list
+        List of unique values from this_list
+    """
+    if not isinstance(this_list, list):
+        raise ValueError("Argument 'this_list' must be a list object!")
+    if drop is not None and not isinstance(drop, list):
+        raise ValueError("Argument 'drop' must be a list object!")
+
+    unique = list(set(this_list))
+
+    if drop is not None:
+        for d in drop:
+            unique.remove(d)
+
+    unique.sort()
+
+    return unique
+
+
+def insert_text_into_file(
+    filename,
+    search_text,
+    replace_text,
+    width=80
+):
+    """
+    Convenience routine to insert text into a file. The best way
+    to do this is to read the contents of the file, manipulate the
+    text, and then overwrite the file.
+
+    Args:
+    -----
+    filename: str
+        The file with text to be replaced.
+
+    search_text: str
+        Text string in the file that will be replaced.
+
+    replace_text: str or list of str
+        Text that will replace 'search_text'
+
+    width: int
+        Will "word-wrap" the text in 'replace_text' to this width
+    """
+    if not isinstance(search_text, str):
+        raise ValueError("Argument 'search_text' needs to be a string!")
+    if not isinstance(replace_text, str):
+        if isinstance(replace_text, list):
+            replace_text = ' '.join(replace_text)
+        else:
+            raise ValueError(
+                "Argument 'replace_text' needs to be a list or a string"
+            )
+
+    # Word-wrap the replacement text
+    replace_text = wrap(replace_text, width=width)
+    replace_text = '\n'.join(replace_text)
+
+    with open(filename, "r") as f:
+        filedata = f.read()
+        f.close()
+
+    filedata = filedata.replace(
+        search_text,
+        replace_text
+    )
+
+    with open(filename, "w") as f:
+        f.write(filedata)
+        f.close()

From c27c7d28484ea41257468b8fcbced3e762dc6b3a Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Wed, 8 Feb 2023 16:36:34 -0500
Subject: [PATCH 24/54] Further updates for printing information about diffs
 btw Dev & Ref

benchmark/run_benchmark.py
- Now call "make_benchmark_summary_table" for GCC vs GCC,
  GCHP vs GCC, and GCHP vs GCHP benchmarks.
benchmark/1mo_benchmark.yml - Added "summary_table" switch under "plot_options" gcpy/benchmark.py - Removed typo at top of script - In create_benchmark_emissions_table and create_global_mass_table: - Write an alternate message if the string with species differences is longer than ~80 chars - Added new function "diff_list_to_text" - Added new function "make_directory" - Updated "create_benchmrk_summary_table" to call get_filepaths for either GCHP or GCC Ref and/or Dev data gcpy/util.py - In routine print_totals: - Now keep track of which species have differences btw Dev & Ref in "diff_list", which is passed to the calling routine - Added new function "wrap_text" - Added new function "array_equals" CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 5 + benchmark/1mo_benchmark.yml | 1 + benchmark/run_benchmark.py | 131 ++++++++++++--- gcpy/benchmark.py | 311 +++++++++++++++++++++++++++--------- gcpy/util.py | 150 +++++++++++++---- 5 files changed, 465 insertions(+), 133 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d414db8..37327b9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## Unreleased +### Added +- Benchmark summary table output (intended for 1hr & 1mo benchmarks) +- Species/emissions/inventories that differ between Dev & Ref versions are now printed at the top of the benchmark emissions, inventory, and global mass tables. if there are too many species with diffs, an alternate message is printed. +- New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions. + ## [1.3.2] -- 2022-10-25 ### Fixes diff --git a/benchmark/1mo_benchmark.yml b/benchmark/1mo_benchmark.yml index 4f5f2f67..3a15f591 100644 --- a/benchmark/1mo_benchmark.yml +++ b/benchmark/1mo_benchmark.yml @@ -108,6 +108,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + summary_table: True plot_options: # Plot concentrations and emissions by category? by_spc_cat: True by_hco_cat: True diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index db5b2be3..e7c14e92 100755 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -642,29 +642,6 @@ def run_benchmark_default(config): if config["options"]["outputs"]["OH_metrics"]: print("\n%%% Creating GCC vs. GCC OH metrics table %%%") - # Use this for benchmarks prior to GEOS-Chem 13.0.0 - # # Diagnostic collection files to read - # col = "ConcAfterChem" - # ref = get_filepath(gcc_vs_gcc_refdir, col, gcc_ref_date) - # dev = get_filepath(gcc_vs_gcc_devdir, col, gcc_dev_date) - # - # # Meteorology data needed for calculations - # col = "StateMet" - # refmet = get_filepath(gcc_vs_gcc_refdir, col, gcc_ref_date) - # devmet = get_filepath(gcc_vs_gcc_devdir, col, gcc_dev_date) - # - # # Print OH metrics - # bmk.make_benchmark_oh_metrics( - # ref, - # refmet, - # config["data"]["ref"]["gcc"]["version"], - # dev, - # devmet, - # config["data"]["dev"]["gcc"]["version"], - # dst=gcc_vs_gcc_tablesdir, - # overwrite=True - # ) - # Filepaths ref = get_filepath(gcc_vs_gcc_refdir, "Metrics", gcc_ref_date) dev = get_filepath(gcc_vs_gcc_devdir, "Metrics", gcc_dev_date) @@ -703,6 +680,48 @@ def run_benchmark_default(config): month=gcc_dev_date.astype(datetime).month, ) + # ================================================================== + # GCC vs. 
GCC summary table + # ================================================================== + if config["options"]["outputs"]["summary_table"]: + print("\n%%% Creating GCC vs. GCC summary table %%%") + + # Diagnostic collections to check + collections = [ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet', + ] + + # Print summary of which collections are identical + # between Ref & Dev, and which are not identical. + bmk.create_benchmark_summary_table( + gcc_vs_gcc_refdir, + config["data"]["ref"]["gcc"]["version"], + gcc_ref_date, + gcc_vs_gcc_devdir, + config["data"]["dev"]["gcc"]["version"], + gcc_dev_date, + collections = [ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet' + ], + dst=gcc_vs_gcc_tablesdir, + outfilename="Summary.txt", + overwrite=True, + verbose=False, + ) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables # @@ -1014,6 +1033,38 @@ def run_benchmark_default(config): title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%" print(title) + + # ================================================================== + # GCHP vs. GCC summary table + # ================================================================== + if config["options"]["outputs"]["summary_table"]: + print("\n%%% Creating GCHP vs. GCC summary table %%%") + + # Print summary of which collections are identical + # between Ref & Dev, and which are not identical. + bmk.create_benchmark_summary_table( + gchp_vs_gcc_refdir, + config["data"]["dev"]["gcc"]["version"], + gcc_dev_date, + gchp_vs_gcc_devdir, + config["data"]["dev"]["gchp"]["version"], + gchp_dev_date, + collections=[ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet', + ], + dst=gchp_vs_gcc_tablesdir, + outfilename="Summary.txt", + overwrite=True, + verbose=False, + dev_gchp=True + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCHP benchmark plots and tables # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1376,6 +1427,38 @@ def run_benchmark_default(config): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs. GCHP summary table + # ================================================================== + if config["options"]["outputs"]["summary_table"]: + print("\n%%% Creating GCHP vs. GCHP summary table %%%") + + # Print summary of which collections are identical + # between Ref & Dev, and which are not identical. 
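+        # A hypothetical excerpt of the resulting Summary.txt (version
+        # labels and diagnostic names below are illustrative only):
+        #
+        #   -------------------------------------------------------------
+        #   SpeciesConc: GCHP_dev differs from GCHP_ref
+        #
+        #     Diagnostics that differ
+        #       SpeciesConc_NO2
+        #       SpeciesConc_O3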
+ bmk.create_benchmark_summary_table( + gchp_vs_gchp_refdir, + config["data"]["ref"]["gchp"]["version"], + gchp_ref_date, + gchp_vs_gchp_devdir, + config["data"]["dev"]["gchp"]["version"], + gchp_dev_date, + collections=[ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet', + ], + dst=gchp_vs_gchp_tablesdir, + outfilename="Summary.txt", + overwrite=True, + verbose=False, + ref_gchp=True, + dev_gchp=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC difference of differences benchmark plots # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1423,7 +1506,7 @@ def run_benchmark_default(config): # ================================================================== # Print a message indicating that the benchmarks finished # ================================================================== - print("\n %%%% All requested benchmark plots/tables created! %%%%") + print("\n%%%% All requested benchmark plots/tables created! %%%%") def main(): diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 71ca17d2..c92bc03b 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -1,4 +1,4 @@ -6""" +""" Specific utilities for creating plots from GEOS-Chem benchmark simulations. """ @@ -185,7 +185,10 @@ def create_total_emissions_table( # Write a placeholder to the file that denotes where # the list of species with differences will be written placeholder = "@%% insert diff_list here %%@" - print(f"Species that differ between {refstr} and {devstr}", file=f) + if "Inv" in template: + print(f"Inventories that differ btw {refstr} and {devstr}:", file=f) + else: + print(f"Species that differ btw {refstr} and {devstr}:", file=f) print(f"{placeholder}\n\n", file=f) # Define a list for differences @@ -262,7 +265,7 @@ def create_total_emissions_table( # If no properties are found, then skip to next species if species_properties is None: - print("No properties found for {} ... skippping".format(spc_name)) + print(f"No properties found for {spc_name} ... 
skippping") continue # Convert units of Ref and Dev and save to numpy ndarray objects @@ -348,11 +351,12 @@ def create_total_emissions_table( # Close file f.close() - # Reopen file and insert list of species with nonzero diffs + # Reopen file and replace placeholder with list of diffs util.insert_text_into_file( filename=outfilename, search_text=placeholder, - replace_text=util.unique_values(diff_list, drop=[None]) + replace_text=diff_list_to_text(diff_list), + width=90 ) def create_global_mass_table( @@ -460,21 +464,23 @@ def create_global_mass_table( if trop_only: title1 = f"### Global mass (Gg) {label} (Trop only)" title2 = f"### Ref = {refstr}; Dev = {devstr}" + title3 = f"### Species that differ btw {refstr} and {devstr}:" + + # Write a placeholder to the file that denotes where + # the list of species with differences will be written + placeholder = "@%% insert diff_list here %%@" + title4 = f"{placeholder}" # Print header to file print("#" * 89, file=f) print(f"{title1 : <86}{'###'}", file=f) print(f"{title2 : <86}{'###'}", file=f) + print(f"{'###' : <86}{'###'}", file=f) + print(f"{title3 : <86}{'###'}", file=f) + print(f"{placeholder}", file=f) print("#" * 89, file=f) - # Write a placeholder to the file that denotes where - # the list of species with differences will be written - placeholder = "@%% insert diff_list here %%@" - print(f"\nSpecies that differ between {refstr} and {devstr}", file=f) - print(f"{placeholder}\n\n", file=f) - # Column headers - print(f"{title2}", file=f) print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) # ================================================================== @@ -566,11 +572,15 @@ def create_global_mass_table( # Close file f.close() - # Reopen file and insert list of species with nonzero diffs + # Reopen file and replace placeholder text by diff_text util.insert_text_into_file( filename=outfilename, search_text=placeholder, - replace_text=util.unique_values(diff_list, drop=[None]) + replace_text=diff_list_to_text( + diff_list, + fancy_format=True + ), + width=100 # Force it not to wrap ) @@ -4396,49 +4406,61 @@ def get_species_database_dir(config): def create_benchmark_summary_table( refpath, refstr, + refdate, devpath, devstr, + devdate, + collections, dst="./benchmark", overwrite=False, outfilename="Summary.txt", verbose=False, - spcdb_dir=os.path.dirname(__file__) + spcdb_dir=os.path.dirname(__file__), + ref_gchp=False, + dev_gchp=False ): """ - Creates a table of global masses for a list of species in contained in - two data sets. The data sets, which typically represent output from two - different model versions, are usually contained in netCDF data files. + Creates a benchmark summary table that shows which data collections + have difference. Useful for scanning the 1-hr and 1-month benchmark + outputs. Args: - refdata: xarray Dataset - The first data set to be compared (aka "Reference"). + refpath: str + Path to the first data set to be compared (aka "Ref"). refstr: str A string that can be used to identify refdata - (e.g. a model v2ersion number or other identifier). - devdata: xarray Dataset - The second data set to be compared (aka "Development"). + (e.g. a model version number or other identifier). + refdate: np.datetime64 + Date/time stamp used by the "Ref" data files. + ref_gchp: bool + Set to True if the "Ref" data comes from a GCHP run. + Default value: False + devpath: str + Path to the second data set to be compared (aka "Dev"). 
devstr: str A string that can be used to identify the data set specified by devfile (e.g. a model version number or other identifier). - varlist: list of strings - List of species concentation variable names to include - in the list of global totals. - met_and_masks: dict of xarray DataArray - Dictionary containing the meterological variables and - masks for the Ref and Dev datasets. - label: str - Label to go in the header string. Can be used to - pass the month & year. + dev_gchp: bool + Set to True if the "Ref" data comes from a GCHP run. + Default value: False + devdate: np.datetime64 + Date/time stamp used by the "Dev" data files. + collections: list of strings + List of diagnostic collections to examine. Keyword Args (optional): - trop_only: bool - Set this switch to True if you wish to print totals - only for the troposphere. - Default value: False (i.e. print whole-atmosphere totals). + dst: str + A string denoting the destination folder where the file + containing emissions totals will be written. + Default value: "./benchmark" + overwrite: bool + Set this flag to True to overwrite files in the + destination folder (specified by the dst argument). + Default value: False outfilename: str Name of the text file which will contain the table of emissions totals. - Default value: "GlobalMass_TropStrat.txt" + Default value: "Summary.txt" verbose: bool Set this switch to True if you wish to print out extra informational messages. @@ -4456,44 +4478,183 @@ def create_benchmark_summary_table( """ # ================================================================== - # Initialization and data read + # Open file for output # ================================================================== - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) + + # Create the directory for output + make_directory(dst, overwrite) + + # Create file + try: + f = open(os.path.join(dst, outfilename), "w") + except (IOError, OSError, FileNotFoundError) as e: + msg = f"Could not open {outfilename} for writing!" 
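+        # NOTE: "e" is the caught exception *instance*, not an exception
+        # class, so calling it as e(msg) is fragile; a more conventional
+        # idiom would be "raise OSError(msg) from e".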
+        raise e(msg)
+
+    # Title strings
+    title1 = "### Benchmark summary table"
+    title2 = f"### Ref = {refstr}; Dev = {devstr}"
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+
+    # Print header to file
+    print("#" * 80, file=f)
+    print(f"{title1 : <77}{'###'}", file=f)
+    print(f"{title2 : <77}{'###'}", file=f)
+    print("#" * 80, file=f)
+    print(file=f)
+
+    # ==================================================================
+    # Read data and look for differences btw Ref & Dev versions
+    # ==================================================================
+
+    # Variables to skip (use a copy so that we do not modify the
+    # shared gcon.skip_these_vars list in place)
+    skip_vars = gcon.skip_these_vars + ["corner_lats", "corner_lons"]
+
+    # Pick the proper function to read the data
+    reader = util.dataset_reader(
+        multi_files=False,
+        verbose=verbose
+    )
+
+    # Make a dictionary to store the list of species that differ
+    diff_dict = {}
+
+    # Loop over diagnostic files
+    for col in collections:
+
+        # Read Ref data
+        refdata = reader(
+            util.get_filepath(
+                refpath,
+                col,
+                refdate,
+                is_gchp=ref_gchp
+            ),
+            drop_variables=skip_vars
+        ).load()
+
+        # Read Dev data
+        devdata = reader(
+            util.get_filepath(
+                devpath,
+                col,
+                devdate,
+                is_gchp=dev_gchp
+            ),
+            drop_variables=skip_vars
+        ).load()
+
+        # Make sure that Ref and Dev datasets have the same variables.
+        # Variables that are in Ref but not in Dev will be added to Dev
+        # with all missing values (NaNs). And vice-versa.
+        [refdata, devdata] = util.add_missing_variables(
+            refdata,
+            devdata
+        )
+
+        # Find all common variables between the two datasets
+        vardict = util.compare_varnames(
+            refdata,
+            devdata,
+            quiet=True
+        )
+
+        # List of differences for this collection
+        diff_list = []
+
+        # Keep track of which variables are different
+        # Loop over the common variables
+        for v in vardict["commonvarsData"]:
+            if not util.array_equals(refdata[v], devdata[v]):
+                diff_list.append(v)
+
+        # Drop duplicate values from diff_list
+        diff_list = util.unique_values(diff_list, drop=[None])
+
+        if len(diff_list) == 0:
+            print("-" * 79, file=f)
+            print(f"{col}: {devstr} is identical to {refstr}", file=f)
+            print(file=f)
+        else:
+            print("-" * 79, file=f)
+            print(f"{col}: {devstr} differs from {refstr}", file=f)
+            print("\n  Diagnostics that differ", file=f)
+            for i, v in enumerate(diff_list):
+                if i == 10:
+                    print(f"    ... and {len(diff_list) - 10} others", file=f)
+                    break
+                print(f"    {v}", file=f)
+            print(file=f)
+
+    # ==================================================================
+    # Close files
+    # ==================================================================
+    f.close()
+
+
+def diff_list_to_text(
+    diff_list,
+    fancy_format=False
+):
+    """
+    Converts a list of species/emissions/inventories/diagnostics that
+    show differences between GEOS-Chem versions to a printable text
+    string.
+
+    Args:
+    -----
+    diff_list : list
+        List to be converted into text.  "None" values will be dropped.
+    fancy_format: bool
+        Set to True if you wish output text to be bookended with '###'.
+
+    Returns:
+    --------
+    diff_text : str
+        String with concatenated list values.
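+
+    Example:
+    --------
+    A hypothetical call ("None" entries come from species that had no
+    differences, and are dropped):
+
+        >>> diff_list_to_text(["O3", None, "CO", "O3"])
+        'CO O3'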
+ """ + if not isinstance(diff_list, list): + raise ValueError("Argument 'diff_list' must be a list!") + + # Strip out duplicates from diff_list + # Prepare a message about species differences (or alternate msg) + diff_list = util.unique_values(diff_list, drop=[None]) + diff_text = util.wrap_text(diff_list, width=85) + if len(diff_text) > 85: + diff_text = "... Too many diffs to print (see below for details)" + + if fancy_format: + diff_text = f"### {diff_text : <82}{'###'}" + + return diff_text.strip() + + +def make_directory( + dir_name, + overwrite +): + """ + Creates a directory where benchmark plots/tables will be placed. + + Args: + ----- + dir_name : str + Name of the directory to be created. + overwrite : bool + Set to True if you wish to overwrite prior contents in + the directory 'dir_name' + """ + + if os.path.isdir(dir_name) and not overwrite: + msg = f"Directory {dir_name} exists!\n" + msg += "Pass overwrite=True to overwrite files in that directory." raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) -# # ================================================================== -# # Close files -# # ================================================================== -# -# # Get a list of files in the Ref path -# ref_files = [] -# for (path, names, files) in os.walk(refpath): -# for rf in files: -# ref_files.append(os.path.join(path, rf)) -# -# # Get a list of files in the Ref path -# dev_files = [] -# for (path, names, files) in os.walk(devpath): -# for df in files: -# dev_files.append(os.path.join(path, df) -# -# # ================================================================== -# # Open file for output -# # ================================================================== -# -# # Create file -# try: -# f = open(os.path.join(dst, outfilename), "w") -# except (IOError, OSError, FileNotFoundError) as e: -# msg = f"Could not open {outfilename} for writing!" -# raise e(msg) -# -# # ================================================================== -# # Close files -# # ================================================================== -# f.close() + if not os.path.isdir(dir_name): + os.mkdir(dir_name) diff --git a/gcpy/util.py b/gcpy/util.py index ea1ebba9..e6a85033 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -842,6 +842,8 @@ def compare_varnames( commonvars3D List of variables that are common to refdata and devdata, and that have lat, lon, and level dimensions. + commonvarsData List of all commmon 2D or 3D data variables, + excluding index variables. refonly List of 2D or 3D variables that are only present in refdata. 
devonly List of 2D or 3D variables that are only @@ -854,30 +856,36 @@ def compare_varnames( devonly = [v for v in devvars if v not in refvars] dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim] commonvarsOther = [ - v - for v in commonvars - if ( + v for v in commonvars if ( + ( ("lat" not in refdata[v].dims or "Xdim" not in refdata[v].dims) - and ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims) - and ("lev" not in refdata[v].dims) + and + ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims) + and + ("lev" not in refdata[v].dims) + ) + or + ( + ("hyam" in v or "hybm" in v) # Omit these from plottable data + ) ) ] commonvars2D = [ - v - for v in commonvars - if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and ("lev" not in refdata[v].dims) + v for v in commonvars if ( + ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) + and + ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) + and + ("lev" not in refdata[v].dims) ) ] commonvars3D = [ - v - for v in commonvars - if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and ("lev" in refdata[v].dims) + v for v in commonvars if ( + ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) + and + ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) + and + ("lev" in refdata[v].dims) ) ] @@ -903,18 +911,20 @@ def compare_varnames( print("All variables have same dimensions in ref and dev") # For safety's sake, remove the 0-D and 1-D variables from - # refonly and devonly. This will ensure that refonly and - # devonly will only contain variables that can be plotted. + # commonvarsData, refonly, and devonly. This will ensure that + # these lists will only contain variables that can be plotted. + commonvarsData = [v for v in commonvars if v not in commonvarsOther] refonly = [v for v in refonly if v not in commonvarsOther] devonly = [v for v in devonly if v not in commonvarsOther] return { "commonvars": commonvars, - "commonvarsOther": commonvarsOther, "commonvars2D": commonvars2D, "commonvars3D": commonvars3D, + "commonvarsData": commonvarsData, + "commonvarsOther": commonvarsOther, "refonly": refonly, - "devonly": devonly, + "devonly": devonly } @@ -2168,13 +2178,46 @@ def unique_values( if drop is not None: for d in drop: - unique.remove(d) + if d in unique: + unique.remove(d) unique.sort() return unique +def wrap_text( + text, + width=80 +): + """ + Wraps text so that it fits within a certain line width. + + Args: + ----- + text: str or list of str + Input text to be word-wrapped. + width: int + Line width, in characters. + Default value: 80 + + Returns: + -------- + Original text reformatted so that it fits within lines + of 'width' characters or less. + """ + if not isinstance(text, str): + if isinstance(text, list): + text = ' '.join(text) # List -> str conversion + else: + raise ValueError("Argument 'text' must be either str or list!") + + text = wrap(text, width=width) + text = '\n'.join(text) + + return text + + def insert_text_into_file( filename, search_text, @@ -2190,29 +2233,27 @@ def insert_text_into_file( ----- filename: str The file with text to be replaced. - search_text: str Text string in the file that will be replaced. 
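         (e.g. the "@%% insert diff_list here %%@" placeholder that the
         benchmark table routines write into their output files)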
-
     replace_text: str or list of str
         Text that will replace 'search_text'
-
     width: int
         Will "word-wrap" the text in 'replace_text' to this width
     """
     if not isinstance(search_text, str):
         raise ValueError("Argument 'search_text' needs to be a string!")
-    if not isinstance(replace_text, str):
-        if isinstance(replace_text, list):
-            replace_text = ' '.join(replace_text)
-        else:
-            raise ValueError(
-                "Argument 'replace_text' needs to be a list or a string"
-            )
+    if not isinstance(replace_text, str) and \
+       not isinstance(replace_text, list):
+        raise ValueError(
+            "Argument 'replace_text' needs to be a list or a string"
+        )
 
     # Word-wrap the replacement text
-    replace_text = wrap(replace_text, width=width)
-    replace_text = '\n'.join(replace_text)
+    # (does list -> str conversion if necessary)
+    replace_text = wrap_text(
+        replace_text,
+        width=width
+    )
 
     with open(filename, "r") as f:
         filedata = f.read()
@@ -2226,3 +2267,44 @@ def insert_text_into_file(
     with open(filename, "w") as f:
         f.write(filedata)
         f.close()
+
+
+def array_equals(
+    refdata,
+    devdata
+):
+    """
+    Tests two arrays for equality.  Useful for checking which
+    species have nonzero differences in benchmark output.
+
+    Args:
+    -----
+    refdata: xarray DataArray or numpy ndarray
+        The first array to be checked.
+    devdata: xarray DataArray or numpy ndarray
+        The second array to be checked.
+
+    Returns:
+    --------
+    True if both arrays are equal; False if not
+    """
+    if not isinstance(refdata, np.ndarray):
+        if isinstance(refdata, xr.DataArray):
+            refdata = refdata.values
+        else:
+            raise ValueError(
+                "Argument 'refdata' must be an xarray DataArray or numpy ndarray!"
+            )
+    if not isinstance(devdata, np.ndarray):
+        if isinstance(devdata, xr.DataArray):
+            devdata = devdata.values
+        else:
+            raise ValueError(
+                "Argument 'devdata' must be an xarray DataArray or numpy ndarray!"
+            )
+
+    # This method will work if the arrays have different dimensions,
+    # but an element-by-element comparison will not!
+    refsum = np.sum(refdata, dtype=np.float64)
+    devsum = np.sum(devdata, dtype=np.float64)
+    return np.abs(devsum - refsum) <= np.float64(0.0)

From 6a340222ffd6b2c0468c009fdc8ddc0092620a1d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Wed, 8 Feb 2023 17:23:05 -0500
Subject: [PATCH 25/54] Replace yaml.load() statements w/ read_config_file (Closes #188)

This commit abstracts the code to read YAML files to util.py, so that
we only need to import yaml in util.py

gcpy/util.py
- Added "from yaml import safe_load as yaml_safe_load"
- read_config_file now calls yaml_safe_load

gcpy/benchmark.py
gcpy/oh_metrics.py
gcpy/plot.py
- Call read_config_file from util.py (mostly with quiet=True)

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark.py  | 83 ++++++++++++++++++++++++++++++++++------------
 gcpy/oh_metrics.py | 10 ++++--
 gcpy/plot.py       | 21 ++++++++----
 gcpy/util.py       | 34 +++++++++++--------
 4 files changed, 104 insertions(+), 44 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index c92bc03b..eacc7e80 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -154,8 +154,13 @@ def create_total_emissions_table(
     # molecular weights), which we will need for unit conversions.
     # This is located in the "data" subfolder of this folder where
     # this benchmark.py file is found.
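+    # (read_config_file is a thin wrapper around yaml safe_load, so the
+    # result is a plain nested dict; e.g. properties["O3"]["MW_g"] would
+    # give the molecular weight of ozone in g mol-1.)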
- properties_path = os.path.join(spcdb_dir, "species_database.yml") - properties = yaml.load(open(properties_path), Loader=yaml.FullLoader) + properties = util.read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) # ================================================================== # Get the list of emission variables for which we will print totals @@ -442,8 +447,13 @@ def create_global_mass_table( # Load a YAML file containing species properties (such as # molecular weights), which we will need for unit conversions. # This is located in the "data" subfolder of this current directory.2 - properties_path = os.path.join(spcdb_dir, "species_database.yml") - properties = yaml.load(open(properties_path), Loader=yaml.FullLoader) + properties = util.read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) # ================================================================== # Open file for output @@ -1812,14 +1822,20 @@ def make_benchmark_emis_tables( # ================================================================== # Emissions species dictionary - spc_dict = yaml.load( - open(os.path.join(os.path.dirname(__file__), emission_spc)), - Loader=yaml.FullLoader + spc_dict = util.read_config_file( + os.path.join( + os.path.dirname(__file__), + emission_spc + ), + quiet=True ) species=spc_dict[benchmark_type] - inv_dict = yaml.load( - open(os.path.join(os.path.dirname(__file__), emission_inv)), - Loader=yaml.FullLoader + inv_dict = util.read_config_file( + os.path.join( + os.path.dirname(__file__), + emission_inv + ), + quiet=True ) inventories=inv_dict[benchmark_type] @@ -2439,9 +2455,12 @@ def make_benchmark_aod_plots( varlist = [v for v in cmn3D if "AOD" in v and "_bin" not in v] # Dictionary and list for new display names - newvars = yaml.load( - open(os.path.join(os.path.dirname(__file__), aod_spc)), - Loader=yaml.FullLoader + newvars = util.read_config_file( + os.path.join( + os.path.dirname(__file__), + aod_spc + ), + quiet=True ) newvarlist = [] @@ -3419,8 +3438,13 @@ def make_benchmark_aerosol_tables( species_list = ["BCPI", "OCPI", "SO4", "DST1", "SALA", "SALC"] # Read the species database - path = os.path.join(spcdb_dir, "species_database.yml") - spcdb = yaml.load(open(path), Loader=yaml.FullLoader) + spcdb = util.read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) # Molecular weights [g mol-1], as taken from the species database mw = {} @@ -3429,8 +3453,14 @@ def make_benchmark_aerosol_tables( mw["Air"] = gcon.MW_AIR_g # Get the list of relevant AOD diagnostics from a YAML file - path = os.path.join(os.path.dirname(__file__), "aod_species.yml") - aod = yaml.load(open(path), Loader=yaml.FullLoader) + aod = util.read_config_file( + os.path.join( + os.path.dirname(__file__), + "aod_species.yml" + ), + quiet=True + ) + aod_list = [v for v in aod.keys() if "Dust" in v or "Hyg" in v] # different names for GCHP if is_gchp: @@ -3861,9 +3891,13 @@ def make_benchmark_operations_budget( # ------------------------------------------ # Load a YAML file containing species properties - spc_properties = yaml.load(open(os.path.join(os.path.dirname(__file__), - "species_database.yml")), - Loader=yaml.FullLoader) + spc_properties = util.read_config_file( + os.path.join( + os.path.dirname(__file__), + "species_database.yml" + ), + quiet=True + ) # Determine what the converted units and conversion factor should be # based on benchmark type and species (tracer) name. 
Assume raw data [kg/s] @@ -4242,8 +4276,13 @@ def make_benchmark_mass_conservation_table( # Load a YAML file containing species properties (such as # molecular weights), which we will need for unit conversions. - properties_path = os.path.join(spcdb_dir, "species_database.yml") - properties = yaml.load(open(properties_path), Loader=yaml.FullLoader) + properties = util.read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) # Get the species name spc_name = 'PassiveTracer' diff --git a/gcpy/oh_metrics.py b/gcpy/oh_metrics.py index 908ce4a4..8f305a1c 100644 --- a/gcpy/oh_metrics.py +++ b/gcpy/oh_metrics.py @@ -14,7 +14,6 @@ import warnings import numpy as np import xarray as xr -import yaml import gcpy.constants as const # ===================================================================== @@ -221,8 +220,13 @@ def init_common_vars(ref, refstr, dev, devstr, spcdb_dir): """ # Get species database - spcdb_file = os.path.join(spcdb_dir, "species_database.yml") - spcdb = yaml.load(open(spcdb_file), Loader=yaml.FullLoader) + spcdb = util.read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) # Define common_vars dictionary common_vars = { diff --git a/gcpy/plot.py b/gcpy/plot.py index c6bdd6c9..b9102073 100644 --- a/gcpy/plot.py +++ b/gcpy/plot.py @@ -1,6 +1,5 @@ import os import copy -import yaml import matplotlib as mpl import matplotlib.colors as mcolors import matplotlib.pyplot as plt @@ -16,7 +15,7 @@ from .regrid import regrid_comparison_data, create_regridders, gen_xmat, \ regrid_vertical from .util import reshape_MAPL_CS, get_diff_of_diffs, get_nan_mask, \ - all_zero_or_nan, slice_by_lev_and_time, compare_varnames + all_zero_or_nan, slice_by_lev_and_time, compare_varnames, read_config_file from .units import check_units, data_unit_is_mol_per_mol from .constants import MW_AIR_g from joblib import Parallel, delayed @@ -516,8 +515,13 @@ def compare_single_level( if pdfname == "": savepdf = False if convert_to_ugm3: - properties_path = os.path.join(spcdb_dir, "species_database.yml") - properties = yaml.load(open(properties_path), Loader=yaml.FullLoader) + properties = read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) sg_ref_params = [1, 170, -90] sg_dev_params = [1, 170, -90] @@ -1659,8 +1663,13 @@ def compare_zonal_mean( savepdf = False # If converting to ug/m3, load the species database if convert_to_ugm3: - properties_path = os.path.join(spcdb_dir, "species_database.yml") - properties = yaml.load(open(properties_path), Loader=yaml.FullLoader) + properties = read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) # Get mid-point pressure and edge pressures for this grid ref_pedge, ref_pmid, _ = get_vert_grid(refdata, *ref_vert_params) diff --git a/gcpy/util.py b/gcpy/util.py index e6a85033..7f91231c 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -6,7 +6,7 @@ import os import warnings import shutil -import yaml +from yaml import safe_load as yaml_safe_load import numpy as np import xarray as xr from PyPDF2 import PdfFileWriter, PdfFileReader @@ -314,9 +314,12 @@ def get_species_categories( NOTE: The benchmark categories are specified in YAML file benchmark_species.yml. 
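 
     Example (hypothetical output; the actual categories depend on the
     contents of the YAML file):
 
         >>> cats = get_species_categories("FullChemBenchmark")
         >>> "Oxidants" in cats
         True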
""" - spc_categories = "benchmark_categories.yml" - yamlfile = os.path.join(os.path.dirname(__file__), spc_categories) - spc_cat_dict = read_config_file(yamlfile) + spc_cat_dict = read_config_file( + os.path.join( + os.path.dirname(__file__), + "benchmark_categories.yml" + ) + ) return spc_cat_dict[benchmark_type] @@ -1002,9 +1005,13 @@ def convert_bpch_names_to_netcdf_names( # Names dictionary (key = bpch id, value[0] = netcdf id, # value[1] = action to create full name using id) # Now read from YAML file (bmy, 4/5/19) - bpch_to_nc_names = "bpch_to_nc_names.yml" - yamlfile = os.path.join(os.path.dirname(__file__), bpch_to_nc_names) - names = yaml.load(open(yamlfile), Loader=yaml.FullLoader) + names = read_config_file( + os.path.join( + os.path.dirname(__file__), + "bpch_to_nc_names.yml" + ). + quiet=True + ) # define some special variable to overwrite above special_vars = { @@ -1180,11 +1187,12 @@ def get_lumped_species_definitions(): lumped_spc_dict : dict of str Dictionary of lumped species """ - lumped_spc = "lumped_species.yml" - yamlfile = os.path.join(os.path.dirname(__file__), lumped_spc) - with open(yamlfile, "r") as f: - lumped_spc_dict = yaml.load(f.read(), Loader=yaml.FullLoader) - return lumped_spc_dict + return read_config_file( + os.path.join( + os.path.dirname(__file__), + "lumped_species.yml" + ) + ) def archive_lumped_species_definitions( @@ -2141,7 +2149,7 @@ def read_config_file(config_file, quiet=False): try: if not quiet: print(f"Using configuration file {config_file}") - config = yaml.safe_load(open(config_file)) + config = yaml_safe_load(open(config_file)) except Exception as err: msg = f"Error reading configuration in {config_file}: {err}" raise Exception(msg) from err From f819b49c20010f4130b96569def99927ddba7728 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 10:29:18 -0500 Subject: [PATCH 26/54] Cleaned up gcpy/benchmark.py and util.py gcpy/benchmark.py gcpy/util.py - Changed string formatting with .format to f-strings - Adopt suggestions from the pylint linter (remove unused variables, etc) - Fix the import order of modules Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 445 +++++++++++++++++++++------------------------- gcpy/util.py | 12 +- 2 files changed, 205 insertions(+), 252 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index eacc7e80..b5e83ec9 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -6,7 +6,7 @@ import warnings import itertools from distutils.version import LooseVersion -import yaml +import gc import numpy as np import pandas as pd import xarray as xr @@ -20,7 +20,6 @@ from gcpy.grid import get_troposphere_mask from gcpy.units import convert_units import gcpy.constants as gcon -import gc # Save warnings format to undo overwriting built into PyPDF2 warning_format = warnings.showwarning @@ -185,7 +184,7 @@ def create_total_emissions_table( try: f = open(outfilename, "w") except (IOError, OSError, FileNotFoundError) as e: - raise e(f"Could not open {outfilename} for writing!") + raise e(f"Could not open {outfilename} for writing!") from e # Write a placeholder to the file that denotes where # the list of species with differences will be written @@ -205,7 +204,7 @@ def create_total_emissions_table( for species_name, target_units in species.items(): # Get a list of emission variable names for each species - diagnostic_template = template.format(species_name) + diagnostic_template = f"{species_name}" varnames = util.get_emissions_varnames(cvars, diagnostic_template) # Also add variables that might be in 
either Ref or Dev @@ -223,8 +222,8 @@ def create_total_emissions_table( # If no emissions are found, then skip to next species if len(varnames) == 0: - msg = "No emissions found for {} ... skippping" - print(msg.format(species_name)) + msg = f"No emissions found for {species_name} ... skippping" + print(msg) continue # Check if there is a total emissions variable in the list @@ -463,8 +462,7 @@ def create_global_mass_table( try: f = open(outfilename, "w") except (IOError, OSError, FileNotFoundError) as e: - msg = f"Could not open {outfilename} for writing!" - raise FileNotFoundError(msg) + raise e(f"Could not open {outfilename} for writing!") from e # Define a list for differences diff_list = [] @@ -479,7 +477,6 @@ def create_global_mass_table( # Write a placeholder to the file that denotes where # the list of species with differences will be written placeholder = "@%% insert diff_list here %%@" - title4 = f"{placeholder}" # Print header to file print("#" * 89, file=f) @@ -510,8 +507,8 @@ def create_global_mass_table( # If no properties are found, then skip to next species if species_properties is None: if verbose: - msg = "No properties found for {} ... skippping" - print(msg.format(spc_name)) + msg = f"No properties found for {spc_name} ... skippping" + print(msg) continue # Specify target units @@ -519,8 +516,9 @@ def create_global_mass_table( mol_wt_g = species_properties.get("MW_g") if mol_wt_g is None: if verbose: - msg = "No molecular weight found for {} ... skippping" - print(msg.format(spc_name)) + msg = \ + f"No molecular weight found for {spc_name} ... skippping" + print(msg) continue # ============================================================== @@ -738,13 +736,9 @@ def make_benchmark_conc_plots( # ================================================================== # Initialization and data read # ================================================================== - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) - raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) + + # Create the destination folder + make_directory(dst, overwrite) # Define extra title text (usually a date string) # for the top-title of the plot @@ -794,10 +788,10 @@ def make_benchmark_conc_plots( devmetds = reader(devmet, drop_variables=gcon.skip_these_vars).load() # Determine if doing diff-of-diffs + diff_of_diffs = False if second_ref is not None and second_dev is not None: diff_of_diffs = True - else: - diff_of_diffs = False + # Open second datasets if passed as arguments (used for diff of diffs) # Regrid to same horz grid resolution if two refs or two devs do not match. 
@@ -1040,8 +1034,8 @@ def createplots(filecat): continue varlist.append(varname) if warninglist != []: - msg = "\n\nWarning: variables in {} category not in dataset: {}" - print(msg.format(filecat, warninglist)) + msg = f"\n\nWarning: variables in {filecat} category not in dataset: {warninglist}" + print(msg) # ----------------------- # Surface plots @@ -1050,12 +1044,15 @@ def createplots(filecat): if subdst is not None: pdfname = os.path.join( - catdir, "{}_Surface_{}.pdf".format(filecat, subdst) + catdir, + f"{filecat}_Surface_{subdst}.pdf" ) - print('creating {}'.format(pdfname)) + print(f"creating {pdfname}") else: pdfname = os.path.join( - catdir, "{}_Surface.pdf".format(filecat)) + catdir, + f"{filecat}_Surface.pdf" + ) diff_sfc = [] compare_single_level( @@ -1095,10 +1092,13 @@ def createplots(filecat): if subdst is not None: pdfname = os.path.join( - catdir, "{}_500hPa_{}.pdf".format(filecat, subdst) + catdir, f"{filecat}_500hPa_{subdst}.pdf" ) else: - pdfname = os.path.join(catdir, "{}_500hPa.pdf".format(filecat)) + pdfname = os.path.join( + catdir, + f"{filecat}_500hPa.pdf" + ) diff_500 = [] compare_single_level( @@ -1139,12 +1139,13 @@ def createplots(filecat): if subdst is not None: pdfname = os.path.join( - catdir, "{}_FullColumn_ZonalMean_{}.pdf".format( - filecat, subdst) + catdir, + f"{filecat}_FullColumn_ZonalMean_{subdst}.pdf" ) else: pdfname = os.path.join( - catdir, "{}_FullColumn_ZonalMean.pdf".format(filecat) + catdir, + f"{filecat}_FullColumn_ZonalMean.pdf" ) diff_zm = [] @@ -1181,11 +1182,14 @@ def createplots(filecat): # a range of 1..100 hPa, as per GCSC request. (bmy, 8/13/19) if subdst is not None: pdfname = os.path.join( - catdir, "{}_Strat_ZonalMean_{}.pdf".format(filecat, subdst) + catdir, + f"{filecat}_Strat_ZonalMean_{subdst}.pdf" ) else: - pdfname = os.path.join(catdir, "{}_Strat_ZonalMean.pdf".format( - filecat)) + pdfname = os.path.join( + catdir, + f"{filecat}_Strat_ZonalMean.pdf" + ) compare_zonal_mean( refds, @@ -1240,9 +1244,9 @@ def createplots(filecat): if "sfc" in filename: with open(filename, "a+") as f: for c, diff_list in dict_sfc.items(): - print("* {}: ".format(c), file=f, end="") + print(f"* {c}: ", file=f, end="") for v in diff_list: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -1250,9 +1254,9 @@ def createplots(filecat): if "500hpa" in filename: with open(filename, "a+") as f: for c, diff_list in dict_500.items(): - print("* {}: ".format(c), file=f, end="") + print(f"* {c}: ", file=f, end="") for v in diff_list: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -1260,9 +1264,9 @@ def createplots(filecat): if "zonalmean" in filename or "zm" in filename: with open(filename, "a+") as f: for c, diff_list in dict_zm.items(): - print("* {}: ".format(c), file=f, end="") + print(f"* {c}: ", file=f, end="") for v in diff_list: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -1401,14 +1405,8 @@ def make_benchmark_emis_plots( # Initialization and data read # ================================================================= - # Create destination folder if it does not exist - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite "\ - + "files in that directory, if any." 
- msg = msg.format(dst) - raise ValueError(msg) - elif not os.path.isdir(dst): - os.mkdir(dst) + # Create the destination folder + make_directory(dst, overwrite) # Create the "Emissions" category folder. If subdst is passed, # then create a sub-folder (needed for the 1-year benchmarks). @@ -1429,14 +1427,14 @@ def make_benchmark_emis_plots( # Ref dataset try: refds = reader(ref, drop_variables=gcon.skip_these_vars) - except FileNotFoundError: - raise FileNotFoundError("Could not find Ref file: {}".format(ref)) + except (OSError, IOError, FileNotFoundError) as e: + raise e(f"Could not find Ref file: {ref}") from e # Dev dataset try: devds = reader(dev, drop_variables=gcon.skip_these_vars) - except FileNotFoundError: - raise FileNotFoundError("Could not find Dev file: {}".format(dev)) + except (OSError, IOError, FileNotFoundError) as e: + raise e(f"Could not find Ref file: {dev}") from e # Compute mean of data over the time dimension (if time_mean=True) if time_mean: @@ -1473,9 +1471,15 @@ def make_benchmark_emis_plots( # ================================================================== if not plot_by_spc_cat and not plot_by_hco_cat: if subdst is not None: - pdfname = os.path.join(emisdir, "Emissions_{}.pdf".format(subdst)) + pdfname = os.path.join( + emisdir, + f"Emissions_{subdst}.pdf" + ) else: - pdfname = os.path.join(emisdir, "Emissions.pdf") + pdfname = os.path.join( + emisdir, + "Emissions.pdf" + ) compare_single_level( refds, @@ -1538,11 +1542,14 @@ def createfile_hco_cat(c): # subdst to the file name (e.g. as for 1-year benchmarks). if subdst is not None: pdfname = os.path.join( - emisspcdir, "{}_Emissions_{}.pdf".format(c, subdst) + emisspcdir, + f"{c}_Emissions_{subdst}.pdf" ) else: pdfname = os.path.join( - emisspcdir, "{}_Emissions.pdf".format(c)) + emisspcdir, + f"{c}_Emissions.pdf" + ) diff_dict = {} diff_emis = [] compare_single_level( @@ -1587,9 +1594,9 @@ def createfile_hco_cat(c): if "emis" in filename: with open(filename, "w+") as f: for c, diff_list in dict_emis.items(): - print("* {}: ".format(c), file=f, end="") + print(f"* {c}: ", file=f, end="") for v in diff_list: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -1627,9 +1634,7 @@ def createfile_bench_cat(filecat): if not varlist: print( "\nWarning: no emissions species in benchmark species" + \ - "category {}".format( - filecat - ) + f"category {filecat}" ) return catspc @@ -1647,11 +1652,14 @@ def createfile_bench_cat(filecat): # is needed for the 1-year benchmarks). if subdst is not None: pdfname = os.path.join( - catdir, "{}_Emissions_{}.pdf".format(filecat, subdst) + catdir, + f"{filecat}_Emissions_{subdst}.pdf" ) else: - pdfname = os.path.join(catdir, "{}_Emissions.pdf".format( - filecat)) + pdfname = os.path.join( + catdir, + f"{filecat}_Emissions.pdf" + ) # Create the PDF compare_single_level( refds, @@ -1683,8 +1691,10 @@ def createfile_bench_cat(filecat): # category for spc in emis_spc: if spc not in allcatspc: - print("Warning: species {} has emissions diagnostics but is not" - " in benchmark_categories.yml".format(spc)) + print(\ + f"Warning: species {spc} has emissions diagnostics but is not" + " in benchmark_categories.yml" + ) # ------------------------------------------- # Clean up @@ -1765,14 +1775,8 @@ def make_benchmark_emis_tables( # Initialization # ================================================================== - # Create destination folder - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. 
Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) - raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) + # Create the destination folder + make_directory(dst, overwrite) # Create the "Tables" category folder if it does not exist emisdir = os.path.join(dst, "Tables") @@ -2016,14 +2020,8 @@ def make_benchmark_jvalue_plots( # Initialization # ================================================================== - # Create the destination folder if it does not exist - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in tht directory, if any." - msg = msg.format(dst) - raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) + # Create the directory for output + make_directory(dst, overwrite) # Get the function that will read file(s) into a Dataset reader = util.dataset_reader(time_mean, verbose=verbose) @@ -2031,14 +2029,14 @@ def make_benchmark_jvalue_plots( # Ref dataset try: refds = reader(ref, drop_variables=gcon.skip_these_vars) - except FileNotFoundError: - raise FileNotFoundError("Could not find Ref file: {}".format(ref)) + except (OSError, IOError, FileNotFoundError) as e: + raise e(f"Could not find Ref file: {ref}") from e # Dev dataset try: devds = reader(dev, drop_variables=gcon.skip_these_vars) - except FileNotFoundError: - raise FileNotFoundError("Could not find Dev file: {}".format(dev)) + except (OSError, IOError, FileNotFoundError) as e: + raise e(f"Could not find Ref file: {dev}") from e # Compute mean of data over the time dimension (if time_mean=True) if time_mean: @@ -2117,10 +2115,15 @@ def make_benchmark_jvalue_plots( # Surface plots if "sfc" in plots: if subdst is not None: - pdfname = os.path.join(jvdir, "{}_Surface_{}.pdf".format( - prefix, subdst)) + pdfname = os.path.join( + jvdir, + f"{prefix}_Surface_{subdst}.pdf" + ) else: - pdfname = os.path.join(jvdir, "{}_Surface.pdf".format(prefix)) + pdfname = os.path.join( + jvdir, + f"{prefix}_Surface.pdf" + ) diff_sfc = [] compare_single_level( @@ -2150,10 +2153,14 @@ def make_benchmark_jvalue_plots( # 500hPa plots if "500hpa" in plots: if subdst is not None: - pdfname = os.path.join(jvdir, "{}_500hPa_{}.pdf".format( - prefix, subdst)) + pdfname = os.path.join( + jvdir, + f"{prefix}_500hPa_{subdst}.pdf" + ) else: - pdfname = os.path.join(jvdir, "{}_500hPa.pdf".format(prefix)) + pdfname = os.path.join( + jvdir, f"{prefix}_500hPa.pdf" + ) diff_500 = [] compare_single_level( @@ -2181,11 +2188,13 @@ def make_benchmark_jvalue_plots( if "zonalmean" in plots: if subdst is not None: pdfname = os.path.join( - jvdir, "{}_FullColumn_ZonalMean_{}.pdf".format(prefix, subdst) + jvdir, + f"{prefix}_FullColumn_ZonalMean_{subdst}.pdf" ) else: - pdfname = os.path.join(jvdir, "{}_FullColumn_ZonalMean.pdf".format( - prefix)) + pdfname = os.path.join( + jvdir, f"{prefix}_FullColumn_ZonalMean.pdf" + ) diff_zm = [] compare_zonal_mean( @@ -2215,11 +2224,14 @@ def make_benchmark_jvalue_plots( # a range of 1..100 hPa, as per GCSC request. 
(bmy, 8/13/19) if subdst is not None: pdfname = os.path.join( - jvdir, "{}_Strat_ZonalMean_{}.pdf".format(prefix, subdst) + jvdir, + f"{prefix}_Strat_ZonalMean_{subdst}.pdf" ) else: pdfname = os.path.join( - jvdir, "{}_Strat_ZonalMean.pdf".format(prefix)) + jvdir, + f"{prefix}_Strat_ZonalMean.pdf" + ) compare_zonal_mean( refds, @@ -2252,7 +2264,7 @@ def make_benchmark_jvalue_plots( with open(filename, "a+") as f: print("* J-Values: ", file=f, end="") for v in diff_sfc: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -2261,7 +2273,7 @@ def make_benchmark_jvalue_plots( with open(filename, "a+") as f: print("* J-Values: ", file=f, end="") for v in diff_500: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -2270,7 +2282,7 @@ def make_benchmark_jvalue_plots( with open(filename, "a+") as f: print("* J-Values: ", file=f, end="") for v in diff_zm: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -2373,14 +2385,8 @@ def make_benchmark_aod_plots( # Initialization and also read data # ================================================================== - # Create the destination directory if it does not exist - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) - raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) + # Create destination plots directory + make_directory(dst, overwrite) # Create the "Aerosols" directory as a subfolder of dst. # If subdst is passed, then create a subdirectory of the "Aerosols" @@ -2402,14 +2408,14 @@ def make_benchmark_aod_plots( # Read the Ref dataset try: refds = reader(ref, drop_variables=gcon.skip_these_vars) - except FileNotFoundError: - raise FileNotFoundError("Could not find Ref file: {}".format(ref)) + except (OSError, IOError, FileNotFoundError) as e: + raise e(f"Could not find Ref file: {ref}") from e # Read the Dev dataset try: devds = reader(dev, drop_variables=gcon.skip_these_vars) - except FileNotFoundError: - raise FileNotFoundError("Could not find Dev file: {}".format(dev)) + except (OSError, IOError, FileNotFoundError) as e: + raise e(f"Could not find Ref file: {dev}") from e # Compute mean of data over the time dimension (if time_mean=True) if time_mean: @@ -2533,7 +2539,7 @@ def make_benchmark_aod_plots( newname = newvars[v] newvarlist.append(newname) else: - raise ValueError("Could not find a display name for {}".format(v)) + raise ValueError(f"Could not find a display name for {v}") # Don't clobber existing DataArray and Dataset attributes with xr.set_options(keep_attrs=True): @@ -2554,10 +2560,15 @@ def make_benchmark_aod_plots( # Create the plots # ================================================================== if subdst is not None: - pdfname = os.path.join(aoddir, "Aerosols_ColumnOptDepth_{}.pdf".format( - subdst)) + pdfname = os.path.join( + aoddir, + f"Aerosols_ColumnOptDepth_{subdst}.pdf" + ) else: - pdfname = os.path.join(aoddir, "Aerosols_ColumnOptDepth.pdf") + pdfname = os.path.join( + aoddir, + "Aerosols_ColumnOptDepth.pdf" + ) diff_aod = [] compare_single_level( @@ -2591,7 +2602,7 @@ def make_benchmark_aod_plots( with open(filename, "a+") as f: print("* Column AOD: ", file=f, end="") for v in diff_aod: - print("{} ".format(v), file=f, end="") + print(f"{v} ", file=f, end="") print(file=f) f.close() @@ -2782,8 +2793,8 @@ def 
make_benchmark_mass_tables(
     for v in varlist:
         if v not in commonspc:
             raise ValueError(
-                '{} folder error: Variable {} in varlist passed to make_benchmark_mass_tables ' + \
-                'is not present in ref and dev datasets'.format(dst, v))
+                f"{dst} folder error: Variable {v} in varlist passed to make_benchmark_mass_tables is not present in Ref and Dev datasets"
+            )
     else:
         varlist = commonspc
 
@@ -2815,7 +2826,7 @@ def make_benchmark_mass_tables(
     # Create global mass table
     # ==================================================================
     if subdst is not None:
-        mass_filename = "GlobalMass_TropStrat_{}.txt".format(subdst)
+        mass_filename = f"GlobalMass_TropStrat_{subdst}.txt"
     else:
         mass_filename = "GlobalMass_TropStrat.txt"
     mass_file = os.path.join(dst, mass_filename)
@@ -2836,7 +2847,7 @@ def make_benchmark_mass_tables(
     # Create tropospheric mass table
     # ==================================================================
     if subdst is not None:
-        mass_filename = 'GlobalMass_Trop_{}.txt'.format(subdst)
+        mass_filename = f"GlobalMass_Trop_{subdst}.txt"
     else:
         mass_filename = 'GlobalMass_Trop.txt'
     mass_file = os.path.join(dst, mass_filename)
@@ -2905,13 +2916,7 @@ def make_benchmark_oh_metrics(
     # ==================================================================
     # Define destination directory
     # ==================================================================
-    if os.path.isdir(dst) and not overwrite:
-        msg = "Directory {} exists. Pass overwrite=True to overwrite " \
-            + "files in that directory, if any."
-        msg = msg.format(dst)
-        raise ValueError(msg)
-    if not os.path.isdir(dst):
-        os.makedirs(dst)
+    make_directory(dst, overwrite)
 
     # ==================================================================
     # Read data from netCDF into Dataset objects
@@ -3047,14 +3052,12 @@ def make_benchmark_oh_metrics(
 
     def print_metrics_to_file(f, title1, title2, ref, dev, diff, pctdiff):
         print("#" * 79, file=f)
-        print("{}{}".format(title1.ljust(76), "###"), file=f)
-        print("{}{}".format(title2.ljust(76), "###"), file=f)
+        print(f"{title1 : <76}{'###'}", file=f)
+        print(f"{title2 : <76}{'###'}", file=f)
         print("#" * 79, file=f)
-        print("{}{}{}{}".format(" Ref".ljust(15),
-                                "Dev".ljust(13), "Dev - Ref".ljust(13),
-                                "% diff".ljust(11),), file=f)
-        print("{:11.6f} {:11.6f} {:11.6f} {:9.4f}".format(ref, dev, diff,
-                                                          pctdiff), file=f,)
+        print(f"{' Ref' : <15}{'Dev' : <13}{'Dev - Ref' : <13}{'% diff' : <11}",
+              file=f)
+        print(f"{ref:11.6f} {dev:11.6f} {diff:11.6f} {pctdiff:9.4f}", file=f)
 
     # ==================================================================
     # Print metrics to file
@@ -3066,19 +3069,19 @@ def make_benchmark_oh_metrics(
 
     # Write mean OH
     title1 = "### Global mass-weighted OH concentration [1e5 molec/cm3]"
-    title2 = "### Ref = {}; Dev = {}".format(refstr, devstr)
+    title2 = f"### Ref = {refstr}; Dev = {devstr}"
     print_metrics_to_file(f, title1, title2, ref_mean_oh, dev_mean_oh,
                           oh_diff, oh_pctdiff)
 
     # Write MCF lifetime
     title1 = "### MCF lifetime w/r/t tropospheric OH [years]"
-    title2 = "### Ref = {}; Dev = {}".format(refstr, devstr)
+    title2 = f"### Ref = {refstr}; Dev = {devstr}"
     print_metrics_to_file(f, title1, title2, ref_mcf_lifetime,
                           dev_mcf_lifetime, mcf_diff, mcf_pctdiff)
 
     # Write CH4 lifetime
     title1 = "### CH4 lifetime w/r/t tropospheric OH [years]"
-    title2 = "### Ref = {}; Dev = {}".format(refstr, devstr)
+    title2 = f"### Ref = {refstr}; Dev = {devstr}"
     print_metrics_to_file(f, title1, title2, ref_ch4_lifetime,
                           dev_ch4_lifetime, ch4_diff, ch4_pctdiff)
 
@@ -3183,14
+3186,8 @@ def make_benchmark_wetdep_plots( Default value: False """ - # Make sure destination directory exists - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) - raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) + # Create destination plot directory + make_directory(dst, overwrite) # Make a collection subdirectory targetdst = os.path.join(dst, collection) @@ -3242,9 +3239,9 @@ def make_benchmark_wetdep_plots( # Surface plots if "sfc" in plots: if datestr is not None: - plotfilename = "{}_Surface_{}.pdf".format(collection, datestr) + plotfilename = f"{collection}_Surface_{datestr}.pdf" else: - plotfilename = "{}_Surface.pdf".format(collection) + plotfilename = f"{collection}_Surface.pdf" pdfname = os.path.join(targetdst, plotfilename) compare_single_level( refds, @@ -3272,9 +3269,9 @@ def make_benchmark_wetdep_plots( # 500 hPa plots if "500hpa" in plots: if datestr is not None: - plotfilename = "{}_500hPa_{}.pdf".format(collection, datestr) + plotfilename = f"{collection}_500hPa_{datestr}.pdf" else: - plotfilename = "{}_500hPa.pdf".format(collection) + plotfilename = f"{collection}_500hPa.pdf" pdfname = os.path.join(targetdst, plotfilename) compare_single_level( refds, @@ -3305,12 +3302,9 @@ def make_benchmark_wetdep_plots( # Full column if datestr is not None: - plotfilename = "{}_FullColumn_ZonalMean_{}.pdf".format( - collection, - datestr - ) + plotfilename = f"{collection}_FullColumn_ZonalMean_{datestr}.pdf" else: - plotfilename = "{}_FullColumn_ZonalMean.pdf".format(collection) + plotfilename = f"{collection}_FullColumn_ZonalMean.pdf" pdfname = os.path.join(targetdst, plotfilename) compare_zonal_mean( refds, @@ -3337,12 +3331,9 @@ def make_benchmark_wetdep_plots( # Stratosphere if datestr is not None: - plotfilename = "{}_Strat_ZonalMean_{}.pdf".format( - collection, - datestr - ) + plotfilename = f"{collection}_Strat_ZonalMean_{datestr}.pdf" else: - plotfilename = "{}_Strat_ZonalMean.pdf".format(collection) + plotfilename = f"{collection}_Strat_ZonalMean.pdf" pdfname = os.path.join(targetdst, plotfilename) compare_zonal_mean( refds, @@ -3426,13 +3417,8 @@ def make_benchmark_aerosol_tables( """ - # Create the plot directory hierarchy if it doesn't already exist - if os.path.isdir(dst) and not overwrite: - err_str = "Pass overwrite=True to overwrite files in that directory" - print("Directory {} exists. 
{}".format(dst, err_str)) - return - if not os.path.isdir(dst): - os.makedirs(dst) + # Create destination directory + make_directory(dst, overwrite) # List of species (and subsets for the trop & strat) species_list = ["BCPI", "OCPI", "SO4", "DST1", "SALA", "SALC"] @@ -3557,7 +3543,7 @@ def print_aerosol_metrics(data, species_list, filename, title, label): # Print top header print("%" * 79, file=f) - print(" {} for {} in {}".format(title, year, devstr), file=f) + print(f" {title} for {year} in {devstr}") print(" (weighted by the number of days per month)", file=f) print("%" * 79, file=f) line = "\n" + " " * 40 + "Strat Trop Strat+Trop\n" @@ -3566,13 +3552,7 @@ def print_aerosol_metrics(data, species_list, filename, title, label): # Print data for spc in species_list: - line = "{} ({}) {} : {:11.9f} {:10.8f} {:10.8f}\n".format( - spc2name[spc].ljust(17), - spc.ljust(4), - label, - data[spc + "_s"], - data[spc + "_t"], - data[spc + "_f"]) + line = f"{spc2name[spc] : <17} ({spc : <4}) {label} : {data[spc + '_s']:11.9f} {data[spc + '_t']:10.8f} {data[spc + '_f']:10.8f}\n" print(line, file=f) # -------------------------------------- @@ -3580,11 +3560,11 @@ def print_aerosol_metrics(data, species_list, filename, title, label): # -------------------------------------- # Table info - filename = "{}/Aerosol_Burdens.txt".format(dst) + filename = f"{dst}/Aerosol_Burdens.txt" if n_mon == 12: title = "Annual average global aerosol burdens" else: - title = "Average global aerosol burdens across {} months".format(n_mon) + title = f"Average global aerosol burdens across {n_mon} months" label = "burden [Tg]" # Initialize @@ -3626,11 +3606,11 @@ def print_aerosol_metrics(data, species_list, filename, title, label): # ------------------------------------------- # Table info - filename = "{}/Global_Mean_AOD.txt".format(dst) + filename = f"{dst}/Global_Mean_AOD.txt" if n_mon == 12: title = "Annual average global AODs" else: - title = "Average global AODs across {} months".format(n_mon) + title = f"Average global AODs across {n_mon} months" label = "mean AOD [1]" # Initialize @@ -3769,7 +3749,7 @@ def make_benchmark_operations_budget( # Print info. Only allow Strat if Trop and Full are present print("Column sections:") for col_section in col_sections: - print(" {}".format(col_section)) + print(f" {col_section}") n_sections = len(col_sections) compute_strat = False if "Strat" in col_sections: @@ -3800,7 +3780,7 @@ def make_benchmark_operations_budget( # Print info print("Operations:") for all_operation in all_operations: - print(" {}".format(all_operation)) + print(f" {all_operation}") if compute_accum: if "ACCUMULATION" in all_operations: print("*** Will compute ACCUMULATION operation as sum of all " @@ -3966,8 +3946,7 @@ def make_benchmark_operations_budget( # Keep track of progress if (i + 1) % 50 == 0: - print(' {}: species {} of {}'.format(gc_section, i + 1, - n_spc)) + print(f" {gc_section}: species {i + 1} of {n_spc}") # Loop over operations (only those with data in files) for gc_operation in gc_operations: @@ -4031,7 +4010,7 @@ def make_benchmark_operations_budget( # Keep track of progress if (i + 1) % 50 == 0: - print(' Strat: species {} of {}'.format(i + 1, n_spc)) + print(f" Strat: species {i + 1} of {n_spc}") # Loop over operations (only those with data in files) for gc_operation in gc_operations: @@ -4096,8 +4075,7 @@ def make_benchmark_operations_budget( # Keep track of progress if (i + 1) % 50 == 0: - print(' {}: species {} of {}'. 
-                          format(col_section, i + 1, n_spc))
+                    print(f" {col_section}: species {i + 1} of {n_spc}")

                 # Get the accumulation dataframe row to fill.Skip if not found.
                 dfrow = (df["Column_Section"] == col_section) \
@@ -4142,25 +4120,19 @@ def make_benchmark_operations_budget(
     # ------------------------------------------

     # Create the target output directory hierarchy if it doesn't already exist
-    if os.path.isdir(dst) and not overwrite:
-        msg = "Directory {} exists. ".format(dst)
-        msg += "Pass overwrite=True to overwrite files in that directory"
-        raise ValueError(msg)
-    if not os.path.isdir(dst):
-        os.makedirs(dst)
+    make_directory(dst, overwrite)

     # Print budgets to file
     if label is not None:
-        filename = "{}/Budgets_After_Operations_{}.txt".format(dst, label)
+        filename = f"{dst}/Budgets_After_Operations_{label}.txt"
     else:
-        filename = "{}/Budgets_After_Operations.txt".format(dst)
+        filename = f"{dst}/Budgets_After_Operations.txt"
     with open(filename, "w+") as f:
         print("#" * 78, file=f)
         if label is not None and benchmark_type is not None:
-            print("{} budgets for {}".format(benchmark_type, label),
-                  file=f)
+            print(f"{benchmark_type} budgets for {label}", file=f)
         else:
-            print("Budgets across {}/{} sec".format(ref_interval, dev_interval), file=f)
+            print(f"Budgets across {ref_interval}/{dev_interval} sec", file=f)
         print("\n", file=f)
         print("NOTES:", file=f)
         msg = " - When using the non-local mixing scheme (default), "\
@@ -4177,8 +4149,7 @@ def make_benchmark_operations_budget(

     # Loop over species
     for i, spc in enumerate(spclist):
-        print("{} budgets (Ref={}; Dev={})".format(
-            spc, refstr, devstr), file=f)
+        print(f"{spc} budgets (Ref={refstr}; Dev={devstr})", file=f)

         # Print a table for each column section
         for col_section in col_sections:
@@ -4191,23 +4162,20 @@ def make_benchmark_operations_budget(
                 continue

             # Print dataframe subset to file
-            print(
-                "{} {} : {}".format(
-                    col_section,
-                    units[spc],
-                    spc),
-                file=f)
-            print(tabulate(df.loc[dfrows, ["Operation",
-                                           "Ref",
-                                           "Dev",
-                                           "Diff",
-                                           "Pct_diff"]],
-                           headers='keys',
-                           tablefmt='psql',
-                           showindex=False,
-                           floatfmt=(".5f", ".5f", ".5f", ".5f", ".5f"),
-                           ), file=f
-                  )
+            print(f"{col_section} {units[spc]} : {spc}", file=f)
+            print(tabulate(
+                df.loc[dfrows,
+                       ["Operation",
+                        "Ref",
+                        "Dev",
+                        "Diff",
+                        "Pct_diff"]],
+                headers='keys',
+                tablefmt='psql',
+                showindex=False,
+                floatfmt=(".5f", ".5f", ".5f", ".5f", ".5f"),
+                ), file=f
+            )
             print("\n", file=f)

     # ------------------------------------------
@@ -4264,15 +4232,11 @@ def make_benchmark_mass_conservation_table(
     """

     # ==================================================================
-    # Define destination directory
+    # Initialize
     # ==================================================================
-    if os.path.isdir(dst) and not overwrite:
-        msg = "Directory {} exists. Pass overwrite=True to overwrite " \
-            + "files in that directory, if any."
-        msg = msg.format(dst)
-        raise ValueError(msg)
-    if not os.path.isdir(dst):
-        os.makedirs(dst)
+
+    # Create the destination folder
+    make_directory(dst, overwrite)

     # Load a YAML file containing species properties (such as
     # molecular weights), which we will need for unit conversions.
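NOTE: The tabulate() call reformatted above is what renders each per-species
budget table. As a quick reference, a minimal standalone sketch (the two-row
DataFrame and its values are made up; only the keyword arguments mirror the
benchmark code) is:

    import pandas as pd
    from tabulate import tabulate

    # Made-up stand-in for one species' budget rows
    df = pd.DataFrame({
        "Operation": ["Chemistry", "Transport"],
        "Ref":      [1.23456, 2.34567],
        "Dev":      [1.23400, 2.34600],
        "Diff":     [-0.00056, 0.00033],
        "Pct_diff": [-0.04536, 0.01407],
    })

    # psql-style borders, no index column, 5 decimal places in every
    # float column, as in make_benchmark_operations_budget()
    print(tabulate(
        df[["Operation", "Ref", "Dev", "Diff", "Pct_diff"]],
        headers="keys",
        tablefmt="psql",
        showindex=False,
        floatfmt=(".5f", ".5f", ".5f", ".5f", ".5f"),
    ))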
@@ -4386,14 +4350,14 @@ def make_benchmark_mass_conservation_table(
         print(' ' + '-' * 10 + ' ' + '-' * 16, file=f)
         #masses
         for i in range(len(masses)):
-            print(' {} {:11.13f}'.format(dates[i], masses[i]), file=f)
+            print(f" {dates[i]} {masses[i] : 11.13f}", file=f)
         print(' ', file=f)
         print('   Summary', file=f)
         print(' ' + '-' * 30, file=f)
-        print('   Max mass = {:2.13f} Tg'.format(max_mass), file=f)
-        print('   Min mass = {:2.13f} Tg'.format(min_mass), file=f)
-        print('   Abs diff = {:>16.3f} g'.format(absdiff), file=f)
-        print('   Pct diff = {:>16.10f} %'.format(pctdiff), file=f)
+        print(f"   Max mass = {max_mass : 2.13f} Tg", file=f)
+        print(f"   Min mass = {min_mass : 2.13f} Tg", file=f)
+        print(f"   Abs diff = {absdiff : >16.3f} g", file=f)
+        print(f"   Pct diff = {pctdiff : >16.10f} %", file=f)

     gc.collect()

@@ -4437,9 +4401,8 @@ def get_species_database_dir(config):
         msg = f"Using species database {spcdb_dir}/species_database.yml"
         print(msg)
         return spcdb_dir
-    else:
-        msg = f"Could not find the {spcdb_dir}/species_database.yml file!"
-        raise FileNotFoundError(msg)
+    msg = f"Could not find the {spcdb_dir}/species_database.yml file!"
+    raise FileNotFoundError(msg)


 def create_benchmark_summary_table(
@@ -4454,7 +4417,6 @@ def create_benchmark_summary_table(
         overwrite=False,
         outfilename="Summary.txt",
         verbose=False,
-        spcdb_dir=os.path.dirname(__file__),
         ref_gchp=False,
         dev_gchp=False
 ):
@@ -4528,17 +4490,12 @@ def create_benchmark_summary_table(
         f = open(os.path.join(dst, outfilename), "w")
     except (IOError, OSError, FileNotFoundError) as e:
         msg = f"Could not open {outfilename} for writing!"
-        raise e(msg)
+        raise type(e)(msg) from e

     # Title strings
-    title1 = f"### Benchmark summary table"
+    title1 = "### Benchmark summary table"
     title2 = f"### Ref = {refstr}; Dev = {devstr}"

-    # Write a placeholder to the file that denotes where
-    # the list of species with differences will be written
-    placeholder = "@%% insert diff_list here %%@"
-    title4 = f"{placeholder}"
-
     # Print header to file
     print("#" * 80, file=f)
     print(f"{title1 : <77}{'###'}", file=f)
@@ -4561,9 +4518,6 @@ def create_benchmark_summary_table(
         verbose=verbose
     )

-    # Make a directory to store the list of species that differ
-    diff_dict = {}
-
     # Loop over diagnostic files
     for col in collections:

@@ -4621,7 +4575,6 @@ def create_benchmark_summary_table(
             print(f"{col}: {devstr} is identical to {refstr}", file=f)
             print(file=f)
         else:
-            c = 0
             print("-" * 79, file=f)
             print(f"{col}: {devstr} differs from {refstr}", file=f)
             print("\n   Diagnostics that differ", file=f)
@@ -4696,4 +4649,4 @@ def make_directory(
         raise ValueError(msg)

     if not os.path.isdir(dir_name):
-        os.mkdir(dir_name)
+        os.makedirs(dir_name)
diff --git a/gcpy/util.py b/gcpy/util.py
index 7f91231c..aef8c534 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -6,11 +6,11 @@
 import os
 import warnings
 import shutil
+from textwrap import wrap
 from yaml import safe_load as yaml_safe_load
 import numpy as np
 import xarray as xr
 from PyPDF2 import PdfFileWriter, PdfFileReader
-from textwrap import wrap

 def convert_lon(
     data,
@@ -316,7 +316,7 @@ def get_species_categories(
     """
     spc_cat_dict = read_config_file(
         os.path.join(
-            os.path.dirname(__file__), 
+            os.path.dirname(__file__),
             "benchmark_categories.yml"
         )
     )
@@ -1009,7 +1009,7 @@ def convert_bpch_names_to_netcdf_names(
         os.path.join(
             os.path.dirname(__file__),
             "bpch_to_nc_names.yml"
-        ). 
+ ), quiet=True ) @@ -1167,7 +1167,7 @@ def convert_bpch_names_to_netcdf_names( if verbose: print("\nList of bpch names and netCDF names") for key in old_to_new: - print("{} ==> {}".format(key.ljust(25), old_to_new[key].ljust(40))) + print(f"{key : <25} ==> {old_to_new[key] : <40}") # Rename the variables in the dataset if verbose: @@ -1189,7 +1189,7 @@ def get_lumped_species_definitions(): """ return read_config_file( os.path.join( - os.path.dirname(__file__), + os.path.dirname(__file__), "lumped_species.yml" ) ) @@ -1211,7 +1211,7 @@ def archive_lumped_species_definitions( src = os.path.join(os.path.dirname(__file__), lumped_spc) copy = os.path.join(dst, lumped_spc) if not os.path.exists(copy): - print("\nArchiving {} in {}".format(lumped_spc, dst)) + print(f"\nArchiving {lumped_spc} in {dst}") shutil.copyfile(src, copy) From 636a8debfcdbee2478872fd5126d05fa07156db3 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 11:05:32 -0500 Subject: [PATCH 27/54] Clean up gcpy/plot.py gcpy/plot.py - Convert strings with .format() to f-strings - Trimmed whitespace - Removed unnecessary else after return statements & de-indented - Use isinstance to check if an object is of a certain type - Use the proper import order - Added docstring where missing - Implemented other suggestions from pylint Signed-off-by: Bob Yantosca --- gcpy/plot.py | 261 ++++++++++++++++++++++++--------------------------- 1 file changed, 122 insertions(+), 139 deletions(-) diff --git a/gcpy/plot.py b/gcpy/plot.py index b9102073..de89ac4d 100644 --- a/gcpy/plot.py +++ b/gcpy/plot.py @@ -1,5 +1,11 @@ +""" +Module containing functions for creating plots +""" import os import copy +import warnings +from multiprocessing import current_process +from tempfile import TemporaryDirectory import matplotlib as mpl import matplotlib.colors as mcolors import matplotlib.pyplot as plt @@ -8,6 +14,7 @@ import xarray as xr import cartopy.crs as ccrs from matplotlib.backends.backend_pdf import PdfPages +from joblib import Parallel, delayed from PyPDF2 import PdfFileMerger from .grid import get_vert_grid, get_pressure_indices, \ pad_pressure_edges, convert_lev_to_pres, get_grid_extents, call_make_grid, \ @@ -18,11 +25,6 @@ all_zero_or_nan, slice_by_lev_and_time, compare_varnames, read_config_file from .units import check_units, data_unit_is_mol_per_mol from .constants import MW_AIR_g -from joblib import Parallel, delayed -from multiprocessing import current_process -from tempfile import TemporaryDirectory -import warnings -import copy # Save warnings format to undo overwriting built into PyPDF2 _warning_format = warnings.showwarning @@ -76,7 +78,7 @@ def six_plot( Args: subplot: str - Type of plot to create (ref, dev, absolute difference or + Type of plot to create (ref, dev, absolute difference or fractional difference) all_zero: bool Set this flag to True if the data to be plotted consist only of zeros @@ -87,7 +89,7 @@ def six_plot( grid: dict Dictionary mapping plot_val to plottable coordinates ax: matplotlib axes - Axes object to plot information. Will create a new axes + Axes object to plot information. Will create a new axes if none is passed. 
rowcol: tuple Subplot position in overall Figure @@ -102,7 +104,7 @@ def six_plot( masked_data: numpy array Masked area for cubed-sphere plotting other_all_nan: bool - Set this flag to True if plotting ref/dev and the other of ref/dev + Set this flag to True if plotting ref/dev and the other of ref/dev is all nan gridtype: str "ll" for lat/lon or "cs" for cubed-sphere @@ -113,7 +115,7 @@ def six_plot( use_cmap_RdBu: bool Set this flag to True to use a blue-white-red colormap match_cbar: bool - Set this flag to True if you are plotting with the same colorbar + Set this flag to True if you are plotting with the same colorbar for ref and dev verbose: bool Set this flag to True to enable informative printout. @@ -128,7 +130,7 @@ def six_plot( Indices where edge pressure values are within a given pressure range Default value: np.full((1,1), -1) log_yaxis: bool - Set this flag to True to enable log scaling of pressure in zonal + Set this flag to True to enable log scaling of pressure in zonal mean plots Default value: False xtick_positions: list of float @@ -216,7 +218,7 @@ def six_plot( else: [vmin, vmax] = [0.5, 2] if verbose: - print("Subplot ({}) vmin, vmax: {}, {}".format(rowcol, vmin, vmax)) + print(f"Subplot ({rowcol}) vmin, vmax: {vmin}, {vmax}") # Normalize colors (put into range [0..1] for matplotlib methods) if subplot in ("ref", "dev"): @@ -431,12 +433,12 @@ def compare_single_level( Default value: None extent: list Defines the extent of the region to be plotted in form - [minlon, maxlon, minlat, maxlat]. + [minlon, maxlon, minlat, maxlat]. Default value plots extent of input grids. Default value: [-1000, -1000, -1000, -1000] n_job: int Defines the number of simultaneous workers for parallel plotting. - Set to 1 to disable parallel plotting. + Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 sigdiff_list: list of str @@ -444,22 +446,22 @@ def compare_single_level( differences (where |max(fractional difference)| > 0.1). Default value: [] second_ref: xarray Dataset - A dataset of the same model type / grid as refdata, + A dataset of the same model type / grid as refdata, to be used in diff-of-diffs plotting. Default value: None second_dev: xarray Dataset - A dataset of the same model type / grid as devdata, + A dataset of the same model type / grid as devdata, to be used in diff-of-diffs plotting. Default value: None spcdb_dir: str Directory containing species_database.yml file. 
Default value: Path of GCPy code repository sg_ref_path: str - Path to NetCDF file containing stretched-grid info + Path to NetCDF file containing stretched-grid info (in attributes) for the ref dataset Default value: '' (will not be read in) sg_dev_path: str - Path to NetCDF file containing stretched-grid info + Path to NetCDF file containing stretched-grid info (in attributes) for the dev dataset Default value: '' (will not be read in) ll_plot_func: str @@ -559,7 +561,7 @@ def compare_single_level( # Get lat/lon extents, if applicable refminlon, refmaxlon, refminlat, refmaxlat = get_grid_extents(refgrid) devminlon, devmaxlon, devminlat, devmaxlat = get_grid_extents(devgrid) - + if -1000 not in extent: cmpminlon, cmpmaxlon, cmpminlat, cmpmaxlat = extent else: @@ -576,7 +578,7 @@ def compare_single_level( np.min([uniform_refmaxlon, uniform_devmaxlon]), np.max([refminlat, devminlat]), np.min([refmaxlat, devmaxlat])] - + # Set plot bounds for non cubed-sphere regridding and plotting ref_extent = (refminlon, refmaxlon, refminlat, refmaxlat) dev_extent = (devminlon, devmaxlon, devminlat, devmaxlat) @@ -670,10 +672,10 @@ def compare_single_level( if refmet is None or devmet is None: msg = "Met mata ust be passed to convert units to ug/m3." raise ValueError(msg) - elif normalize_by_area: + if normalize_by_area: msg = "Normalizing by area is not allowed if plotting ug/m3" raise ValueError(msg) - elif ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb": + if ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb": msg = "Units must be mol/mol if converting to ug/m3." raise ValueError(msg) @@ -705,15 +707,15 @@ def compare_single_level( if spc_name in ["Simple_SOA", "Complex_SOA"]: spc_mw_g = 150.0 else: - msg = "No properties found for {}. Cannot convert" \ + msg = f"No properties found for {spc_name}. Cannot convert" \ + " to ug/m3." - raise ValueError(msg.format(spc_name)) + raise ValueError(msg) else: spc_mw_g = species_properties.get("MW_g") if spc_mw_g is None: - msg = "Molecular weight not found for species {}!" \ + msg = f"Molecular weight not found for species {spc_name}!" \ + " Cannot convert to ug/m3." - raise ValueError(msg.format(spc_name)) + raise ValueError(msg) # Convert values from ppb to ug/m3: # ug/m3 = mol/mol * mol/g air * kg/m3 air * 1e3g/kg @@ -808,8 +810,8 @@ def compare_single_level( else: regional_cmp_extent = [-180, 180, -90, 90] - regional_cmp_grid = call_make_grid(cmpres, cmpgridtype, - in_extent=[-180,180,-90,90], + regional_cmp_grid = call_make_grid(cmpres, cmpgridtype, + in_extent=[-180,180,-90,90], out_extent=regional_cmp_extent)[0] # Get comparison data extents in same midpoint format as lat-lon grid. 
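NOTE: Several hunks in this patch replace exact type comparisons with
isinstance() checks, per pylint. The practical difference is that
isinstance() also accepts subclasses, which is the desired behavior when
testing whether plot data is an xarray DataArray. A minimal illustration
using only built-in types:

    # type() demands an exact class match, so subclasses fail the test
    print(type(True) == int)        # False: bool subclasses int

    # isinstance() accepts subclasses as well
    print(isinstance(True, int))    # True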
@@ -914,7 +916,7 @@ def createfig(ivar, temp_dir=''): warnings.filterwarnings('ignore', category=UserWarning) if savepdf and verbose: - print("{} ".format(ivar), end="") + print(f"{ivar} ", end="") varname = varlist[ivar] ds_ref = ds_refs[ivar] @@ -931,9 +933,9 @@ def createfig(ivar, temp_dir=''): exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"] if not any(s in varname for s in exclude_list): if "/" in cmn_units: - cmn_units = "{}/m2".format(cmn_units) + cmn_units = f"{cmn_units}/m2" else: - cmn_units = "{} m-2".format(cmn_units) + cmn_units = f"{cmn_units} m-2" ds_ref.attrs["units"] = cmn_units ds_dev.attrs["units"] = cmn_units subtitle_extra = ", Normalized by Area" @@ -959,9 +961,9 @@ def createfig(ivar, temp_dir=''): if cmpgridtype == "cs": def call_reshape(cmp_data): new_data = None - if type(cmp_data) == xr.DataArray: + if isinstance(cmp_data, xr.DataArray): new_data = cmp_data.data.reshape(6, cmpres, cmpres) - elif type(cmp_data) == np.ndarray: + elif isinstance(cmp_data, np.ndarray): new_data = cmp_data.reshape(6, cmpres, cmpres) return new_data @@ -1027,11 +1029,11 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): vmax_ref_cmp = float(np.nanmax(ds_ref_cmp)) vmin_dev_cmp = float(np.nanmin(ds_dev_cmp)) vmax_dev_cmp = float(np.nanmax(ds_dev_cmp)) - vmin_cmp = np.nanmin([vmin_ref_cmp, vmin_dev_cmp]) - vmax_cmp = np.nanmax([vmax_ref_cmp, vmax_dev_cmp]) - else: - vmin_cmp = np.nanmin([np.nanmin(ds_ref_cmp), np.nanmin(ds_dev_cmp)]) - vmax_cmp = np.nanmax([np.nanmax(ds_ref_cmp), np.nanmax(ds_dev_cmp)]) +# vmin_cmp = np.nanmin([vmin_ref_cmp, vmin_dev_cmp]) +# vmax_cmp = np.nanmax([vmax_ref_cmp, vmax_dev_cmp]) +# else: +# vmin_cmp = np.nanmin([np.nanmin(ds_ref_cmp), np.nanmin(ds_dev_cmp)]) +# vmax_cmp = np.nanmax([np.nanmax(ds_ref_cmp), np.nanmax(ds_dev_cmp)]) # Get overall min & max vmin_abs = np.nanmin([vmin_ref, vmin_dev])#, vmin_cmp]) @@ -1129,13 +1131,13 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): levstr = "Level " + str(ilev - 1) if extra_title_txt is not None: figs.suptitle( - "{}, {} ({})".format(varname, levstr, extra_title_txt), + f"{varname}, {levstr} ({extra_title_txt})", fontsize=fontsize, y=offset, ) else: figs.suptitle( - "{}, {}".format(varname, levstr), + f"{varname}, {levstr}", fontsize=fontsize, y=offset ) elif ( @@ -1146,17 +1148,17 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): ): if extra_title_txt is not None: figs.suptitle( - "{} ({})".format(varname, extra_title_txt), + f"{varname} ({extra_title_txt})", fontsize=fontsize, y=offset, ) else: figs.suptitle( - "{}".format(varname), + f"{varname}", fontsize=fontsize, y=offset) else: - print("Incorrect dimensions for {}!".format(varname)) + print(f"Incorrect dimensions for {varname}!") # ============================================================== # Set colormaps for data plots @@ -1196,46 +1198,38 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): # ============================================================== if refgridtype == "ll": - ref_title = "{} (Ref){}\n{}".format(refstr, subtitle_extra, refres) + ref_title = f"{refstr} (Ref){subtitle_extra}\n{refres}" else: - ref_title = "{} (Ref){}\nc{}".format( - refstr, subtitle_extra, refres) + ref_title = f"{refstr} (Ref){subtitle_extra}\nc{refres}" if devgridtype == "ll": - dev_title = "{} (Dev){}\n{}".format(devstr, subtitle_extra, devres) + dev_title = f"{devstr} (Dev){subtitle_extra}\n{devres}" else: - dev_title = "{} (Dev){}\nc{}".format( - devstr, subtitle_extra, devres) - + dev_title = 
f"{devstr} (Dev){subtitle_extra}\nc{devres}" if regridany: absdiff_dynam_title = \ - "Difference ({})\nDev - Ref, Dynamic Range".format(cmpres) + f"Difference ({cmpres})\nDev - Ref, Dynamic Range" absdiff_fixed_title = \ - "Difference ({})\nDev - Ref, Restricted Range [5%,95%]".\ - format(cmpres) + f"Difference ({cmpres})\nDev - Ref, Restricted Range [5%,95%]" if diff_of_diffs: fracdiff_dynam_title = \ - "Difference ({}), Dynamic Range\n{} - {}".\ - format(cmpres, frac_devstr, frac_refstr) + f"Difference ({cmpres}), Dynamic Range\n{frac_devstr} - {frac_refstr}" fracdiff_fixed_title = \ - "Difference ({}), Restricted Range [5%,95%]\n{} - {}".\ - format(cmpres, frac_devstr, frac_refstr) + f"Difference ({cmpres}), Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" else: fracdiff_dynam_title = \ - "Ratio ({})\nDev/Ref, Dynamic Range".format(cmpres) + f"Ratio ({cmpres})\nDev/Ref, Dynamic Range" fracdiff_fixed_title = \ - "Ratio ({})\nDev/Ref, Fixed Range".format(cmpres) + f"Ratio ({cmpres})\nDev/Ref, Fixed Range" else: absdiff_dynam_title = "Difference\nDev - Ref, Dynamic Range" absdiff_fixed_title = \ "Difference\nDev - Ref, Restricted Range [5%,95%]" if diff_of_diffs: fracdiff_dynam_title = \ - "Difference, Dynamic Range\n{} - {}".\ - format(frac_devstr, frac_refstr) + f"Difference, Dynamic Range\n{frac_devstr} - {frac_refstr}" fracdiff_fixed_title = \ - "Difference, Restricted Range [5%,95%]\n{} - {}".\ - format(frac_devstr, frac_refstr) + "Difference, Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}" else: fracdiff_dynam_title = "Ratio \nDev/Ref, Dynamic Range" fracdiff_fixed_title = "Ratio \nDev/Ref, Fixed Range" @@ -1286,7 +1280,7 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): plot_extent[:], plot_extent[:], plot_extent[:], plot_extent[:]] plot_vals = [ds_ref, ds_dev, absdiff, absdiff, fracdiff, fracdiff] - grids = [refgrid, devgrid, regional_cmp_grid.copy(), regional_cmp_grid.copy(), + grids = [refgrid, devgrid, regional_cmp_grid.copy(), regional_cmp_grid.copy(), regional_cmp_grid.copy(), regional_cmp_grid.copy()] axs = [ax0, ax1, ax2, ax3, ax4, ax5] rowcols = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] @@ -1400,8 +1394,7 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): if np.abs(1 - np.nanmax(fracdiff)) > 0.1: sigdiff_list.append(varname) return varname - else: - return + return "" # ================================================================== # Call figure generation function in a parallel loop over variables @@ -1423,7 +1416,7 @@ def get_extent_for_colors(ds, minlon, maxlon, minlat, maxlat): # update sig diffs after parallel calls if current_process().name == "MainProcess": for varname in results: - if type(varname) is str: + if isinstance(varname, str): sigdiff_list.append(varname) # ================================================================== @@ -1574,7 +1567,7 @@ def compare_zonal_mean( Default value: None n_job: int Defines the number of simultaneous workers for parallel plotting. - Set to 1 to disable parallel plotting. + Set to 1 to disable parallel plotting. Value of -1 allows the application to decide. Default value: -1 sigdiff_list: list of str @@ -1582,30 +1575,30 @@ def compare_zonal_mean( differences (where |max(fractional difference)| > 0.1). Default value: [] second_ref: xarray Dataset - A dataset of the same model type / grid as refdata, + A dataset of the same model type / grid as refdata, to be used in diff-of-diffs plotting. 
Default value: None
         second_dev: xarray Dataset
-            A dataset of the same model type / grid as devdata, 
+            A dataset of the same model type / grid as devdata,
             to be used in diff-of-diffs plotting.
             Default value: None
         spcdb_dir: str
             Directory containing species_database.yml file.
             Default value: Path of GCPy code repository
         sg_ref_path: str
-            Path to NetCDF file containing stretched-grid info 
+            Path to NetCDF file containing stretched-grid info
             (in attributes) for the ref dataset
             Default value: '' (will not be read in)
         sg_dev_path: str
-            Path to NetCDF file containing stretched-grid info 
+            Path to NetCDF file containing stretched-grid info
             (in attributes) for the dev dataset
             Default value: '' (will not be read in)
         ref_vert_params: list(AP, BP) of list-like types
-            Hybrid grid parameter A in hPa and B (unitless). 
+            Hybrid grid parameter A in hPa and B (unitless).
             Needed if ref grid is not 47 or 72 levels.
             Default value: [[], []]
         dev_vert_params: list(AP, BP) of list-like types
-            Hybrid grid parameter A in hPa and B (unitless). 
+            Hybrid grid parameter A in hPa and B (unitless).
             Needed if dev grid is not 47 or 72 levels.
             Default value: [[], []]
         extra_plot_args: various
@@ -1835,10 +1828,10 @@ def compare_zonal_mean(
             if refmet is None or devmet is None:
                 msg = "Met mata ust be passed to convert units to ug/m3."
                 raise ValueError(msg)
-            elif normalize_by_area:
+            if normalize_by_area:
                 msg = "Normalizing by area is now allowed if plotting ug/m3"
                 raise ValueError(msg)
-            elif ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb":
+            if ds_refs[i].units != "ppb" or ds_devs[i].units != "ppb":
                 msg = "Units must be mol/mol if converting to ug/m3."
                 raise ValueError(msg)

@@ -1866,16 +1859,16 @@ def compare_zonal_mean(
                 if spc_name in ["Simple_SOA", "Complex_SOA"]:
                     spc_mw_g = 150.0
                 else:
-                    msg = "No properties found for {}. Cannot convert" \
+                    msg = f"No properties found for {spc_name}. Cannot convert" \
                         + " to ug/m3."
-                    raise ValueError(msg.format(spc_name))
+                    raise ValueError(msg)
             else:
                 # Get the species molecular weight in g/mol
                 spc_mw_g = species_properties.get("MW_g")
                 if spc_mw_g is None:
-                    msg = "Molecular weight not found for for species {}!" \
+                    msg = f"Molecular weight not found for species {spc_name}!" \
                         + " Cannot convert to ug/m3."
-                    raise ValueError(msg.format(spc_name))
+                    raise ValueError(msg)

             # Convert values from ppb to ug/m3:
             # ug/m3 = 1e-9ppb * mol/g air * kg/m3 air * 1e3g/kg
@@ -2085,7 +2078,7 @@ def createfig(ivar, temp_dir=''):
             warnings.filterwarnings('ignore', category=UserWarning)

         if savepdf and verbose:
-            print("{} ".format(ivar), end="")
+            print(f"{ivar} ", end="")
         varname = varlist[ivar]

         # ==============================================================
@@ -2110,9 +2103,9 @@ def createfig(ivar, temp_dir=''):
             exclude_list = ["WetLossConvFrac", "Prod_", "Loss_"]
             if not any(s in varname for s in exclude_list):
                 if "/" in cmn_units:
-                    cmn_units = "{}/m2".format(cmn_units)
+                    cmn_units = f"{cmn_units}/m2"
                 else:
-                    cmn_units = "{} m-2".format(cmn_units)
+                    cmn_units = f"{cmn_units} m-2"
                 ref_units[ivar] = cmn_units
                 dev_units[ivar] = cmn_units
                 subtitle_extra = ", Normalized by Area"
@@ -2163,8 +2156,8 @@ def createfig(ivar, temp_dir=''):
         # This will have implications as to how we set min and max
         # values for the color ranges below.
# ==============================================================
-        ref_values = ds_ref.values if type(ds_ref) == xr.DataArray else ds_ref
-        dev_values = ds_dev.values if type(ds_dev) == xr.DataArray else ds_dev
+        ref_values = ds_ref.values if isinstance(ds_ref, xr.DataArray) else ds_ref
+        dev_values = ds_dev.values if isinstance(ds_dev, xr.DataArray) else ds_dev
         ref_is_all_zero, ref_is_all_nan = all_zero_or_nan(ref_values)
         dev_is_all_zero, dev_is_all_nan = all_zero_or_nan(dev_values)

@@ -2211,12 +2204,12 @@ def createfig(ivar, temp_dir=''):
             fontsize = 25
         if extra_title_txt is not None:
             figs.suptitle(
-                "{}, Zonal Mean ({})".format(varname, extra_title_txt),
+                f"{varname}, Zonal Mean ({extra_title_txt})",
                 fontsize=fontsize,
                 y=offset,
             )
         else:
-            figs.suptitle("{}, Zonal Mean".format(varname),
+            figs.suptitle(f"{varname}, Zonal Mean",
                           fontsize=fontsize, y=offset)

         # ==============================================================
@@ -2242,47 +2235,39 @@ def createfig(ivar, temp_dir=''):
         # ==============================================================

         if refgridtype == "ll":
-            ref_title = "{} (Ref){}\n{}".format(refstr, subtitle_extra, refres)
+            ref_title = f"{refstr} (Ref){subtitle_extra}\n{refres}"
         else:
-            ref_title = "{} (Ref){}\n{} regridded from c{}".format(
-                refstr, subtitle_extra, cmpres, refres
-            )
+            ref_title = f"{refstr} (Ref){subtitle_extra}\n{cmpres} regridded from c{refres}"

         if devgridtype == "ll":
-            dev_title = "{} (Dev){}\n{}".format(devstr, subtitle_extra, devres)
+            dev_title = f"{devstr} (Dev){subtitle_extra}\n{devres}"
         else:
-            dev_title = "{} (Dev){}\n{} regridded from c{}".format(
-                devstr, subtitle_extra, cmpres, devres)
+            dev_title = f"{devstr} (Dev){subtitle_extra}\n{cmpres} regridded from c{devres}"

         if regridany:
             absdiff_dynam_title = \
-                "Difference ({})\nDev - Ref, Dynamic Range".format(cmpres)
+                f"Difference ({cmpres})\nDev - Ref, Dynamic Range"
             absdiff_fixed_title = \
-                "Difference ({})\nDev - Ref, Restricted Range [5%,95%]".\
-                format(cmpres)
+                f"Difference ({cmpres})\nDev - Ref, Restricted Range [5%,95%]"
             if diff_of_diffs:
                 fracdiff_dynam_title = \
-                    "Difference ({}), Dynamic Range\n{} - {}".\
-                    format(cmpres, frac_devstr, frac_refstr)
+                    f"Difference ({cmpres}), Dynamic Range\n{frac_devstr} - {frac_refstr}"
                 fracdiff_fixed_title = \
-                    "Difference ({}), Restricted Range [5%,95%]\n{} - {}".\
-                    format(cmpres, frac_devstr, frac_refstr)
+                    f"Difference ({cmpres}), Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}"
             else:
                 fracdiff_dynam_title = \
-                    "Ratio ({})\nDev/Ref, Dynamic Range".format(cmpres)
+                    f"Ratio ({cmpres})\nDev/Ref, Dynamic Range"
                 fracdiff_fixed_title = \
-                    "Ratio ({})\nDev/Ref, Fixed Range".format(cmpres)
         else:
             absdiff_dynam_title = "Difference\nDev - Ref, Dynamic Range"
             absdiff_fixed_title = \
                 "Difference\nDev - Ref, Restricted Range [5%,95%]"
             if diff_of_diffs:
                 fracdiff_dynam_title = \
-                    "Difference, Dynamic Range\n{} - {}".\
-                    format(frac_devstr, frac_refstr)
+                    f"Difference, Dynamic Range\n{frac_devstr} - {frac_refstr}"
                 fracdiff_fixed_title = \
-                    "Difference, Restricted Range [5%,95%]\n{} - {}".\
-                    format(frac_devstr, frac_refstr)
+                    f"Difference, Restricted Range [5%,95%]\n{frac_devstr} - {frac_refstr}"
             else:
                 fracdiff_dynam_title = "Ratio \nDev/Ref, Dynamic Range"
                 fracdiff_fixed_title = "Ratio \nDev/Ref, Fixed Range"
@@ -2433,8 +2418,7 @@ def createfig(ivar, temp_dir=''):
             if np.abs(1 - np.nanmax(zm_fracdiff)) > 0.1:
                 sigdiff_list.append(varname)
             return varname
-        else:
-            return
+        return ""

     # 
================================================================== # Call figure generation function in a parallel loop over variables @@ -2517,6 +2501,9 @@ def normalize_colors(vmin, vmax, is_difference=False, # Define class for logarithmic non-symmetric color scheme class MidpointLogNorm(mcolors.LogNorm): + """ + Class for logarithmic non-symmetric color scheme + """ def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False): mcolors.LogNorm.__init__(self, vmin=vmin, vmax=vmax, clip=clip) self.midpoint = midpoint @@ -2537,18 +2524,15 @@ def __call__(self, value, clip=None): # add a single tick. if is_difference: return mcolors.Normalize(vmin=-1.0, vmax=1.0) - else: - return mcolors.Normalize(vmin=0.0, vmax=1.0) + return mcolors.Normalize(vmin=0.0, vmax=1.0) - else: - # For log color scales, assume a range 3 orders of magnitude - # below the maximum value. Otherwise use a linear scale. - if log_color_scale and not ratio_log: - return mcolors.LogNorm(vmin=vmax / 1e3, vmax=vmax) - elif log_color_scale: - return MidpointLogNorm(vmin=vmin, vmax=vmax, midpoint=1) - else: - return mcolors.Normalize(vmin=vmin, vmax=vmax) + # For log color scales, assume a range 3 orders of magnitude + # below the maximum value. Otherwise use a linear scale. + if log_color_scale and not ratio_log: + return mcolors.LogNorm(vmin=vmax / 1e3, vmax=vmax) + if log_color_scale: + return MidpointLogNorm(vmin=vmin, vmax=vmax, midpoint=1) + return mcolors.Normalize(vmin=vmin, vmax=vmax) def single_panel(plot_vals, @@ -2634,7 +2618,7 @@ def single_panel(plot_vals, Range from minimum to maximum pressure for zonal mean plotting Default value: [0, 2000] (will plot entire atmosphere) pedge: numpy array - Edge pressures of vertical grid cells in plot_vals + Edge pressures of vertical grid cells in plot_vals for zonal mean plotting Default value: np.full((1, 1), -1) (will determine automatically) pedge_ind: numpy array @@ -2691,7 +2675,7 @@ def single_panel(plot_vals, # Eliminate 1D level or time dimensions plot_vals = plot_vals.squeeze() - data_is_xr = type(plot_vals) is xr.DataArray + data_is_xr = isinstance(plot_vals, xr.DataArray) if xtick_positions == []: # if plot_type == "single_level": # xtick_positions = np.arange(extent[0], extent[1], (extent[1]-extent[0])/12) @@ -2776,7 +2760,7 @@ def single_panel(plot_vals, # average across longitude bands # assume lon dim is index 2 (no time dim) if a numpy array is passed lon_ind = 2 - if type(plot_vals) is xr.DataArray: + if isinstance(plot_vals, xr.DataArray): lon_ind = plot_vals.dims.index('lon') # calculate zonal means plot_vals = plot_vals.mean(axis=lon_ind) @@ -2804,12 +2788,13 @@ def single_panel(plot_vals, # needed for numpy arrays if doing pcolormesh / imshow, and xarray DataArrays # if using imshow proj = ccrs.PlateCarree(central_longitude=180) - if ll_plot_func == "imshow" or type(plot_vals) is not xr.DataArray: + if ll_plot_func == "imshow" or \ + not isinstance(plot_vals, xr.DataArray): i = 0 while grid['lon_b'][i] < 0: i = i+1 plot_vals_holder = copy.deepcopy(plot_vals) - if type(plot_vals) is not xr.DataArray: + if not isinstance(plot_vals, xr.DataArray): plot_vals_holder[:,:-i] = plot_vals[:,i:] plot_vals_holder[:,-i:] = plot_vals[:,:i] else: @@ -2820,17 +2805,18 @@ def single_panel(plot_vals, extent[1] = extent[1] % 360 - 180 grid["lon_b"] = grid["lon_b"] % 360 - 180 grid["lon"] = grid["lon"] % 360 - 180 - if type(plot_vals) is xr.DataArray: + if isinstance(plot_vals, xr.DataArray): plot_vals['lon'] = plot_vals['lon'] % 360 - 180 # realign grid also if 
doing imshow or using numpy arrays - if ll_plot_func == "imshow" or type(plot_vals) is not xr.DataArray: + if ll_plot_func == "imshow" or \ + not isinstance(plot_vals, xr.DataArray): temp_grid = copy.deepcopy(grid) temp_grid['lon_b'][:-i] = grid['lon_b'][i:] temp_grid['lon_b'][-i:] = grid['lon_b'][:i] temp_grid['lon'][:-i] = grid['lon'][i:] temp_grid['lon'][-i:] = grid['lon'][:i] grid = temp_grid - if type(plot_vals) is xr.DataArray: + if isinstance(plot_vals, xr.DataArray): plot_vals = plot_vals.assign_coords({'lon' : grid['lon']}) if gridtype == "cs": proj = ccrs.PlateCarree(central_longitude=180) @@ -2846,13 +2832,13 @@ def single_panel(plot_vals, ax = plt.axes(projection=proj) fig = plt.gcf() - data_is_xr = type(plot_vals) is xr.DataArray + data_is_xr = isinstance(plot_vals, xr.DataArray) # Normalize colors (put into range [0..1] for matplotlib methods) if norm == []: if data_is_xr: vmin = plot_vals.data.min() if vmin is None else vmin vmax = plot_vals.data.max() if vmax is None else vmax - elif type(plot_vals) is np.ndarray: + elif isinstance(plot_vals, np.ndarray): vmin = np.min(plot_vals) if vmin is None else vmin vmax = np.max(plot_vals) if vmax is None else vmax norm = normalize_colors( @@ -2891,7 +2877,7 @@ def single_panel(plot_vals, #[dlat,dlon] = list(map(float, res.split('x'))) dlon = grid['lon'][2] - grid['lon'][1] dlat = grid['lat'][2] - grid['lat'][1] - + def get_nearest_extent(val, array, direction, spacing): # choose nearest values in grid to desired extent to minimize distortion grid_vals = np.asarray(array) @@ -2903,8 +2889,7 @@ def get_nearest_extent(val, array, direction, spacing): # expand extent to value beyond grid limits if extent # is already > max grid value return grid_vals[(np.abs(grid_vals - val)).argmin()] - else: - return grid_vals[i] + return grid_vals[i] else: diff[diff > 0] = -np.inf i = diff.argmax() @@ -2914,8 +2899,7 @@ def get_nearest_extent(val, array, direction, spacing): # cartopy issues return grid_vals[( np.abs(grid_vals - val)).argmin()] - spacing - else: - return max(grid_vals[i], -180) + return max(grid_vals[i], -180) closest_minlon = get_nearest_extent( minlon, grid['lon_b'], 'less', dlon) closest_maxlon = get_nearest_extent( @@ -2944,7 +2928,7 @@ def get_nearest_extent(val, array, direction, spacing): closest_maxlon, closest_minlat, closest_maxlat] - if type(plot_vals) is xr.DataArray: + if isinstance(plot_vals, xr.DataArray): # filter data by bounds of extent plot_vals = plot_vals.where( plot_vals.lon > closest_minlon, @@ -2966,7 +2950,7 @@ def get_nearest_extent(val, array, direction, spacing): if len(maxlon_i) == 0: maxlon_i = -1 else: - maxlon_i = int(maxlon_i) + maxlon_i = int(maxlon_i) minlat_i = np.where(grid['lat_b']==closest_minlat)[0] if len(minlat_i) == 0: minlat_i = 0 @@ -3081,8 +3065,7 @@ def get_nearest_extent(val, array, direction, spacing): pdf.close() # in some cases users may wish to get a list of all associated plots - # eg. cubedsphere grids have six plots associated with them + # eg. 
cubedsphere grids have six plots associated with them
     if return_list_of_plots:
         return plots if 'plots' in locals() else [plot]
-    else:
-        return plot
+    return plot

From f87c2853ef71ec23793623fdced0a6671addd285 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 9 Feb 2023 11:10:18 -0500
Subject: [PATCH 28/54] Lumped species definitions now use read_config_file
 with quiet=True

gcpy/util.py
- add quiet=True keyword to get_lumped_species_definitions

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca
---
 CHANGELOG.md | 6 ++++++
 gcpy/util.py | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37327b9c..ca52542a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Species/emissions/inventories that differ between Dev & Ref versions are now printed at the top of the benchmark emissions, inventory, and global mass tables. if there are too many species with diffs, an alternate message is printed.
 - New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions.

+### Changed
+- Applied cleanup suggestions from pylint to `benchmark.py`, `util.py`, `plot.py`
+- Replaced format with f-strings in `benchmark.py`, `util.py`
+- Abstract some common code in `benchmark.py` into functions
+- Replaced direct calls to `yaml.load` with `util.read_config_file` (mostly using `quiet=True`)
+
 ## [1.3.2] -- 2022-10-25

 ### Fixes
diff --git a/gcpy/util.py b/gcpy/util.py
index aef8c534..32d4868a 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -1191,7 +1191,8 @@ def get_lumped_species_definitions():
     return read_config_file(
         os.path.join(
             os.path.dirname(__file__),
             "lumped_species.yml"
-        )
+        ),
+        quiet=True
     )

From fb0c26ab9024cd0d3d3fcc34ebba052926b731c7 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 9 Feb 2023 11:27:18 -0500
Subject: [PATCH 29/54] Move function make_directory back to util.py

gcpy/util.py
- Function make_directory (formerly in benchmark.py) is now here

gcpy/benchmark.py
- Now call util.make_directory in several places

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark.py | 47 +++++++++++------------------------------------
 gcpy/util.py      | 25 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index b5e83ec9..c826444e 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -738,7 +738,7 @@ def make_benchmark_conc_plots(
     # ==================================================================

     # Create the destination folder
-    make_directory(dst, overwrite)
+    util.make_directory(dst, overwrite)

     # Define extra title text (usually a date string)
     # for the top-title of the plot
@@ -1406,7 +1406,7 @@ def make_benchmark_emis_plots(
     # =================================================================

     # Create the destination folder
-    make_directory(dst, overwrite)
+    util.make_directory(dst, overwrite)

     # Create the "Emissions" category folder.  If subdst is passed,
     # then create a sub-folder (needed for the 1-year benchmarks).
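NOTE: All of the call sites converted in this patch share the contract of
util.make_directory, whose full body appears in the gcpy/util.py hunk below.
A minimal usage sketch (the directory name here is arbitrary):

    from gcpy import util

    # Creates ./BenchmarkResults (and any missing parents) if absent
    util.make_directory("BenchmarkResults", overwrite=False)

    # A second call with overwrite=False raises ValueError because the
    # directory now exists; pass overwrite=True to write into it anyway
    try:
        util.make_directory("BenchmarkResults", overwrite=False)
    except ValueError as exc:
        print(exc)
    util.make_directory("BenchmarkResults", overwrite=True)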
@@ -1776,7 +1776,7 @@ def make_benchmark_emis_tables( # ================================================================== # Create the destination folder - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Create the "Tables" category folder if it does not exist emisdir = os.path.join(dst, "Tables") @@ -2021,7 +2021,7 @@ def make_benchmark_jvalue_plots( # ================================================================== # Create the directory for output - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Get the function that will read file(s) into a Dataset reader = util.dataset_reader(time_mean, verbose=verbose) @@ -2386,7 +2386,7 @@ def make_benchmark_aod_plots( # ================================================================== # Create destination plots directory - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Create the "Aerosols" directory as a subfolder of dst. # If subdst is passed, then create a subdirectory of the "Aerosols" @@ -2916,7 +2916,7 @@ def make_benchmark_oh_metrics( # ================================================================== # Define destination directory # ================================================================== - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # ================================================================== # Read data from netCDF into Dataset objects @@ -3187,7 +3187,7 @@ def make_benchmark_wetdep_plots( """ # Create destination plot directory - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Make a collection subdirectory targetdst = os.path.join(dst, collection) @@ -3418,7 +3418,7 @@ def make_benchmark_aerosol_tables( """ # Create destination directory - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # List of species (and subsets for the trop & strat) species_list = ["BCPI", "OCPI", "SO4", "DST1", "SALA", "SALC"] @@ -4120,7 +4120,7 @@ def make_benchmark_operations_budget( # ------------------------------------------ # Create the target output directory hierarchy if it doesn't already exist - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Print budgets to file if label is not None: @@ -4236,7 +4236,7 @@ def make_benchmark_mass_conservation_table( # ================================================================== # Create the destination folder - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Load a YAML file containing species properties (such as # molecular weights), which we will need for unit conversions. @@ -4483,7 +4483,7 @@ def create_benchmark_summary_table( # ================================================================== # Create the directory for output - make_directory(dst, overwrite) + util.make_directory(dst, overwrite) # Create file try: @@ -4625,28 +4625,3 @@ def diff_list_to_text( diff_text = f"### {diff_text : <82}{'###'}" return diff_text.strip() - - -def make_directory( - dir_name, - overwrite -): - """ - Creates a directory where benchmark plots/tables will be placed. - - Args: - ----- - dir_name : str - Name of the directory to be created. - overwrite : bool - Set to True if you wish to overwrite prior contents in - the directory 'dir_name' - """ - - if os.path.isdir(dir_name) and not overwrite: - msg = f"Directory {dir_name} exists!\n" - msg += "Pass overwrite=True to overwrite files in that directory." 
- raise ValueError(msg) - - if not os.path.isdir(dir_name): - os.makedirs(dir_name) diff --git a/gcpy/util.py b/gcpy/util.py index 32d4868a..ff6a03e2 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -2317,3 +2317,28 @@ def array_equals( refsum = np.sum(refdata, dtype=np.float64) devsum = np.sum(devdata, dtype=np.float64) return np.abs(devsum - refsum) > np.float64(0.0) + + +def make_directory( + dir_name, + overwrite +): + """ + Creates a directory where benchmark plots/tables will be placed. + + Args: + ----- + dir_name : str + Name of the directory to be created. + overwrite : bool + Set to True if you wish to overwrite prior contents in + the directory 'dir_name' + """ + + if os.path.isdir(dir_name) and not overwrite: + msg = f"Directory {dir_name} exists!\n" + msg += "Pass overwrite=True to overwrite files in that directory." + raise ValueError(msg) + + if not os.path.isdir(dir_name): + os.makedirs(dir_name) From 1eb445c840f44bfdfd7ac1bcc7fe73ca97e09439 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 11:29:41 -0500 Subject: [PATCH 30/54] Cleanup oh_metrics.py module gcpy/oh_metrics.py - Convert string format statements to f-strings - Import make_directory, read_config_file from util.py - Trim whitespace - Apply other suggestinons from the pylint linter Signed-off-by: Bob Yantosca --- gcpy/oh_metrics.py | 48 ++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/gcpy/oh_metrics.py b/gcpy/oh_metrics.py index 8f305a1c..4bda081e 100644 --- a/gcpy/oh_metrics.py +++ b/gcpy/oh_metrics.py @@ -15,6 +15,7 @@ import numpy as np import xarray as xr import gcpy.constants as const +from gcpy.util import make_directory, read_config_file # ===================================================================== # %%% METHODS %%% @@ -45,18 +46,18 @@ def combine_dataset(file_list=None): combine="nested", concat_dim="time" ) - except FileNotFoundError: - msg = "Could not find one or more files in {}".format(file_list) - raise FileNotFoundError(msg) + except FileNotFoundError as exc: + msg = f"Could not find one or more files in {file_list}" + raise FileNotFoundError(msg) from exc else: try: ds = xr.open_mfdataset( file_list, drop_variables=const.skip_these_vars ) - except FileNotFoundError: - msg = "Could not find one or more files in {}".format(file_list) - raise FileNotFoundError(msg) + except FileNotFoundError as exc: + msg = f"Could not find one or more files in {file_list}" + raise FileNotFoundError(msg) from exc return ds @@ -220,7 +221,7 @@ def init_common_vars(ref, refstr, dev, devstr, spcdb_dir): """ # Get species database - spcdb = util.read_config_file( + spcdb = read_config_file( os.path.join( spcdb_dir, "species_database.yml" @@ -337,15 +338,15 @@ def write_to_file(f, title, ref, dev, absdiff, pctdiff, is_mean_oh=False): print("-" * 60, file=f) if is_mean_oh: - print("Ref : {:14.11f}".format(ref), file=f) - print("Dev : {:14.11f}".format(dev), file=f) - print("Abs diff : {:14.11f}".format(absdiff), file=f) - print("% diff : {:9.6f}".format(pctdiff), file=f) + print(f"Ref : {ref:14.11f}", file=f) + print(f"Dev : {dev:14.11f}", file=f) + print(f"Abs diff : {absdiff:14.11f}", file=f) + print(f"% diff : {pctdiff:9.6f}", file=f) else: - print("Ref : {:9.6f}".format(ref), file=f) - print("Dev : {:9.6f}".format(dev), file=f) - print("Abs diff : {:9.6f}".format(absdiff), file=f) - print("% diff : {:9.6f}".format(pctdiff), file=f) + print(f"Ref : {ref:9.6f}", file=f) + print(f"Dev : {dev:9.6f}", file=f) + print(f"Abs diff : 
{absdiff:9.6f}", file=f) + print(f"% diff : {pctdiff:9.6f}", file=f) def print_metrics(common_vars, dst): @@ -369,10 +370,10 @@ def print_metrics(common_vars, dst): # ============================================================== print("#" * 79, file=f) print("### OH Metrics", file=f) - print("### Ref = {}; Dev = {}".format( - common_vars["refstr"], - common_vars["devstr"] - ), file=f) + print(\ + f"### Ref = {common_vars['refstr'],}; Dev = {common_vars['devstr']}", + file=f + ) print("#" * 79, file=f) print("\n") @@ -477,14 +478,7 @@ def make_benchmark_oh_metrics( # Make sure that the destination directory exists # (or create it if it does not) - if os.path.isdir(dst): - if not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) - raise ValueError(msg) - else: - os.makedirs(dst) + make_directory(dst, overwrite) # Initialize a dictionary containing common variables common_vars = init_common_vars( From 4eb1e59f7595abaeee21a5c9ce068e9d943090b2 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 11:48:04 -0500 Subject: [PATCH 31/54] Bug fix, remove extraneous "," from f-string in oh_metrics.py gcpy/oh_metrics.py - Removed extraneous comma that cast the ref string to a tuple. Now fixed. Signed-off-by: Bob Yantosca --- gcpy/oh_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/oh_metrics.py b/gcpy/oh_metrics.py index 4bda081e..b1d5443b 100644 --- a/gcpy/oh_metrics.py +++ b/gcpy/oh_metrics.py @@ -371,7 +371,7 @@ def print_metrics(common_vars, dst): print("#" * 79, file=f) print("### OH Metrics", file=f) print(\ - f"### Ref = {common_vars['refstr'],}; Dev = {common_vars['devstr']}", + f"### Ref = {common_vars['refstr']}; Dev = {common_vars['devstr']}", file=f ) print("#" * 79, file=f) From c81dd5afea2194d055874aa7307c6504414f7c34 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 12:02:39 -0500 Subject: [PATCH 32/54] Delete print statement in gcpy/oh_metrics.py gcpy/oh_metrics.py - Removed a print statement that was causing too much space between the header and the rest of the prinotout Signed-off-by: Bob Yantosca --- gcpy/oh_metrics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcpy/oh_metrics.py b/gcpy/oh_metrics.py index b1d5443b..bc5c9b67 100644 --- a/gcpy/oh_metrics.py +++ b/gcpy/oh_metrics.py @@ -375,8 +375,7 @@ def print_metrics(common_vars, dst): file=f ) print("#" * 79, file=f) - print("\n") - + # ============================================================== # Mean OH concentration [1e5 molec/cm3] # ============================================================== From cd925e7795c7aedbb4034e0ebfe8a0781b643176 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 12:59:38 -0500 Subject: [PATCH 33/54] Cleanup gcpy/ste_flux.py gcpy/ste_flux.py - Implement suggestions from pylint - Convert string formats to f-strings - Now use util.make_directory gcpy/oh_metrics.py - Trimmed whitespace CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 4 ++-- gcpy/oh_metrics.py | 2 +- gcpy/ste_flux.py | 24 +++++++++--------------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca52542a..c086eaa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that 
differ between Dev & Ref versions.

 ### Changed
-- Applied cleanup suggestions from pylint to `benchmark.py`, `util.py`, `plot.py`
-- Replaced format with f-strings in `benchmark.py`, `util.py`
+- Applied cleanup suggestions from pylint to `benchmark.py`, `util.py`, `plot.py`, `oh_metrics.py`, `ste_flux.py`
+- Replaced format with f-strings in `benchmark.py`, `util.py`, `plot.py`, `oh_metrics.py`, `ste_flux.py`
 - Abstract some common code in `benchmark.py` into functions
 - Replaced direct calls to `yaml.load` with `util.read_config_file` (mostly using `quiet=True`)

 ## [1.3.2] -- 2022-10-25

 ### Fixes
diff --git a/gcpy/oh_metrics.py b/gcpy/oh_metrics.py
index bc5c9b67..96d5d45b 100644
--- a/gcpy/oh_metrics.py
+++ b/gcpy/oh_metrics.py
@@ -375,7 +375,7 @@ def print_metrics(common_vars, dst):
         file=f
     )
     print("#" * 79, file=f)
-    
+
     # ==============================================================
     # Mean OH concentration [1e5 molec/cm3]
     # ==============================================================
diff --git a/gcpy/ste_flux.py b/gcpy/ste_flux.py
index 83ca7ce6..e24499b6 100644
--- a/gcpy/ste_flux.py
+++ b/gcpy/ste_flux.py
@@ -11,11 +11,12 @@
 import os
 from calendar import monthrange, month_abbr
 import warnings
+import gc
 import numpy as np
 import pandas as pd
 import xarray as xr
 import gcpy.constants as physconsts
-import gc
+from gcpy.util import make_directory

 # Suppress harmless run-time warnings (mostly about underflow in division)
 warnings.filterwarnings("ignore", category=RuntimeWarning)
@@ -96,18 +97,18 @@ def __init__(self, devstr, files, dst, year,
                     combine="nested",
                     concat_dim="time"
                 )
-            except FileNotFoundError:
+            except FileNotFoundError as exc:
                 msg = f"Could not find one or more files in {files}"
-                raise FileNotFoundError(msg)
+                raise FileNotFoundError(msg) from exc
         else:
             try:
                 self.ds_flx = xr.open_mfdataset(
                     files,
                     drop_variables=physconsts.skip_these_vars,
                 )
-            except FileNotFoundError:
+            except FileNotFoundError as exc:
                 msg = f"Could not find one or more files in {files}"
-                raise FileNotFoundError(msg)
+                raise FileNotFoundError(msg) from exc

         # Set a flag to denote if this data is from GCHP
         self.is_gchp = "nf" in self.ds_flx.dims.keys()
@@ -135,8 +136,7 @@ def __init__(self, devstr, files, dst, year,
         # Month names
         self.mon_name = []
         for t in range(self.N_MONTHS):
-            self.mon_name.append("{} {}".format(
-                month_abbr[t + 1], self.y0_str))
+            self.mon_name.append(f"{month_abbr[t + 1]} {self.y0_str}")
         self.mon_name.append("Annual Mean")

         # Days in the benchmark year
@@ -154,8 +154,7 @@ def __init__(self, devstr, files, dst, year,
             self.d_per_mon = [monthrange(self.y0, self.month)[1] * 1.0]

             # Month name
-            self.mon_name = ["{} {}".format(month_abbr[self.month],
-                                            self.y0_str)]
+            self.mon_name = [f"{month_abbr[self.month]} {self.y0_str}"]

             # Days in benchmark year
             self.d_per_yr = 0.0
@@ -248,12 +247,7 @@ def print_ste(globvars, df):
             Strat-trop exchange table
     """
     # Create plot directory hierarchy if necessary
-    if os.path.isdir(globvars.dst) and not globvars.overwrite:
-        err_str = "Pass overwrite=True to overwrite files in that directory"
-        print(f"Directory {globvars.dst} exists. 
{err_str}") - return - elif not os.path.isdir(globvars.dst): - os.makedirs(globvars.dst) + make_directory(globvars.dst, globvars.overwrite) # Save the file in the Tables folder of dst filename = f"{globvars.dst}/Strat_trop_exchange.txt" From af33f24d71d20e9b72c61a5d1dbcddf6982c8629 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 13:08:19 -0500 Subject: [PATCH 34/54] Rework logic in compare_varnames gcpy.util.py - In function compare_varnames - Add variable names to commonvarsData if they have lon/Xdim or lat/Ydim coordinates. Plottable data variables need at least these dimensions. - commonvarsOther are variables that are not in commonvarsData - commonvars2D and commonvars3D are plottable data variables that either do not have lev/ilev dimensions, or do have them. This fixes an issue where 2D plottable data variables were getting lumped together with the index variables. Signed-off-by: Bob Yantosca --- gcpy/util.py | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/gcpy/util.py b/gcpy/util.py index e6a85033..4b765496 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -843,7 +843,8 @@ def compare_varnames( refdata and devdata, and that have lat, lon, and level dimensions. commonvarsData List of all commmon 2D or 3D data variables, - excluding index variables. + excluding index variables. This is the + list of "plottable" variables. refonly List of 2D or 3D variables that are only present in refdata. devonly List of 2D or 3D variables that are only @@ -855,37 +856,29 @@ def compare_varnames( refonly = [v for v in refvars if v not in devvars] devonly = [v for v in devvars if v not in refvars] dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim] - commonvarsOther = [ + # Assume plottable data has lon and lat + # This is OK for purposes of benchmarking + # -- Bob Yantosca (09 Feb 2023) + commonvarsData = [ v for v in commonvars if ( - ( - ("lat" not in refdata[v].dims or "Xdim" not in refdata[v].dims) - and - ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims) + ("lat" in refdata[v].dims or "Ydim" in refdata[v].dims) and - ("lev" not in refdata[v].dims) - ) - or - ( - ("hyam" in v or "hybm" in v) # Omit these from plottable data - ) + ("lon" in refdata[v].dims or "Xdim" in refdata[v].dims) ) + ] + commonvarsOther = [ + v for v in commonvars if ( + v not in commonvarsData + ) ] commonvars2D = [ v for v in commonvars if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and - ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and - ("lev" not in refdata[v].dims) + (v in commonvarsData) and ("lev" not in refdata[v].dims) ) ] commonvars3D = [ v for v in commonvars if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and - ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and - ("lev" in refdata[v].dims) + (v in commonvarsData) and ("lev" in refdata[v].dims) ) ] From b1b9295d1616e910f2ef76ef353f74dc3a2e0f81 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 13:55:46 -0500 Subject: [PATCH 35/54] Fixed incorrect logic in util.array_equals; updates in benchmark.py gcpy/util.py - In routine array_equals: - Add dtype parameter to select numeric type for the comparison - Return the inverse of ABS(devsum-refsum) > 0, since this is the condition for not equals. 
gcpy/benchmark.py
- Skip reading "AREA" when computing the summary table
- Pass dtype=np.float32 to util.array_equals

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark.py | 12 ++++++++----
 gcpy/util.py      | 12 ++++++++----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index c92bc03b..fc5c07c9 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -4513,8 +4513,7 @@ def create_benchmark_summary_table(

     # Variables to skip
     skip_vars = gcon.skip_these_vars
-    skip_vars.append("corner_lats")
-    skip_vars.append("corner_lons")
+    skip_vars.append("AREA")

     # Pick the proper function to read the data
     reader = util.dataset_reader(
@@ -4569,9 +4568,14 @@
         diff_list = []  # Keep track of which variables are different

-        # Loop over the common variables
+        # NOTE: Use 32-bit float for comparisons since this is
+        # the precision used for History diagnostics.
         for v in vardict["commonvarsData"]:
-            if not util.array_equals(refdata[v], devdata[v]):
+            if not util.array_equals(
+                    refdata[v],
+                    devdata[v],
+                    dtype=np.float32
+            ):
                 diff_list.append(v)

         # Drop duplicate values from diff_list
diff --git a/gcpy/util.py b/gcpy/util.py
index 4b765496..d9590f9c 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -2264,7 +2264,8 @@ def insert_text_into_file(

 def array_equals(
         refdata,
-        devdata
+        devdata,
+        dtype=np.float64
 ):
     """
     Tests two arrays for equality.  Useful for checking which
@@ -2276,6 +2277,9 @@
         The first array to be checked.
     devdata: xarray DataArray or numpy ndarray
         The second array to be checked.
+    dtype : np.float32 or np.float64
+        The precision that will be used to make the evaluation.
+        Default: np.float64

     Returns:
     --------
@@ -2298,6 +2302,6 @@

     # This method will work if the arrays have different dimensions
     # but an element-by-element search will not!
-    refsum = np.sum(refdata, dtype=np.float64)
-    devsum = np.sum(devdata, dtype=np.float64)
-    return np.abs(devsum - refsum) > np.float64(0.0)
+    refsum = np.nansum(refdata, dtype=dtype)
+    devsum = np.nansum(devdata, dtype=dtype)
+    return (not np.abs(devsum - refsum) > dtype(0.0))

From 0155dd0c669d2c6112f8666d4324bb6685a00a3d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 9 Feb 2023 14:27:14 -0500
Subject: [PATCH 36/54] Update the messages at the top of tables to just show
 one line

gcpy/benchmark.py:
- Updated routine diff_list_to_text so that it also accepts refstr &
  devstr args. 
It now returns a line saying either "dev & ref are identical"
  or "dev & ref show X differences"
- Now pass devstr and refstr in calls to diff_list_to_text
- Trimmed whitespace

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark.py | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index fc5c07c9..89735343 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -184,11 +184,7 @@ def create_total_emissions_table(

         # Write a placeholder to the file that denotes where
         # the list of species with differences will be written
-        placeholder = "@%% insert diff_list here %%@"
-        if "Inv" in template:
-            print(f"Inventories that differ btw {refstr} and {devstr}:", file=f)
-        else:
-            print(f"Species that differ btw {refstr} and {devstr}:", file=f)
+        placeholder = "@%% insert diff status here %%@"
         print(f"{placeholder}\n\n", file=f)

         # Define a list for differences
@@ -355,7 +351,10 @@
     util.insert_text_into_file(
         filename=outfilename,
         search_text=placeholder,
-        replace_text=diff_list_to_text(diff_list),
+        replace_text=diff_list_to_text(
+            refstr,
+            devstr,
+            diff_list),
         width=90
     )

@@ -464,19 +463,16 @@ def create_global_mass_table(
     if trop_only:
         title1 = f"### Global mass (Gg) {label} (Trop only)"
     title2 = f"### Ref = {refstr}; Dev = {devstr}"
-    title3 = f"### Species that differ btw {refstr} and {devstr}:"

     # Write a placeholder to the file that denotes where
     # the list of species with differences will be written
-    placeholder = "@%% insert diff_list here %%@"
-    title4 = f"{placeholder}"
+    placeholder = "@%% insert diff status here %%@"

     # Print header to file
     print("#" * 89, file=f)
     print(f"{title1 : <86}{'###'}", file=f)
     print(f"{title2 : <86}{'###'}", file=f)
     print(f"{'###' : <86}{'###'}", file=f)
-    print(f"{title3 : <86}{'###'}", file=f)
     print(f"{placeholder}", file=f)
     print("#" * 89, file=f)

@@ -577,6 +573,8 @@
         filename=outfilename,
         search_text=placeholder,
         replace_text=diff_list_to_text(
+            refstr,
+            devstr,
             diff_list,
             fancy_format=True
         ),
@@ -4604,6 +4602,8 @@ def create_benchmark_summary_table(


 def diff_list_to_text(
+    refstr,
+    devstr,
     diff_list,
     fancy_format=False
 ):
@@ -4629,13 +4629,21 @@
     # Strip out duplicates from diff_list
     # Prepare a message about species differences (or alternate msg)
     diff_list = util.unique_values(diff_list, drop=[None])
-    diff_text = util.wrap_text(diff_list, width=85)
-    if len(diff_text) > 85:
-        diff_text = "... 
Too many diffs to print (see below for details)" + # Print the text + n_diff = len(diff_list) + if n_diff > 0: + diff_text = f"{devstr} and {refstr} show {n_diff} differences" + else: + diff_text = f"{devstr} and {refstr} are identical" + diff_text = util.wrap_text( + diff_text, + width=83 + ) + if fancy_format: diff_text = f"### {diff_text : <82}{'###'}" - + return diff_text.strip() From cc2ecec55a86b0d425e0e11f75cf6db819be6775 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 17:40:24 -0500 Subject: [PATCH 37/54] Fix formatting for cloud benchmark tables w/ long version strings gcpy/benchmark.py gcpy/oh_metrics.py - Print ref & dev versions on separate lines in table headers gcpy/util.py - Add routine "trim_cloud_benchmark_label" for later use Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 31 +++++++++++++++++++++++-------- gcpy/oh_metrics.py | 6 ++---- gcpy/util.py | 22 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index f204a415..ab4ee277 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -240,12 +240,14 @@ def create_total_emissions_table( title0 = f"for species {species_name}" title1 = f"### Emissions totals {title0} [Tg]" - title2 = f"### Ref = {refstr}; Dev = {devstr}" + title2 = f"### Ref = {refstr}" + title3 = f"### Dev = {devstr}" # Print header to file print("#" * 89, file=f) print(f"{title1 : <86}{'###'}", file=f) print(f"{title2 : <86}{'###'}", file=f) + print(f"{title3 : <86}{'###'}", file=f) print("#" * 89, file=f) print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) @@ -470,7 +472,8 @@ def create_global_mass_table( title1 = f"### Global mass (Gg) {label} (Trop + Strat)" if trop_only: title1 = f"### Global mass (Gg) {label} (Trop only)" - title2 = f"### Ref = {refstr}; Dev = {devstr}" + title2 = f"### Ref = {refstr}" + title3 = f"### Dev = {devstr}" # Write a placeholder to the file that denotes where # the list of species with differences will be written @@ -479,7 +482,9 @@ def create_global_mass_table( # Print header to file print("#" * 89, file=f) print(f"{title1 : <86}{'###'}", file=f) + print(f"{'###' : <86}{'###'}", file=f) print(f"{title2 : <86}{'###'}", file=f) + print(f"{title3 : <86}{'###'}", file=f) print(f"{'###' : <86}{'###'}", file=f) print(f"{placeholder}", file=f) print("#" * 89, file=f) @@ -4493,12 +4498,15 @@ def create_benchmark_summary_table( # Title strings title1 = "### Benchmark summary table" - title2 = f"### Ref = {refstr}; Dev = {devstr}" + title2 = f"### Ref = {refstr}" + title3 = f"### Dev = {devstr}" # Print header to file print("#" * 80, file=f) print(f"{title1 : <77}{'###'}", file=f) + print(f"{'###' : <77}{'###'}", file=f) print(f"{title2 : <77}{'###'}", file=f) + print(f"{title3 : <77}{'###'}", file=f) print("#" * 80, file=f) print(file=f) @@ -4619,6 +4627,11 @@ def diff_list_to_text( if not isinstance(diff_list, list): raise ValueError("Argument 'diff_list' must be a list!") + # Use "Dev" and "Ref" for inserting into a header + if fancy_format: + refstr = "Ref" + devstr = "Dev" + # Strip out duplicates from diff_list # Prepare a message about species differences (or alternate msg) diff_list = util.unique_values(diff_list, drop=[None]) @@ -4629,12 +4642,14 @@ def diff_list_to_text( diff_text = f"{devstr} and {refstr} show {n_diff} differences" else: diff_text = f"{devstr} and {refstr} are identical" - diff_text = util.wrap_text( - diff_text, - width=83 - ) - + + # If we are placing the text 
in a header,
+    # then trim the length of diff_text to fit.
     if fancy_format:
+        diff_text = util.wrap_text(
+            diff_text,
+            width=83
+        )
         diff_text = f"### {diff_text : <82}{'###'}"
-    
+
     return diff_text.strip()
diff --git a/gcpy/oh_metrics.py b/gcpy/oh_metrics.py
index 96d5d45b..d3cc8438 100644
--- a/gcpy/oh_metrics.py
+++ b/gcpy/oh_metrics.py
@@ -370,10 +370,8 @@ def print_metrics(common_vars, dst):
     # ==============================================================
     print("#" * 79, file=f)
     print("### OH Metrics", file=f)
-    print(\
-        f"### Ref = {common_vars['refstr']}; Dev = {common_vars['devstr']}",
-        file=f
-    )
+    print(f"### Ref = {common_vars['refstr']}", file=f)
+    print(f"### Dev = {common_vars['devstr']}", file=f)
     print("#" * 79, file=f)

     # ==============================================================
diff --git a/gcpy/util.py b/gcpy/util.py
index a30f4fe3..8f46a903 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -2339,3 +2339,25 @@ def make_directory(

     if not os.path.isdir(dir_name):
         os.makedirs(dir_name)
+
+
+def trim_cloud_benchmark_label(
+        label
+):
+    """
+    Removes the first part of the cloud benchmark label string
+    (e.g. "gchp-c24-1Hr", "gcc-4x5-1Mon", etc) to avoid clutter.
+    """
+    if not isinstance(label, str):
+        raise ValueError("Argument 'label' must be a string!")
+
+    for v in [
+        "gcc-4x5-1Hr",
+        "gchp-c24-1Hr",
+        "gcc-4x5-1Mon",
+        "gchp-c24-1Mon",
+    ]:
+        if v in label:
+            label = label.replace(v, "")
+
+    return label

From d69f373b11b097e4585d71c053a5497686c7e242 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 Feb 2023 10:35:23 -0500
Subject: [PATCH 38/54] Bug fix: Also look for "SpeciesConc_" in
 add_lumped_species_to_dataset

gcpy/util.py
- In routine "add_lumped_species_to_dataset", add a condition to the
  if statement to allow "SpeciesConcVV_" or "SpeciesConc_" as prefixes.
  This is necessary in case we are comparing older data.

Signed-off-by: Bob Yantosca
---
 gcpy/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcpy/util.py b/gcpy/util.py
index 8f46a903..13e44e64 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -1276,7 +1276,7 @@ def add_lumped_species_to_dataset(
     # Get a dummy DataArray to use for initialization
     dummy_darr = None
     for var in ds.data_vars:
-        if prefix in var:
+        if prefix in var or prefix.replace("VV", "") in var:
             dummy_darr = ds[var]
             dummy_type = dummy_darr.dtype
             dummy_shape = dummy_darr.shape

From 55a3f79efac0aeec63f426fcfd78b7606facf69d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 Feb 2023 11:15:49 -0500
Subject: [PATCH 39/54] Updated indentation in the installation instructions

docs/source/Getting-Started-with-GCPy.rst
- Changed ".. code::" to ".. code-block::"
- Indented heading text by 3 spaces

docs/source/conf.py
- Updated version to 1.4.0

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca
---
 CHANGELOG.md                              |   3 +
 docs/source/Getting-Started-with-GCPy.rst | 120 +++++++++++-----------
 docs/source/conf.py                       |   2 +-
 3 files changed, 66 insertions(+), 59 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d414db8..b91b6ac1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ All notable changes to GCPy will be documented in this file.

 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
 ## Unreleased
+### Added
+- Updated installation documentation; we now recommend that users create
+  a conda environment using the `environment.yml` file

 ## [1.3.2] -- 2022-10-25

diff --git a/docs/source/Getting-Started-with-GCPy.rst b/docs/source/Getting-Started-with-GCPy.rst
index 5ad75a81..06a8a9a2 100644
--- a/docs/source/Getting-Started-with-GCPy.rst
+++ b/docs/source/Getting-Started-with-GCPy.rst
@@ -4,6 +4,8 @@
 Installing GCPy
 ###############

+.. _requirements:
+
 ============
 Requirements
 ============
@@ -22,7 +24,7 @@ python version 3.9.
 You can check if you already have conda installed by running the
 following command:

-.. code:: console
+.. code-block:: console

    $ conda --version

@@ -38,85 +40,87 @@ Miniconda follow instructions in the `Miniconda docs
 ` above.

-#. Step 1: Download GCPy
+#. Download GCPy

-Create and go to the directory in which you would like to store GCPy. In
-this example we will store GCPy in a python/packages subdirectory in the
-home directory, but you can store it wherever you wish. You can also name
-the GCPy download whatever you want. In this example the GCPy directory
-is called GCPy.
+   Create and go to the directory in which you would like to store GCPy. In
+   this example we will store GCPy in a python/packages subdirectory in the
+   home directory, but you can store it wherever you wish. You can also name
+   the GCPy download whatever you want. In this example the GCPy directory
+   is called GCPy.

-.. code:: console
+   .. code-block:: console

-   $ cd $HOME/python/packages
-   $ git clone https://github.com/geoschem/gcpy.git GCPy
-   $ cd GCPy
+      $ cd $HOME/python/packages
+      $ git clone https://github.com/geoschem/gcpy.git GCPy
+      $ cd GCPy

-#. Step 2: Create new python virtual environment for GCPy
+#. Create new python virtual environment for GCPy

-A python virtual environment is a named set of python installs,
-e.g. packages, that are independent of other virtual environments.
-Using an environment dedicated to GCPy is useful to maintain a set
-of package dependencies compatible with GCPy without interfering with
-python packages you use for other work. You can create a python virtual
-environment from anywhere on your system. It will be stored in your
-conda install rather than the directory from which you create it.
+   A python virtual environment is a named set of python installs,
+   e.g. packages, that are independent of other virtual environments.
+   Using an environment dedicated to GCPy is useful to maintain a set
+   of package dependencies compatible with GCPy without interfering with
+   python packages you use for other work. You can create a python virtual
+   environment from anywhere on your system. It will be stored in your
+   conda install rather than the directory from which you create it.

-You can create a python virtual environment using a file that lists
-all packages and their versions to be included in the environment.
-GCPy includes such as file, environment.yml, located in the top-level
-directory of the package.
+   You can create a python virtual environment using a file that lists
+   all packages and their versions to be included in the environment.
+   GCPy includes such a file, environment.yml, located in the top-level
+   directory of the package.

-Run the following command at the command prompt to create a virtual
-environment for use with GCPy. You can name environment whatever you
-wish. This example names it gcpy_env.
+   Run the following command at the command prompt to create a virtual
+   environment for use with GCPy. 
You can name the environment whatever you
+   wish. This example names it gcpy_env.

-.. code:: console
+   .. code-block:: console

-   $ conda env create -n gcpy_env --file=environment.yml
+      $ conda env create -n gcpy_env --file=environment.yml

-Once successfully created you can load the environment by running the
-following command, specifying the name of your environment.
+   Once successfully created you can load the environment by running the
+   following command, specifying the name of your environment.

-.. code:: console
+   .. code-block:: console

-   $ conda activate gcpy_env
+      $ conda activate gcpy_env

-To exit the environment do the following:
+   To exit the environment do the following:

-.. code:: console
+   .. code-block:: console

-   $ conda deactivate
+      $ conda deactivate

-#. Step 3: Add GCPy to python path
+#. Add GCPy to python path

-The environment variable PYTHONPATH specifies the locations of python
-libraries on your system that are not included in your conda environment.
-If GCPy is included in PYTHONPATH then python will recognize its
-existence when you try to use. Add the following line to your startup
-script, e.g. .bashrc, and edit the path to where you are storing GCPy.
+   The environment variable :envvar:`PYTHONPATH` specifies the
+   locations of python libraries on your system that are not included
+   in your conda environment.  If GCPy is included in
+   :envvar:`PYTHONPATH` then python will recognize its existence
+   when you try to use it.  Add the following line to your startup script,
+   e.g. :file:`.bashrc`, and edit the path to where you are storing
+   GCPy.

-.. code:: console
+   .. code-block:: bash

-   PYTHONPATH=$PYTHONPATH:$HOME/python/packages/GCPy
+      PYTHONPATH=$PYTHONPATH:$HOME/python/packages/GCPy

-#. Step 4: Perform a simple test
+#. Perform a simple test

-Run the following commands in your terminal to check if the
-installation was succcesful.
+   Run the following commands in your terminal to check if the
+   installation was successful.

-.. code:: console
+   .. code-block:: console

-   $ source $HOME/.bashrc # Alternatively close and reopen your terminal
-   $ echo $PYTHONPATH # Check it contains path to your GCPy clone
-   $ conda activate gcpy_env
-   $ conda list # Check it contains contents of gcpy env file
-   $ python
-   \>>> import gcpy
+      $ source $HOME/.bashrc # Alternatively close and reopen your terminal
+      $ echo $PYTHONPATH # Check it contains path to your GCPy clone
+      $ conda activate gcpy_env
+      $ conda list # Check it contains contents of gcpy env file
+      $ python
+      \>>> import gcpy

 If no errors were encountered then you successfully installed GCPy and
 its dependencies.
@@ -134,7 +138,7 @@ environment.yml file included in the package.

 Run the following commands to update your GCPy version to the
 latest available.

-.. code:: console
+.. code-block:: console

    $ cd $HOME/python/packages/GCPy
    $ git fetch -p
@@ -143,7 +147,7 @@

 You can also checkout an older version by doing the following:

-.. code:: console
+.. code-block:: console

    $ cd $HOME/python/packages/GCPy
    $ git fetch -p
@@ -153,7 +157,7 @@
 Once you have the version you wish to use, you can run the following
 commands to update your virtual environment:

-.. code:: console
+.. 
code-block:: console $ source activate gcpy_env $ cd $HOME/python/packages/GCPy diff --git a/docs/source/conf.py b/docs/source/conf.py index 1fc84fab..616aa758 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'GEOS-Chem Support Team' # The full version, including alpha/beta/rc tags -release = '1.3.1' +release = '1.4.0' # -- General configuration --------------------------------------------------- From 334759edb2a45fd3dcf4caccdb046e56dca904bb Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 24 Feb 2023 11:18:01 -0500 Subject: [PATCH 40/54] Add newline to diff-off-diffs refstr/devstr if string is too long gcpy/benchmark.py - Add function "diff_of_diffs_toprow_title". This creates the refstr and devstr for the diff-of-diffs plots (which are plotted in the top row of a 6-panel plot page). The function will add a newline character in the middle of the string if the string would be greater than 40 characters. This prevents the title strings in the top row of plots from overlapping each other. benchmark/run_benchmark.py benchmark/modules/run_1yr_fullchem_benchmark.py - Now call "diff_of_diffs_toprow_title" to generate the diff-of-diffs refstr and devstr. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 4 +- .../modules/run_1yr_fullchem_benchmark.py | 14 ++---- benchmark/run_benchmark.py | 14 ++---- gcpy/benchmark.py | 46 +++++++++++++++++++ 4 files changed, 54 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74d77a9f..8f896ff8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added - Updated installation documentation, we now recommend users to create a conda environment using the `environment.yml` file - -### Added - Benchmark summary table output (intended for 1hr & 1mo benchmarks) - Species/emissions/inventories that differ between Dev & Ref versions are now printed at the top of the benchmark emissions, inventory, and global mass tables. if there are too many species with diffs, an alternate message is printed. - New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions. @@ -19,6 +17,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Replaced format with f-strings in `benchmark.py`, `util.py`, `plot.py`, `oh_metrics.py`, `ste_flux.py` - Abstract some common in `benchmark.py` into functions - Replaced direct calls to `yaml.load` with `util.read_config.file` (mostly using `quiet=True`) +- Restore tag information to refstr and devstr +- Add a newline to diff-of-diffs refstr and devstr if the string is too long. This prevents plot titles in the top 2 rows of a six-panel plot from running together. 
## [1.3.2] -- 2022-10-25 diff --git a/benchmark/modules/run_1yr_fullchem_benchmark.py b/benchmark/modules/run_1yr_fullchem_benchmark.py index 73d1c880..5cb9bb0a 100755 --- a/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -242,17 +242,9 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_devstr = config["data"]["dev"]["gchp"]["version"] gchp_vs_gchp_refstr = config["data"]["ref"]["gchp"]["version"] gchp_vs_gchp_devstr = config["data"]["dev"]["gchp"]["version"] - diff_of_diffs_refstr = ( - config["data"]["dev"]["gcc"]["version"] - + " - " - + config["data"]["ref"]["gcc"]["version"] - ) - diff_of_diffs_devstr = ( - config["data"]["dev"]["gchp"]["version"] - + " - " - + config["data"]["ref"]["gchp"]["version"] - ) - + diff_of_diffs_refstr = bmk.diff_of_diffs_toprow_title(config, "gcc") + diff_of_diffs_devstr = bmk.diff_of_diffs_toprow_title(config, "gchp") + ######################################################################## ### THE REST OF THESE SETTINGS SHOULD NOT NEED TO BE CHANGED ### ######################################################################## diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index e7c14e92..fc776cc8 100755 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -300,17 +300,9 @@ def run_benchmark_default(config): gchp_vs_gcc_devstr = config["data"]["dev"]["gchp"]["version"] gchp_vs_gchp_refstr = config["data"]["ref"]["gchp"]["version"] gchp_vs_gchp_devstr = config["data"]["dev"]["gchp"]["version"] - diff_of_diffs_refstr = ( - config["data"]["dev"]["gcc"]["version"] - + " - " - + config["data"]["ref"]["gcc"]["version"] - ) - diff_of_diffs_devstr = ( - config["data"]["dev"]["gchp"]["version"] - + " - " - + config["data"]["ref"]["gchp"]["version"] - ) - + diff_of_diffs_refstr = bmk.diff_of_diffs_toprow_title(config, "gcc") + diff_of_diffs_devstr = bmk.diff_of_diffs_toprow_title(config, "gchp") + ######################################################################## ### THE REST OF THESE SETTINGS SHOULD NOT NEED TO BE CHANGED ### ######################################################################## diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index ab4ee277..20e162ef 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -4653,3 +4653,49 @@ def diff_list_to_text( diff_text = f"### {diff_text : <82}{'###'}" return diff_text.strip() + + +def diff_of_diffs_toprow_title(config, model): + """ + Creates the diff-of-diffs plot title for the top row of the + six-plot output. If the title string is too long (as empirically + determined), then a newline will be inserted in order to prevent + the title strings from overlapping. + + Args: + ----- + config : dict + Dictionary containing the benchmark options (as read from a + YAML file such as 1mo_benchmark.yml, etc.) + model: str + The model to plot. Accepted values are "gcc" or "gchp". + + Returns: + -------- + title: str + The plot title string for the diff-of-diff + """ + if not isinstance(config, dict): + msg = "The 'config' argument must be of type 'dict`!" + raise ValueError(msg) + if not isinstance(model, str): + msg = "The 'model' argument must be of type 'str'!" + raise ValueError(msg) + if not "gcc" in model and not "gchp" in model: + msg = "The 'model' argument must be either 'gcc' or 'gchp'!" 
+ raise ValueError(msg) + + title = ( + config["data"]["dev"][model]["version"] + + " - " + + config["data"]["ref"][model]["version"] + ) + + if len(title) > 40: + title = ( + config["data"]["dev"][model]["version"] + + " -\n" + + config["data"]["ref"][model]["version"] + ) + + return title From 6432a82252b6f88e40a07172b27fe2ef43f69e21 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Mon, 27 Feb 2023 11:30:37 +0000 Subject: [PATCH 41/54] Start reworking regridding docs --- docs/source/Regridding.rst | 268 ++++++++++++++++++------------------- 1 file changed, 134 insertions(+), 134 deletions(-) diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst index f52475de..8005f4bd 100644 --- a/docs/source/Regridding.rst +++ b/docs/source/Regridding.rst @@ -15,91 +15,26 @@ resolution and any grid type available in GEOS-Chem, including lat/lon (global or non-global), global standard cubed-sphere, and global stretched-grid. GCPy also supports arbitrary vertical regridding across different vertical resolutions. -.. _regrid-plot: - -=============================== -Regridding for Plotting in GCPy -=============================== - -When plotting in GCPy (e.g. through :code:`compare_single_level()` or -:code:`compare_zonal_mean()`), the vast majority of regridding is -handled internally. You can optionally request a specific -horizontal comparison resolution in :code:`compare_single_level()`` -and :code:`compare_zonal_mean()`. Note that all regridding in these -plotting functions only applies to the comparison panels (not the top -two panels which show data directly from each dataset). There are only -two scenarios where you will need to pass extra information to GCPy to -help it determine grids and to regrid when plotting. - -Pass stretched-grid file paths ------------------------------- - -Stretched-grid parameters cannot currently be automatically determined -from grid coordinates. If you are plotting stretched-grid data in -:code:`compare_single_level()` or :code:`compare_zonal_mean()` (even -if regridding to another format), you need to use the -:code:`sg_ref_path` or :code:`sg_dev_path` arguments to pass the path -of your original stretched-grid restart file to GCPy. -If using :code:`single_panel()`, pass the file path using -:code:`sg_path`. Stretched-grid restart files created using GCPy -contain the specified stretch factor, target longitude, and -target latitude in their metadata. Currently, output files from -stretched-grid runs of GCHP do not contain any metadata that specifies -the stretched-grid used. - -Pass vertical grid parameters for non-72/47-level grids -------------------------------------------------------- - -GCPy automatically handles regridding between different vertical grids -when plotting except when you pass a dataset that is not on the -typical 72-level or 47-level vertical grids. If using a different -vertical grid, you will need to pass the corresponding `grid -parameters -`_ -using the :code:`ref_vert_params` or :code:`dev_vert_params` keyword -arguments. - -Automatic regridding decision process -------------------------------------- - -When you do not specify a horizontal comparison resolution using the -:code:`cmpres` argument in :code:`compare_single_level()` and -:code:`compare_zonal_mean()`, GCPy follows several steps to determine -what comparison resolution it should use: - -- If both input grids are lat/lon, use the highest resolution between - them (don't regrid if they are the same resolution). 
-- Else if one grid is lat/lon and the other is cubed-sphere (standard - or stretched-grid), use a 1x1.25 lat/lon grid. -- Else if both grids are cubed-sphere and you are plotting zonal - means, use a 1x1.25 lat/lon grid. -- Else if both grids are standard cubed-sphere, use the highest - resolution between them (don't regrid if they are the same - resolution). -- Else if one or more grids is a stretched-grid, use the grid of the - ref dataset. -For differing vertical grids, the smaller vertical grid is currently -used for comparisons. +Regridding with GCPy is currently undergoing an overhaul. As of the current +release, regridding is split into two different categories - regridding +GEOS-Chem Classic format files (lat/lon), and regridding GCHP format files +(standard cubed-sphere, stretched cubed-sphere). -================ -Regridding Files -================ +==================================== +Regridding Files - GEOS-Chem Classic +==================================== -You can regrid existing GEOS-Chem restart or output diagnostic files -between lat/lon and cubed-sphere formats using -:code:`gcpy.file_regrid`. :code:`gcpy.file_regrid` can either be -called directly from the command line using :code:`python -m -gcpy.file_regrid` or as a function -(:code:`gcpy.file_regrid.file_regrid()`) from a Python script or -interpreter. The syntax of :code:`file_regrid` is as follows: +You can regrid existing GEOS-Chem Classic restart or output diagnostic files +between lat/lon resolutions using :code:`gcpy.file_regrid`. +:code:`gcpy.file_regrid` can either be called directly from the command line +using :code:`python -m gcpy.file_regrid` or as a function +(:code:`gcpy.file_regrid.file_regrid()`) from a Python script or interpreter. +The syntax of :code:`file_regrid` is as follows: .. code-block:: python - def file_regrid(fin, fout, dim_format_in, dim_format_out, - cs_res_out=0, ll_res_out='0x0', - sg_params_in=[1.0, 170.0, -90.0], sg_params_out=[1.0, 170.0, -90.0] - ): + def file_regrid(fin, fout, dim_format_in, dim_format_out, ll_res_out='0x0'): """ Regrids an input file to a new horizontal grid specification and saves it as a new file. @@ -118,90 +53,86 @@ Required Arguments: .. option:: dim_format_in : str - Format of the input file's dimensions (choose from: classic, - checkpoint, diagnostic), where classic denotes lat/lon and - checkpoint / diagnostic are cubed-sphere formats + Format of the input file's dimensions (set this to 'classic' - denoting + a GEOS-Chem Classic file with a lat/lon grid) .. option:: dim_format_out : str - Format of the output file's dimensions (choose from: classic, - checkpoint, diagnostic), where classic denotes lat/lon and - checkpoint / diagnostic are cubed-sphere formats + Format of the output file's dimensions (set this to 'classic' - denoting + a GEOS-Chem Classic file with a lat/lon grid) Optional arguments: ------------------- -.. option:: cs_res_out : int - - The cubed-sphere resolution of the output dataset. Not used if - dim_format_out is classic. - - Default value: 0 - .. option:: ll_res_out : str - The lat/lon resolution of the output dataset. Not used if - dim_format_out is not classic/ + The lat/lon resolution of the output dataset. Default value: '0x0' -.. option:: sg_params_in : list[float, float, float] - - Input grid stretching parameters [stretch-factor, target - longitude, target latitude]. Not used if dim_format_in is classic - - Default value: [1.0, 170.0, -90.0] (No stretching) - -.. 
option:: sg_params_out : list[float, float, float] - - Output grid stretching parameters [stretch-factor, target - longitude, target latitude]. Not used if dim_format_out is classic. - - Default value: [1.0, 170.0, -90.0] (No stretching) - -There are three dimension formats available for regridding: :literal:`classic` -(GEOS-Chem Classic lat/lon format), :literal:`checkpoint` (GCHP restart file -format), and :literal:`diagnostic` (GCHP output file format). You can -regrid between any of these formats using :code:`file_regrid`, as well as -between different resolutions and/or grid-types within each dimension -format (e.g. standard cubed-sphere checkpoint to stretched-grid -checkpoint). Note that although the :code:`cs_res_out` and -:code:`ll_res_out` parameters are technically optional in the -function, you must specify at least one of these in your call to -:code:`file_regrid`. +There is now only one dimension format available for regridding files using the +:code:`gcpy.file_regrid` method: :literal:`classic`. You must specify +:literal:`classic` as the value of both :code:`dim_format_in` and +:code:`dim_format_out`, as well as specifying a resolution as the value of +:code:`ll_res_out`. As stated previously, you can either call :code:`file_regrid.file_regrid()` directly or call it from the command line using :code:`python -m gcpy.file_regrid ARGS`. An example command -line call (separated by line for readability) for regridding a C90 -cubed-sphere restart file to a C48 stretched-grid with a stretch -factor of 3, a target longitude of 260.0, and a target latitude of -40.0 looks like: +line call (separated by line for readability) for regridding a 2x2.5 lat/lon +restart file to a 4x5 lat/lon grid looks like: .. code-block:: - python -m gcpy.file_regrid \ - -i initial_GEOSChem_rst.c90_standard.nc \ - --dim_format_in checkpoint \ - -o sg_restart_c48_3_260_40.nc \ - --cs_res_out 48 \ - --sg_params_out 3.0 260.0 40.0 \ - --dim_format_out checkpoint + python -m gcpy.file_regrid \ + --filein initial_GEOSChem_rst.2x2.5.nc \ + --dim_format_in classic \ + --fileout GEOSChem_rst.4x5.nc \ + --ll_res_out 4x5 \ + --dim_format_out classic .. _regrid-sparselt: -===================================== -Regridding with gridspec and sparselt -===================================== +======================= +Regridding Files - GCHP +======================= -GCPy 1.3.0 and later supports regridding with the `gridspec `_ and `sparselt `_ -utilities. +GCHP regridding is where the first steps of the overhaul in GCPy regridding have +happened. We are moving towards an integrated approach for all GEOS-Chem grid +types using `gridspec `_ and +`sparselt `_. For now, this is only +supported for GCHP grid formats, but in a later GCPy this will be the single +method for regridding all GEOS-Chem grid formats. .. _regrid-sparselt-firsttime: First-time setup ----------------- +Until GCPy contains a complete regridding implementation that works for all +GEOS-Chem grid formats, we recommend that you create a small +`conda `_ environment in which to carry out +your GCHP regridding. + +The following conda `environment file `_ +will get you set up with an environment for regridding with +:literal:`gridspec` and :literal:`sparselt`: + +.. code-block:: yaml + + name: gchp_regridding + channels: + - conda-forge + dependencies: + - python=3.10 + - esmf + - gridspec + - numpy + - requests + - sparselt + - xarray + - xesmf + #. Install command line tool gridspec in your bin directory .. 
code-block:: console @@ -440,3 +371,72 @@ that you can modify. # Write xarray DataSet contents to netcdf file. ds.to_netcdf("my_data_latlon90x180.nc") + +.. _regrid-plot: + +=============================== +Regridding for Plotting in GCPy +=============================== + +When plotting in GCPy (e.g. through :code:`compare_single_level()` or +:code:`compare_zonal_mean()`), the vast majority of regridding is +handled internally. You can optionally request a specific +horizontal comparison resolution in :code:`compare_single_level()`` +and :code:`compare_zonal_mean()`. Note that all regridding in these +plotting functions only applies to the comparison panels (not the top +two panels which show data directly from each dataset). There are only +two scenarios where you will need to pass extra information to GCPy to +help it determine grids and to regrid when plotting. + +Pass stretched-grid file paths +------------------------------ + +Stretched-grid parameters cannot currently be automatically determined +from grid coordinates. If you are plotting stretched-grid data in +:code:`compare_single_level()` or :code:`compare_zonal_mean()` (even +if regridding to another format), you need to use the +:code:`sg_ref_path` or :code:`sg_dev_path` arguments to pass the path +of your original stretched-grid restart file to GCPy. +If using :code:`single_panel()`, pass the file path using +:code:`sg_path`. Stretched-grid restart files created using GCPy +contain the specified stretch factor, target longitude, and +target latitude in their metadata. Currently, output files from +stretched-grid runs of GCHP do not contain any metadata that specifies +the stretched-grid used. + +Pass vertical grid parameters for non-72/47-level grids +------------------------------------------------------- + +GCPy automatically handles regridding between different vertical grids +when plotting except when you pass a dataset that is not on the +typical 72-level or 47-level vertical grids. If using a different +vertical grid, you will need to pass the corresponding `grid +parameters +`_ +using the :code:`ref_vert_params` or :code:`dev_vert_params` keyword +arguments. + +Automatic regridding decision process +------------------------------------- + +When you do not specify a horizontal comparison resolution using the +:code:`cmpres` argument in :code:`compare_single_level()` and +:code:`compare_zonal_mean()`, GCPy follows several steps to determine +what comparison resolution it should use: + +- If both input grids are lat/lon, use the highest resolution between + them (don't regrid if they are the same resolution). +- Else if one grid is lat/lon and the other is cubed-sphere (standard + or stretched-grid), use a 1x1.25 lat/lon grid. +- Else if both grids are cubed-sphere and you are plotting zonal + means, use a 1x1.25 lat/lon grid. +- Else if both grids are standard cubed-sphere, use the highest + resolution between them (don't regrid if they are the same + resolution). +- Else if one or more grids is a stretched-grid, use the grid of the + ref dataset. + +For differing vertical grids, the smaller vertical grid is currently +used for comparisons. 
+ + From 9fe8d405b5c984bfa03fb548b72cddd0ac5e1d60 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Mon, 27 Feb 2023 14:52:25 +0000 Subject: [PATCH 42/54] Continue reworking regridding docs --- docs/source/Regridding.rst | 174 ++++++++++++++++++++++++++++++++++--- 1 file changed, 163 insertions(+), 11 deletions(-) diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst index 8005f4bd..251ab465 100644 --- a/docs/source/Regridding.rst +++ b/docs/source/Regridding.rst @@ -70,7 +70,7 @@ Optional arguments: Default value: '0x0' -There is now only one dimension format available for regridding files using the +There is now only one grid format supported for regridding files using the :code:`gcpy.file_regrid` method: :literal:`classic`. You must specify :literal:`classic` as the value of both :code:`dim_format_in` and :code:`dim_format_out`, as well as specifying a resolution as the value of @@ -104,9 +104,59 @@ types using `gridspec `_ and supported for GCHP grid formats, but in a later GCPy this will be the single method for regridding all GEOS-Chem grid formats. +Currently, this method is only available from the command line. The syntax of +:code:`regrid_restart_file` is as follows: + +Required Arguments: +------------------- + +.. option:: file_to_regrid : str + + The GCHP restart file to be regridded + +.. option:: regridding_weights_file : str + + Regridding weights to be used in the regridding transformation, generated + by :literal:`ESMF_RegridWeightGen` + +.. option:: template_file : str + + The GCHP restart file to use as a template for the regridded restart + file - attributes, dimensions, and variables for the output file will be + taken from this template. Typically this will be the same file as the file + you are regridding! + +Optional arguments: +------------------- + +.. option:: --stretched-grid : switch + + A switch to indicate that the target grid is a stretched cubed-sphere grid + +.. option:: --stretch-factor : float + + The grid stretching factor for the target stretched grid. Only takes + effect when :code:`--stretched-grid` is set. See the + `GCHP documentation `_ + for more information + +.. option:: --target-latitude : float + + The latitude of the centre point for stretching the target grid. Only + takes effect when :code:`--stretched-grid` is set. See the + `GCHP documentation `_ + for more information + +.. option:: --target-longitude : float + + The longitude of the centre point for stretching the target grid. Only + takes effect when :code:`--stretched-grid` is set. See the + `GCHP documentation `_ + for more information + .. _regrid-sparselt-firsttime: -First-time setup +First Time Setup ----------------- Until GCPy contains a complete regridding implementation that works for all @@ -133,27 +183,129 @@ will get you set up with an environment for regridding with - xarray - xesmf -#. Install command line tool gridspec in your bin directory +After installing and switching to this new conda environment, you should have +the :literal:`gridspec` commands available to you at the command line. + +.. _regrid-sparselt-gridcombo: + +Regridding +---------- + +Regridding with :literal:`gridspec` and :literal:`sparselt` is a three stage +process: + +#. Create grid specifications for the source and target grids using + :literal:`gridspec` + +#. Create regridding weights for the transformation using + :literal:`ESMF_RegridWeightGen` + +#. 
Run the regridding operation using the new :code:`regrid_restart_file` + submodule of GCPy + + +Standard Cubed-Sphere Regridding +-------------------------------- + +We will use the example of regridding the out-of-the-box +:literal:`GEOSChem.Restart.20190701_0000z.c48.nc4` restart file from C48 to +C60 to demonstrate the standard cubed-sphere regridding process: + +#. Create a source grid specification using :code:`gridspec-create`: .. code-block:: console - $ pip install git+https://github.com/LiamBindle/gridspec.git + $ gridspec-create gcs 48 -#. Make sure location of installation is added to path in your bashrc - (or equivalent) + This will produce 7 files - :literal:`c48_gridspec.nc` and + :literal:`c48.tile[1-6].nc` - .. code-block:: bash +#. Create a target grid specification using :code:`gridspec-create`: + + .. code-block:: console + + $ gridspec-create gcs 60 - $ export PATH=/path/to/home/.local/bin:$PATH + Again, this will produce 7 files - :literal:`c60_gridspec` and + :literal:`c60.tile[1-6].nc` -#. Install sparselt as a python package. +#. Create the regridding weights for the regridding transformation using + :code:`ESMF_RegridWeightGen`: .. code-block:: console - $ conda install -c conda-forge sparselt==0.1.3 + $ ESMF_RegridWeightGen \ + --source c48_gridspec.nc \ + --destination c60_gridspec.nc \ + --method conserve \ + --weight c48_to_c60_weights.nc -.. _regrid-sparselt-gridcombo: + This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our + regridding weights, :literal:`c48_to_c60_weights.nc` + +#. Finally, use the grid weights produced in step 3 to complete the regridding: + + .. code-block:: console + + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.20190701_0000z.c48 \ + c48_to_c60_weights.nc \ + GEOSChem.Restart.20190701_0000z.c48 + + This will produce a single file, :literal:`new_restart_file.nc`, regridded + from C48 to C60, that you can rename and use as you please. + +Stretched Cubed-Sphere Regridding +--------------------------------- + +We will use the example of regridding the out-of-the-box +:literal:`GEOSChem.Restart.20190701_0000z.c48.nc4` restart file from C48 to +a C120 base resolution stretched grid with a stretch factor of 4.0 over Bermuda +to demonstrate the stretched cubed-sphere regridding process: + +#. Create a source grid specification using :code:`gridspec-create`: + + .. code-block:: console + + $ gridspec-create gcs 48 + + This will produce 7 files - :literal:`c48_gridspec.nc` and + :literal:`c48.tile[1-6].nc` + +#. Create a target grid specification using :code:`gridspec-create`: + + .. code-block:: console + + $ gridspec-create gcs 60 + + Again, this will produce 7 files - :literal:`c60_gridspec` and + :literal:`c60.tile[1-6].nc` + +#. Create the regridding weights for the regridding transformation using + :code:`ESMF_RegridWeightGen`: + + .. code-block:: console + + $ ESMF_RegridWeightGen \ + --source c48_gridspec.nc \ + --destination c60_gridspec.nc \ + --method conserve \ + --weight c48_to_c60_weights.nc + + This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our + regridding weights, :literal:`c48_to_c60_weights.nc` + +#. Finally, use the grid weights produced in step 3 to complete the regridding: + + .. 
code-block:: console + + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.20190701_0000z.c48 \ + c48_to_c60_weights.nc \ + GEOSChem.Restart.20190701_0000z.c48 + This will produce a single file, :literal:`new_restart_file.nc`, regridded + from C48 to C60, that you can rename and use as you please. One-time setup per grid resolution combination ---------------------------------------------- From 1554e1041e4b3808d1497cfb72802b4d7d08cef6 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Mon, 27 Feb 2023 15:54:08 +0000 Subject: [PATCH 43/54] Complete initial rework of regridding docs --- docs/source/Regridding.rst | 254 ++++--------------------------------- 1 file changed, 22 insertions(+), 232 deletions(-) diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst index 251ab465..ea9da190 100644 --- a/docs/source/Regridding.rst +++ b/docs/source/Regridding.rst @@ -21,6 +21,8 @@ release, regridding is split into two different categories - regridding GEOS-Chem Classic format files (lat/lon), and regridding GCHP format files (standard cubed-sphere, stretched cubed-sphere). +.. _regrid-classic: + ==================================== Regridding Files - GEOS-Chem Classic ==================================== @@ -91,7 +93,7 @@ restart file to a 4x5 lat/lon grid looks like: --ll_res_out 4x5 \ --dim_format_out classic -.. _regrid-sparselt: +.. _regrid-gchp: ======================= Regridding Files - GCHP @@ -154,7 +156,7 @@ Optional arguments: `GCHP documentation `_ for more information -.. _regrid-sparselt-firsttime: +.. _regrid-gchp-firsttime: First Time Setup ----------------- @@ -186,7 +188,7 @@ will get you set up with an environment for regridding with After installing and switching to this new conda environment, you should have the :literal:`gridspec` commands available to you at the command line. -.. _regrid-sparselt-gridcombo: +.. _regrid-gchp-procedure: Regridding ---------- @@ -276,24 +278,29 @@ to demonstrate the stretched cubed-sphere regridding process: .. code-block:: console - $ gridspec-create gcs 60 + $ gridspec-create sgcs 120 -s 4.0 -t 32.0 -64.0 - Again, this will produce 7 files - :literal:`c60_gridspec` and - :literal:`c60.tile[1-6].nc` + Here, the :code:`-s` option denotes the stretch factor and the :code:`-t` + option denotes the latitude / longitude of the centre point of the grid + stretch. + + Again, this will produce 7 files - :literal:`c120_..._gridspec.nc` and + :literal:`c120_..._tile[1-6].nc`, where :literal:`...` denotes randomly + generated characters. #. Create the regridding weights for the regridding transformation using :code:`ESMF_RegridWeightGen`: .. code-block:: console - $ ESMF_RegridWeightGen \ - --source c48_gridspec.nc \ - --destination c60_gridspec.nc \ - --method conserve \ - --weight c48_to_c60_weights.nc + $ ESMF_RegridWeightGen \ + --source c48_gridspec.nc \ + --destination c120_..._gridspec.nc \ + --method conserve \ + --weight c48_to_c120_stretched_weights.nc This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our - regridding weights, :literal:`c48_to_c60_weights.nc` + regridding weights, :literal:`c48_to_c120_stretched_weights.nc` #. 
Finally, use the grid weights produced in step 3 to complete the regridding: @@ -301,228 +308,12 @@ to demonstrate the stretched cubed-sphere regridding process: $ python -m gcpy.regrid_restart_file \ GEOSChem.Restart.20190701_0000z.c48 \ - c48_to_c60_weights.nc \ + c48_to_c120_stretched_weights.nc \ GEOSChem.Restart.20190701_0000z.c48 This will produce a single file, :literal:`new_restart_file.nc`, regridded - from C48 to C60, that you can rename and use as you please. -One-time setup per grid resolution combination ----------------------------------------------- - -#. Create a directory structure to store files that you will use in - regridding. Ideally this would be in a shared location where all of - the GCPy users at your institution coud access it. - - Navigate to this directory. - - .. code-block:: console - - $ mkdir /path/to/RegridInfo - -#. Within this top level directory, create two directories that will - store grid information and regridding weights. Navigate to the - grid information folder. - - .. code-block:: console - - $ mkdir Grids - $ mkdir Weights - $ cd Grids - -#. Create tilefiles (if cubed-sphere) and grid spec file for each - input and output grid resolution (see also gridspec README): - - For uniform cubed-sphere global grid, specify face side length. - - #. For simplicity, keep all cubed-sphere data in subdirectories - of the Grids folder. - - .. code-block:: console - - $ mkdir c24 - $ gridspec-create gcs 24 - $ mv c24*.nc c24 - - $ mkdir c48 - $ gridspec-create gcs 48 - $ mv c48*.nc c48 - - ... etc for other grids ... - - #. For cubed-sphere stretched grid, specify face side length, - stretch factor, and target latitude and longitude: - - .. code-block:: console - - $ mkdir sc24 - $ gridspec-create sgcs 24 -s 2 -t 40 -100 - $ mv *c24*.nc sc24 - - #. For uniform global lat-lon grid, specify the number of latitude and - longitude grid boxes. For a list of optional settings, run the - command :command:`gridspec-create latlon --help`. - - Create a subdirectory named latlon and move all of your latlon grid - specification files there. - - .. code-block:: console - - $ gridspec-create latlon 90 180 # Generic 1 x 1 grid - $ gridspec-create latlon 46 72 -dc -pc -hp # GEOS-Chem Classic 4 x 5 - $ gridspec-create latlon 91 144 -dc -pc -hp # GEOS-Chem Classic 2 x 2.5 - $ gridspec-create latlon 361 576 -dc -pc -hp # MERRA-2 0.5 x 0.625 - $ gridspec-create latlon 721 1172 -dc -pc -hp # GEOS-FP 0.25 x 0.3125 - - $ mkdir latlon - $ mv regular_lat_lon*.nc latlon - -#. (Optional) View contents of grid spec file: - - .. code-block:: console - - $ gridspec-dump c24/c24_gridspec.nc - - ... etc. for other grids ... - -#. Initialize your GCPy conda environmnt (which includes ESMF as a - dependency): - - .. code-block:: console - - $ conda activate gcpy_env - -#. Navigate to the directory that will store the regridding - weights. (Recall that we created this in created this in step #2. - - .. code-block:: console - - $ cd /path/to/RegridInfo/Weights - -#. Generate regridding weights (see also sparselt sample data files - README), specifying the following: - - - Path to input file horizontal resolution grid spec netcdf file - - Path to output file horizontal resolution grid spec netcdf file - - Regridding type, e.g. conserve for conservative (string) - - Name of output regridding weights file (include input and output - resolutions) - - Name of directory containing grid spec tilefiles - - .. 
code-block:: console - - (gcpy_env) $ /ESMF_RegridWeightGen \ - -s /path/to/RegridInfo/Grids/c48/c48_gridspec.nc \ - -d /path/to/RegridInfo/Grids/regular_lat_lon_90x180.nc \ - -m conserve \ - -w ./regrid_weights_c48_to_latlon90x180.nc \ - --tilefile_path /path/to/RegridInfo/Grids/c48 - - ... etc. for other grid combinations ... - -#. (Optional) Consider using a bash script such as the one shown below - if you need to create regridding weights to/from several grids. - - .. code-block:: bash - - #!/bin/bash - - # Generates regridding weights with ESMF_RegridWeightGen - - # The top-level directory containing Grids and Weights subdirectories - # (EDIT AS NEEDED) - main_dir="/path/to/RegridInfo" - - # Subdirectories for grid specifications and regridding weights - grids_dir="${main_dir}/Grids" - weights_dir="${main_dir}/Weights" - - # GCHP cubed-sphere grids (EDIT AS NEEDED) - cs_list=(c24 c48 c90 c180 c360) - - # GCClassic lat-lon grids (EDIT AS NEEDED) - ll_list=(46x72 91x144 361x576 721x1172) - - # Loop over cubed-sphere grids - for cs in ${cs_list[@]}; do - - # Cubed-sphere gridspec file - cs_grid_info="${grids_dir}/${cs}/${cs}_gridspec.nc" - if [[ ! -f ${cs_grid_info} ]]; then - echo "Could not find ${cs_grid_info}!" - exit 1 - fi - - # Loop over latlon grids - for ll in ${ll_list[@]}; do - - # Latlon gridspec file - ll_grid_info="${grids_dir}/latlon/regular_lat_lon_${ll}.nc" - if [[ ! -f ${ll_grid_info} ]]; then - echo "Could not find ${ll_grid_info}!" - exit 1 - fi - - # Cubed-sphere -> latlon regridding - echo "----" - echo "Regridding from ${cs} to ${ll}" - weightfile="${weights_dir}/regrid_weights_${cs}_to_latlon${ll}.nc" - ESMF_RegridWeightGen \ - -s ${cs_grid_info} \ - -d ${ll_grid_info} \ - -m conserve \ - -w ${weightfile} \ - --tilefile_path ${grids_dir}/${cs} - unset weightfile - - # Latlon -> cubed-sphere regridding - echo "----" - echo "Regridding from ${ll} to ${cs}" - weightfile="${weights_dir}/regrid_weights_latlon${ll}_to_${cs}.nc" - ESMF_RegridWeightGen \ - -s ${ll_grid_info} \ - -d ${cs_grid_info} \ - -m conserve \ - -w ${weightfile} \ - --tilefile_path ${grids_dir}/${cs} - unset weightfile - - done - done - -.. _regrid-sparselt-regrid: - -Sample regridding script ------------------------- - -Once you have created the tilefiles and regridding weights, you can -use them to regrid data files. Shown below is a sample Python script -that you can modify. - -.. code-block:: python - - #!/usr/bin/env python - - # Imports - import xarray as xr - import sparselt.esmf - import sparselt.xr - - # Create a linear transform object from the regridding weights file - # for the combination of source and target horizontal resolutions. - transform = sparselt.esmf.load_weights( - 'path/to/RegridInfo/Weights/regrid_weights_c48_to_latlon90x180.nc', - input_dims=[('nf', 'Ydim', 'Xdim'), (6, 48, 48)] - output_dims=[('lat', 'lon'), (90, 180)], - ) - - # Open file to regrid as xarray DataSet. - ds = xr.open_dataset('my_data_c48.nc') - - # Regrid the DataSet using the transform object. - ds = sparselt.xr.apply(transform, ds) - - # Write xarray DataSet contents to netcdf file. - ds.to_netcdf("my_data_latlon90x180.nc") + from C48 to C120, with a stretch factor of 4.0 over 32.0N, -64.0E, that you + can rename and use as you please. .. _regrid-plot: @@ -591,4 +382,3 @@ what comparison resolution it should use: For differing vertical grids, the smaller vertical grid is currently used for comparisons. 
- From ab971652ff50084b1a2e511e29d72b40425ff458 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Mon, 27 Feb 2023 15:59:59 +0000 Subject: [PATCH 44/54] Update overview page with short details of regridding changes --- docs/source/Guide-to-Useful-Capabilities.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/source/Guide-to-Useful-Capabilities.rst b/docs/source/Guide-to-Useful-Capabilities.rst index fa652ac8..09b095c7 100644 --- a/docs/source/Guide-to-Useful-Capabilities.rst +++ b/docs/source/Guide-to-Useful-Capabilities.rst @@ -230,9 +230,13 @@ several horizontal regridding functions built off of xESMF. GCPy automatically handles most regridding needs when plotting GEOS-Chem data. -:file:`gcpy.file_regrid` allows you to regrid NetCDF files between -different grid types / resolutions and can be called from the -command line or as a function. +:file:`gcpy.file_regrid` allows you to regrid GEOS-Chem Classic files between +different grid resolutions and can be called from the command line or as a +function. + +:file:`gcpy.regrid_restart_file` allows you to regrid GCHP files between +between different grid resolutions and grid types (standard and stretched +cubed-sphere grids), and can be called from the command line. The 72-level and 47-level vertical grids are pre-defined in GCPy. Other vertical grids can also be defined if you provide `the A From 8cf72194829881ee7cc67782058f086b42be8642 Mon Sep 17 00:00:00 2001 From: Lizzie Lundgren Date: Wed, 1 Mar 2023 16:47:41 -0500 Subject: [PATCH 45/54] Minor updates to GCPy ReadTheDocs regridding section Signed-off-by: Lizzie Lundgren --- docs/source/Regridding.rst | 44 ++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst index ea9da190..6a1f82eb 100644 --- a/docs/source/Regridding.rst +++ b/docs/source/Regridding.rst @@ -213,7 +213,7 @@ We will use the example of regridding the out-of-the-box :literal:`GEOSChem.Restart.20190701_0000z.c48.nc4` restart file from C48 to C60 to demonstrate the standard cubed-sphere regridding process: -#. Create a source grid specification using :code:`gridspec-create`: +#. Create a source grid specification using :code:`gridspec-create`. .. code-block:: console @@ -222,7 +222,7 @@ C60 to demonstrate the standard cubed-sphere regridding process: This will produce 7 files - :literal:`c48_gridspec.nc` and :literal:`c48.tile[1-6].nc` -#. Create a target grid specification using :code:`gridspec-create`: +#. Create a target grid specification using :code:`gridspec-create`. .. code-block:: console @@ -232,7 +232,7 @@ C60 to demonstrate the standard cubed-sphere regridding process: :literal:`c60.tile[1-6].nc` #. Create the regridding weights for the regridding transformation using - :code:`ESMF_RegridWeightGen`: + :code:`ESMF_RegridWeightGen`. .. code-block:: console @@ -245,14 +245,14 @@ C60 to demonstrate the standard cubed-sphere regridding process: This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our regridding weights, :literal:`c48_to_c60_weights.nc` -#. Finally, use the grid weights produced in step 3 to complete the regridding: +#. Finally, use the grid weights produced in step 3 to complete the regridding. You will need to activate your GCPy python environment for this step. .. 
code-block:: console - $ python -m gcpy.regrid_restart_file \ - GEOSChem.Restart.20190701_0000z.c48 \ - c48_to_c60_weights.nc \ - GEOSChem.Restart.20190701_0000z.c48 + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.20190701_0000z.c48.nc4 \ + c48_to_c60_weights.nc \ + GEOSChem.Restart.20190701_0000z.c48.nc4 This will produce a single file, :literal:`new_restart_file.nc`, regridded from C48 to C60, that you can rename and use as you please. @@ -265,7 +265,7 @@ We will use the example of regridding the out-of-the-box a C120 base resolution stretched grid with a stretch factor of 4.0 over Bermuda to demonstrate the stretched cubed-sphere regridding process: -#. Create a source grid specification using :code:`gridspec-create`: +#. Create a source grid specification using :code:`gridspec-create`. .. code-block:: console @@ -274,7 +274,7 @@ to demonstrate the stretched cubed-sphere regridding process: This will produce 7 files - :literal:`c48_gridspec.nc` and :literal:`c48.tile[1-6].nc` -#. Create a target grid specification using :code:`gridspec-create`: +#. Create a target grid specification using :code:`gridspec-create`. .. code-block:: console @@ -289,7 +289,8 @@ to demonstrate the stretched cubed-sphere regridding process: generated characters. #. Create the regridding weights for the regridding transformation using - :code:`ESMF_RegridWeightGen`: + :code:`ESMF_RegridWeightGen`, replacing :literal:`c120_..._gridspec.nc` + with the actual name of the file created in the previous step. .. code-block:: console @@ -302,18 +303,25 @@ to demonstrate the stretched cubed-sphere regridding process: This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our regridding weights, :literal:`c48_to_c120_stretched_weights.nc` -#. Finally, use the grid weights produced in step 3 to complete the regridding: +#. Finally, use the grid weights produced in step 3 to complete the regridding. + You will need to switch to your GCPy python environment for this step. .. code-block:: console - $ python -m gcpy.regrid_restart_file \ - GEOSChem.Restart.20190701_0000z.c48 \ - c48_to_c120_stretched_weights.nc \ - GEOSChem.Restart.20190701_0000z.c48 + $ python -m gcpy.regrid_restart_file \ + GEOSChem.Restart.20190701_0000z.c48.nc4 \ + c48_to_c120_stretched_weights.nc \ + GEOSChem.Restart.20190701_0000z.c48.nc4 This will produce a single file, :literal:`new_restart_file.nc`, regridded - from C48 to C120, with a stretch factor of 4.0 over 32.0N, -64.0E, that you - can rename and use as you please. + from C48 to C120, with a stretch factor of 4.0 over 32.0N, -64.0E, that you + can rename and use as you please. It is generally a good idea to rename the + file to include the grid resolution, stretch factor, and target lat/lon for + easy reference. + + .. code-block:: console + + $ mv new_restart_file.nc GEOSChem.Restart.20190701_0000z.c120.s4_32N_64E.nc .. _regrid-plot: From 8381bd0aad1f920ad1fff7995e871b1ac0a0ae97 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Thu, 2 Mar 2023 09:25:15 +0000 Subject: [PATCH 46/54] Add stretching params to example regridding --- docs/source/Regridding.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst index 6a1f82eb..5e2f0e50 100644 --- a/docs/source/Regridding.rst +++ b/docs/source/Regridding.rst @@ -309,6 +309,10 @@ to demonstrate the stretched cubed-sphere regridding process: .. 
code-block:: console $ python -m gcpy.regrid_restart_file \ + --stretched-grid \ + --stretch-factor 4.0 \ + --target-latitude 32.0 \ + --target-longitude -64.0 \ GEOSChem.Restart.20190701_0000z.c48.nc4 \ c48_to_c120_stretched_weights.nc \ GEOSChem.Restart.20190701_0000z.c48.nc4 From 7426d2aa4c035aec43ac5e5a6a4d69e46dab3896 Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Thu, 2 Mar 2023 17:22:06 +0000 Subject: [PATCH 47/54] Add helpful error message to bad file path in regrid_restart_file --- gcpy/regrid_restart_file.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gcpy/regrid_restart_file.py b/gcpy/regrid_restart_file.py index 0cd167cf..2683ef08 100644 --- a/gcpy/regrid_restart_file.py +++ b/gcpy/regrid_restart_file.py @@ -53,7 +53,11 @@ def file_path(path): """ if not os.path.isfile(path): - raise argparse.ArgumentTypeError + error_message = ( + f"File {path} does not exist! Please double-check the path" + " and make sure you have used the correct file extension" + ) + raise argparse.ArgumentTypeError(error_message) return path From 7072bf925f78801f6a8c1152b378b7c4406bde5e Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 8 Mar 2023 15:30:42 -0500 Subject: [PATCH 48/54] Add ipython and jupyter to environment.yml docs/environment_files/environment.yml - Change name at top-of-file to gcpy_env - Add ipython==8.11.0 to pip section (install via PyPi) - Add jupyter==1.0.0 to pip section (install via PyPi) This environment was successfully built from scratch by Bob Y. Signed-off-by: Bob Yantosca --- docs/environment_files/environment.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/environment_files/environment.yml b/docs/environment_files/environment.yml index 84b03572..a9497bf1 100644 --- a/docs/environment_files/environment.yml +++ b/docs/environment_files/environment.yml @@ -1,4 +1,4 @@ -name: gcpy +name: gcpy_env channels: - conda-forge - defaults @@ -20,14 +20,16 @@ dependencies: - h5netcdf==0.11.0 # Python interface to netCDF4/HDF5 - h5py==3.3.0 # Python interface to HDF5 - matplotlib==3.4.2 # Creates plots and visualizations + - ipython==8.11.0 # Interactive Python (used by Jupyter) - jinja2==3.0.3 # Dependency for Sphinx - joblib==1.0.1 # Parallelize python code + - jupyter==1.0.0 # Jupyter Notebook - numpy==1.21.1 # Optimized mathematical functions - pandas==1.3.1 # Tables/timeseries manipulation - pycodestyle==2.9.1 # Python style checker (formerly PEP8) - pylint==2.15.3 # Python linter - pypdf2==1.26.0 # PDF utilities (bookmarks, etc.) - - recommonmark==0.7.1 # Dependency for Sphinx + - recommonmark==0.7.1 # Dependency for Sphinx - requests==2.26.0 # HTTP library - scipy==1.7.0 # Scientific python package - sphinx==3.5.4 # Generate ReadTheDocs output From e9fa7282d0bf3f774ba983df16c8d8c03ad8afc9 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 8 Mar 2023 15:33:30 -0500 Subject: [PATCH 49/54] Change GCPy version number to 1.3.3 We are now going to release this version of GCPy as a patch version rather than as a release version. Change the version number in docs/source/conf.py from 1.4.0 to 1.3.3. 
Signed-off-by: Bob Yantosca --- docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 616aa758..7552a838 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,11 +18,11 @@ # -- Project information ----------------------------------------------------- project = 'GCPy' -copyright = '2022, GEOS-Chem Support Team' +copyright = '2023, GEOS-Chem Support Team' author = 'GEOS-Chem Support Team' # The full version, including alpha/beta/rc tags -release = '1.4.0' +release = '1.3.3' # -- General configuration --------------------------------------------------- From 58ba3e0420939bab25969183e9de6f81fec423b9 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 8 Mar 2023 15:49:54 -0500 Subject: [PATCH 50/54] Add gchp_regridding environment file to docs/environment_files docs/environment_files/gchp_regridding.yml - Environment file from the "Regridding GCHP/First Time Setup" from gcpy.readthedocs.io Signed-off-by: Bob Yantosca --- docs/environment_files/gchp_regridding.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 docs/environment_files/gchp_regridding.yml diff --git a/docs/environment_files/gchp_regridding.yml b/docs/environment_files/gchp_regridding.yml new file mode 100644 index 00000000..ee163daa --- /dev/null +++ b/docs/environment_files/gchp_regridding.yml @@ -0,0 +1,12 @@ +name: gchp_regridding +channels: + - conda-forge +dependencies: + - python=3.9 + - esmf + - gridspec + - numpy + - requests + - sparselt + - xarray + - xesmf From f4b375348201a443e45857fe3e38476cea3464b3 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Mar 2023 11:05:55 -0500 Subject: [PATCH 51/54] Update documentation for installing Miniconda and GCPy docs/source/Getting-Started-with-GCPy.rst - Add detailed instructions for installing Conda 4.12.0 with Miniconda, as later Conda versions will try to install incompatible packages. docs/source/Regridding.rst - Mention that we have placed the environment script listed under GCHP regridding in docs/environment_files/gchp_regridding.yml. Signed-off-by: Bob Yantosca --- docs/source/Getting-Started-with-GCPy.rst | 326 +++++++++++++++++++--- docs/source/Regridding.rst | 45 +-- 2 files changed, 305 insertions(+), 66 deletions(-) diff --git a/docs/source/Getting-Started-with-GCPy.rst b/docs/source/Getting-Started-with-GCPy.rst index 06a8a9a2..d25c483f 100644 --- a/docs/source/Getting-Started-with-GCPy.rst +++ b/docs/source/Getting-Started-with-GCPy.rst @@ -1,3 +1,7 @@ +.. |br| raw:: html + +
+ .. _install: ############### @@ -10,47 +14,270 @@ Installing GCPy Requirements ============ -GCPy is currently supported for Linux and MacOS operating systems. Due -to a reliance on several packages without Windows support, **GCPy is -not currently supported for Windows**. You will receive an error -message if you attempt to use GCPy on Windows. +:program:`GCPy` is currently supported for Linux and MacOS operating +systems. Due to a reliance on several packages without Windows +support, **GCPy is not currently supported for Windows**. You will +receive an error message if you attempt to use GCPy on Windows. + +.. tip:: + + Windows 11 (and some later builds of Windows 10) support the + `Windows Subsystem for Linux (WSL) + `_. If your + Windows version is WSL-compatible, you can install GCPy into a + Linux instance (such as Ubuntu 22.04) running under Windows. At + present, this is the only way to use GCPy locally on a Windows + computer. -The only essential software you need before installing GCPy is a -distribution of the Conda package manager. This is used to create a -python environment for GCPy containing all of its software dependences, -including what version of python you use. We recommend using GCPy with -python version 3.9. +The only esential software you need before installing GCPy is a +distribution of the :program:`Conda` package manager. This is used to +create a Python environment for GCPy containing all of its software +dependences, including what version of Python you use. We recommend +using GCPy with Python version 3.9. -You can check if you already have conda installed by running the +You can check if you already have Conda installed by running the following command: .. code-block:: console $ conda --version -If conda is not already installed then we recommend using Miniconda to -install it. Miniconda is a minimal installer for conda that generally -includes many fewer packages in the base environment than are available -for download. This provides a lightweight conda install from which you -can create custom python environments with whatever python packages you -wish to use, including an environment with GCPy dependencies. To install -Miniconda follow instructions in the `Miniconda docs `__. We recommend using Python 3.9. +.. attention:: + + We recommend that you use Conda 4.12.0 or earlier to install GCPy + and its dependencies. Newer versions of Conda than this will + install Python package versions that are incompatible with GCPy. + See :ref:`Installing Conda 4.12.0 with Miniconda ` below. + + In the future we hope to be able to resolve this installation issue + so that you can use the latest Conda version. + +If Conda is not already installed, we recommend using +:program:`Miniconda` to install Conda 4.12.0. Miniconda is a minimal +installer for Conda that generally includes many fewer packages in the +base environment than are available for download. This provides a +lightweight Conda installation from which you can create custom Python +environments with whatever Python packages you wish to use, including +an environment with GCPy dependencies. + +.. _conda412_install: + +============================================ +Steps to install Conda 4.12.0 with Miniconda +============================================ + +If you already have a Conda version prior to 4.12.0 installed on your +system, you may skip this step and proceed to the section entitled +:ref:`gcpy_install`. + +If you need to install Conda 4.12.0, follow these steps: + +#. 
Download the Miniconda installer script for your operating system
+   as shown below. The script will install Conda version 4.12.0 using
+   Python 3.9.
+
+   **Linux (x86_64 CPUs)**
+
+   .. code-block:: console
+
+      $ wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh
+
+   **MacOS (M1 CPUs)**
+
+   .. code-block:: console
+
+      $ wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-arm64.sh
+
+   **MacOS (x86_64 CPUs)**
+
+   .. code-block:: console
+
+      $ wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh
+
+   .. tip::
+
+      If you do not have :program:`wget` installed on MacOS, you can
+      download it with the :program:`Homebrew` package manager:
+
+      .. code-block::
+
+         $ brew install wget
+
+   In the steps that follow, we will walk thorugh installation using
+   the Linux installer script. The steps are the same for MacOS; just
+   substitute the appropriate MacOS script name for the Linux script
+   name. |br|
+   |br|
+
+
+#. Change the permission of the Miniconda installer script so that it
+   is executable:
+
+   .. code-block:: console
+
+      $ chmod 755 Miniconda3-py39_4.12.0-Linux-x86_64.sh
+
+   |br|
+
+#. Run the Miniconda installer script.
+
+   .. code-block:: console
+
+      $ ./Miniconda3-py39_4.12.0-Linux-x86_64.sh
+
+   |br|
+
+#. Accept the license agreement.
+
+   When the installer script starts, you will be prompted to accept
+   the Miniconda license agreement:
+
+   .. code-block:: console
+
+      Welcome to Miniconda3 py39_4.12.0
+
+      In order to continue the installation process, please review the license
+      agreement.
+      Please, press ENTER to continue
+      >>>
+
+   When you press :literal:`ENTER`, you will see the license agreement
+   in all of its gory legalese detail. Press the space bar repeatedly
+   to scroll down to the end. You will then see this prompt:
+
+   .. code-block:: console
+
+      Do you accept the license terms? [yes|no]
+      [no] >>>
+
+   Type :literal:`yes` and hit :literal:`ENTER` to accept.
+
+   You will be asked to review the license agreement. Scroll down to
+   the end and type :file:`yes` and hit :literal:`ENTER` to
+   accept. |br|
+   |br|
+
+
+#. Specify the installation path.
+
+   You will then be prompted to provide a directory path for the
+   installation:
+
+   .. code-block:: console
+
+      Miniconda3 will now be installed into this location:
+      /home/bob/miniconda3
+
+      - Press ENTER to confirm the location
+      - Press CTRL-C to abort the installation
+      - Or specify a different location below
+
+      [/home/bob/miniconda3] >>>
+
+   Press :literal:`ENTER` to continue, or specify a new path and then
+   press :literal:`ENTER`.
+
+   .. tip::
+
+      If a previous Conda installation is already installed to the
+      default path, you may choose to delete the previous installation
+      folder, or install Conda 4.12.0 to a different path.
+
+   The script will then start installing the Conda 4.12.0 package
+   manager. |br|
+   |br|
+
+
+#. Specify post-installation options.
+
+   If installation is successful you will see this at the bottom of
+   the screen printout:
+
+   .. code-block:: console
+
+      Preparing transaction: done
+      Executing transaction: done
+      installation finished.
+      Do you wish the installer to initialize Miniconda3
+      by running conda init? [yes|no]
+      [no] >>>
+
+   Type :literal:`yes` and press :literal:`ENTER`. You will see
+   output similar to this:
+
+   .. 
code-block:: console
+
+      no change     /home/bob/miniconda3/condabin/conda
+      no change     /home/bob/miniconda3/bin/conda
+      no change     /home/bob/miniconda3/bin/conda-env
+      no change     /home/bob/miniconda3/bin/activate
+      no change     /home/bob/miniconda3/bin/deactivate
+      no change     /home/bob/miniconda3/etc/profile.d/conda.sh
+      no change     /home/bob/miniconda3/etc/fish/conf.d/conda.fish
+      no change     /home/bob/miniconda3/shell/condabin/Conda.psm1
+      no change     /home/bob/miniconda3/shell/condabin/conda-hook.ps1
+      no change     /home/bob/miniconda3/lib/python3.9/site-packages/xontrib/conda.xsh
+      no change     /home/bob/miniconda3/etc/profile.d/conda.csh
+      no change     /home/bob/.bashrc
+      No action taken.
+      If you'd prefer that conda's base environment not be activated on startup,
+      set the auto_activate_base parameter to false:
+
+      conda config --set auto_activate_base false
+
+      Thank you for installing Miniconda3!
+
+   |br|
+
+#. Disable the base Conda environment from being activated at startup
+
+   Close the terminal window that you used to install Conda 4.12.0 and
+   open a new terminal window. You will this prompt
+
+   .. code-block:: console
+
+      (base) $
+
+   By default, Conda will open the :literal:`base` environment each
+   time that you open a new terminal window. To disable this
+   behavior, type:
+
+   .. code-block:: console
+
+      (base) $ conda config --set auto_activate_base false
+
+   The next time you open a terminal window, you will just see the
+   regular prompt, such as:
+
+   .. code-block:: console
+
+      $
+
+   (or whatever you have defined your prompt to be in your startup scripts).
+
+Now that you have installed Conda 4.12.0, you may proceed to creating
+a new Conda environment for GCPy, as shown below.
+
+.. _gcpy_install:
 
 ==========================================
 Steps to install GCPy and its dependencies
 ==========================================
 
-#. Install conda if not already installed.
+#. Install Conda if it is not already installed.
 
-   See the :ref:`Requirements section <requirements>` above.
+   If Conda 4.12.0 or prior is already installed on your system, you
+   may skip this step. Otherwise, please follow the instructions
+   listed in :ref:`conda412_install`. |br|
+   |br|
 
-#. Download GCPy
+#. Download the GCPy source code.
 
    Create and go to the directory in which you would like to store GCPy. In
-   this example we will store GCPy in a python/packages subdirectory in the
-   home directory, but you can store it wherever you wish. You can also name
-   the GCPy download whatever you want. In this example the GCPy directory
-   is called GCPy.
+   this example we will store GCPy in a :file:`python/packages`
+   subdirectory in your home directory, but you can store it wherever
+   you wish. You can also name the GCPy download whatever you want. In
+   this example the GCPy directory is called :file:`GCPy`.
 
    .. code-block:: console
 
      $ cd $HOME/python/packages
      $ git clone https://github.com/geoschem/gcpy.git GCPy
      $ cd GCPy
 
+   |br|
+
-#. Create new python virtual environment for GCPy
+#. Create a new Python virtual environment for GCPy.
 
-   A python virtual environment is a named set of python installs,
-   e.g. packages, that are independent of other virtual environments.
-   Using an environment dedicated to GCPy is useful to maintain a set
-   of package dependencies compatible with GCPy without interfering with
-   python packages you use for other work. You can create a python virtual
-   environment from anywhere on your system. 
It will be stored in your
-   conda install rather than the directory from which you create it.
+   A Python virtual environment is a named set of Python installs,
+   e.g. packages, that are independent of other virtual
+   environments. Using an environment dedicated to GCPy is useful to
+   maintain a set of package dependencies compatible with GCPy without
+   interfering with Python packages you use for other work. You can
+   create a Python virtual environment from anywhere on your
+   system. It will be stored in your Conda installation rather than
+   the directory from which you create it.
 
-   You can create a python virtual environment using a file that lists
+   You can create a Python virtual environment using a file that lists
    all packages and their versions to be included in the environment.
-   GCPy includes such as file, environment.yml, located in the top-level
-   directory of the package.
+   GCPy includes such a file, environment.yml, located in the
+   top-level directory of the package.
 
    Run the following command at the command prompt to create a virtual
    environment for use with GCPy. You can name the environment whatever you
-   wish. This example names it gcpy_env.
+   wish. This example names it :file:`gcpy_env`.
 
    .. code-block:: console
 
      $ conda env create -n gcpy_env --file=environment.yml
 
   Once successfully created you can activate the environment with:
 
   .. code-block:: console
 
      $ conda activate gcpy_env
 
   To exit the environment do the following:
 
   .. code-block:: console
 
      $ conda deactivate
 
+   |br|
+
-#. Add GCPy to python path
+#. Add GCPy to Python path.
 
    The environment variable :envvar:`PYTHONPATH` specifies the
-   locations of python libraries on your system that are not included
+   locations of Python libraries on your system that are not included
    in your conda environment. If GCPy is included in
-   :envvar:`PYTHONPATH` then python will recognize its existence
+   :envvar:`PYTHONPATH` then Python will recognize its existence
    when you try to use it. Add the following line to your startup
    script, e.g. :file:`.bashrc`, and edit the path to where you are storing
-   GCPy. 
+   GCPy.
 
    .. code-block:: bash
 
      PYTHONPATH=$PYTHONPATH:$HOME/python/packages/GCPy
 
+   |br|
+
-#. Perform a simple test
+#. Perform a simple test.
 
-   Run the following commands in your terminal to check if the 
+   Run the following commands in your terminal to check if the
    installation was successful.
 
    .. code-block:: console
 
      $ source $HOME/.bashrc   # Alternatively close and reopen your terminal
      $ echo $PYTHONPATH       # Check it contains path to your GCPy clone
-     $ conda activate gcpy_env 
+     $ conda activate gcpy_env
      $ conda list             # Check it contains contents of gcpy env file
      $ python
-     \>>> import gcpy
+     >>> import gcpy
 
-If no errors were encountered then you successfully installed GCPy and
-its dependencies.
+If no error messages are displayed, you have successfully installed
+GCPy and its dependencies.
 
 =======================
 Upgrading GCPy versions
 =======================

diff --git a/docs/source/Regridding.rst b/docs/source/Regridding.rst
index 5e2f0e50..1e82238d 100644
--- a/docs/source/Regridding.rst
+++ b/docs/source/Regridding.rst
@@ -17,7 +17,7 @@ stretched-grid. GCPy also supports arbitrary vertical regridding across
 different vertical resolutions.
 
 Regridding with GCPy is currently undergoing an overhaul. As of the current
-release, regridding is split into two different categories - regridding 
+release, regridding is split into two different categories - regridding
 GEOS-Chem Classic format files (lat/lon), and regridding GCHP format files
 (standard cubed-sphere, stretched cubed-sphere). 
@@ -75,7 +75,7 @@ Optional arguments:
 There is now only one grid format supported for regridding files using the
 :code:`gcpy.file_regrid` method: :literal:`classic`. You must specify
 :literal:`classic` as the value of both :code:`dim_format_in` and
-:code:`dim_format_out`, as well as specifying a resolution as the value of 
+:code:`dim_format_out`, as well as specifying a resolution as the value of
 :code:`ll_res_out`.
 
 As stated previously, you can either call
@@ -117,7 +117,7 @@ Required Arguments:
    The GCHP restart file to be regridded
 
 .. option:: regridding_weights_file : str
-   
+
    Regridding weights to be used in the regridding transformation, generated
    by :literal:`ESMF_RegridWeightGen`
 
@@ -131,8 +131,8 @@ Required Arguments:
 Optional arguments:
 -------------------
 
-.. option:: --stretched-grid : switch
-   
+.. option:: --stretched-grid : switch
+
    A switch to indicate that the target grid is a stretched cubed-sphere grid
 
 .. option:: --stretch-factor : float
@@ -161,13 +161,13 @@ Optional arguments:
 First Time Setup
 -----------------
 
-Until GCPy contains a complete regridding implementation that works for all 
+Until GCPy contains a complete regridding implementation that works for all
 GEOS-Chem grid formats, we recommend that you create a small
 `conda `_ environment in which to carry out
 your GCHP regridding.
 
 The following conda `environment file
 `_
-will get you set up with an environment for regridding with 
+will get you set up with an environment for regridding with
 :literal:`gridspec` and :literal:`sparselt`:
 
 .. code-block:: yaml
@@ -176,7 +176,7 @@ will get you set up with an environment for regridding with
    channels:
      - conda-forge
    dependencies:
-     - python=3.10
+     - python=3.9
      - esmf
      - gridspec
      - numpy
@@ -185,6 +185,12 @@ will get you set up with an environment for regridding with
      - xarray
      - xesmf
 
+.. tip::
+
+   For your convenience, we have placed a copy of the above
+   environment file at the path
+   :file:`docs/environment_files/gchp_regridding.yml`.
+
 After installing and switching to this new conda environment, you should
 have the :literal:`gridspec` commands available to you at the command line.
 
@@ -240,7 +246,7 @@ C60 to demonstrate the standard cubed-sphere regridding process:
       --source c48_gridspec.nc \
       --destination c60_gridspec.nc \
       --method conserve \
-      --weight c48_to_c60_weights.nc 
+      --weight c48_to_c60_weights.nc
 
    This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and
    our regridding weights, :literal:`c48_to_c60_weights.nc`
@@ -254,7 +260,7 @@ C60 to demonstrate the standard cubed-sphere regridding process:
       c48_to_c60_weights.nc \
       GEOSChem.Restart.20190701_0000z.c48.nc4
 
-   This will produce a single file, :literal:`new_restart_file.nc`, regridded 
+   This will produce a single file, :literal:`new_restart_file.nc`, regridded
    from C48 to C60, that you can rename and use as you please.
 
 Stretched Cubed-Sphere Regridding
@@ -282,14 +288,14 @@ to demonstrate the stretched cubed-sphere regridding process:
 
    Here, the :code:`-s` option denotes the stretch factor and the :code:`-t`
    option denotes the latitude / longitude of the centre point of the grid
-   stretch. 
+   stretch.
 
    Again, this will produce 7 files - :literal:`c120_..._gridspec.nc` and
    :literal:`c120_..._tile[1-6].nc`, where :literal:`...` denotes randomly
    generated characters.
 
 #. 
Create the regridding weights for the regridding transformation using - :code:`ESMF_RegridWeightGen`, replacing :literal:`c120_..._gridspec.nc` + :code:`ESMF_RegridWeightGen`, replacing :literal:`c120_..._gridspec.nc` with the actual name of the file created in the previous step. .. code-block:: console @@ -298,12 +304,12 @@ to demonstrate the stretched cubed-sphere regridding process: --source c48_gridspec.nc \ --destination c120_..._gridspec.nc \ --method conserve \ - --weight c48_to_c120_stretched_weights.nc + --weight c48_to_c120_stretched_weights.nc This will produce a log file, :literal:`PET0.RegridWeightGen.Log`, and our regridding weights, :literal:`c48_to_c120_stretched_weights.nc` -#. Finally, use the grid weights produced in step 3 to complete the regridding. +#. Finally, use the grid weights produced in step 3 to complete the regridding. You will need to switch to your GCPy python environment for this step. .. code-block:: console @@ -317,10 +323,10 @@ to demonstrate the stretched cubed-sphere regridding process: c48_to_c120_stretched_weights.nc \ GEOSChem.Restart.20190701_0000z.c48.nc4 - This will produce a single file, :literal:`new_restart_file.nc`, regridded - from C48 to C120, with a stretch factor of 4.0 over 32.0N, -64.0E, that you - can rename and use as you please. It is generally a good idea to rename the - file to include the grid resolution, stretch factor, and target lat/lon for + This will produce a single file, :literal:`new_restart_file.nc`, regridded + from C48 to C120, with a stretch factor of 4.0 over 32.0N, -64.0E, that you + can rename and use as you please. It is generally a good idea to rename the + file to include the grid resolution, stretch factor, and target lat/lon for easy reference. .. code-block:: console @@ -393,4 +399,3 @@ what comparison resolution it should use: For differing vertical grids, the smaller vertical grid is currently used for comparisons. - From f15561b8fe1d4995fb1cd9d3c559dc81b4dda765 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Mar 2023 11:15:40 -0500 Subject: [PATCH 52/54] Remove duplicate text in Miniconda installation instructions docs/source/Getting-Started-with-GCPy.rst - Removed a duplicate paragraph in the "Accept the license agreement" section. Signed-off-by: Bob Yantosca --- docs/source/Getting-Started-with-GCPy.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/source/Getting-Started-with-GCPy.rst b/docs/source/Getting-Started-with-GCPy.rst index d25c483f..0e437d09 100644 --- a/docs/source/Getting-Started-with-GCPy.rst +++ b/docs/source/Getting-Started-with-GCPy.rst @@ -150,11 +150,7 @@ If you need to install Conda 4.12.0, follow these steps: Do you accept the license terms? [yes|no] [no] >>> - Type :literal:`yes` and hit :literal:`ENTER` to accept. - - You will be asked to review the license agreement. Scroll down to - the end and type :file:`yes` and hit :literal:`ENTER` to - accept. |br| + Type :literal:`yes` and hit :literal:`ENTER` to accept. 
|br|
    |br|
 
 
 #. Specify the installation path.

From f63602038bb042256adcb555b2d7c84e371f7c8e Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 9 Mar 2023 11:40:28 -0500
Subject: [PATCH 53/54] Minor edits for RTD file Getting-Started-with-GCPy.rst

docs/source/Getting-Started-with-GCPy.rst
- Fixed typos
- Changed "we recommend" to "you must", since GCPy will not install
  with conda versions higher than 4.12.0
- Trimmed whitespace
- Added clarifying text and reworded a couple sentences for clarity
- Replaced "bob" with "YOUR-USERNAME" in the prompt for the default
  Miniconda path

Signed-off-by: Bob Yantosca
---
 docs/source/Getting-Started-with-GCPy.rst | 41 ++++++++++++-----------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/docs/source/Getting-Started-with-GCPy.rst b/docs/source/Getting-Started-with-GCPy.rst
index 0e437d09..a21040cc 100644
--- a/docs/source/Getting-Started-with-GCPy.rst
+++ b/docs/source/Getting-Started-with-GCPy.rst
@@ -29,10 +29,10 @@ receive an error message if you attempt to use GCPy on Windows.
    present, this is the only way to use GCPy locally on a Windows
    computer.
 
-The only esential software you need before installing GCPy is a
+The only essential software you need before installing GCPy is a
 distribution of the :program:`Conda` package manager. This is used to
 create a Python environment for GCPy containing all of its software
-dependences, including what version of Python you use. We recommend
+dependences, including what version of Python you use. You must
 using GCPy with Python version 3.9.
 
 You can check if you already have Conda installed by running the
@@ -44,21 +44,22 @@ following command:
 
 .. attention::
 
-   We recommend that you use Conda 4.12.0 or earlier to install GCPy
-   and its dependencies. Newer versions of Conda than this will
-   install Python package versions that are incompatible with GCPy.
-   See :ref:`Installing Conda 4.12.0 with Miniconda <conda412_install>` below.
+   You must use Conda 4.12.0 or earlier to install GCPy and its
+   dependencies. Newer versions of Conda than this will install
+   Python package versions that are incompatible with GCPy. See
+   :ref:`Installing Conda 4.12.0 with Miniconda <conda412_install>`
+   below.
 
    In the future we hope to be able to resolve this installation issue
    so that you can use the latest Conda version.
 
-If Conda is not already installed, we recommend using
-:program:`Miniconda` to install Conda 4.12.0. Miniconda is a minimal
-installer for Conda that generally includes many fewer packages in the
-base environment than are available for download. This provides a
-lightweight Conda installation from which you can create custom Python
-environments with whatever Python packages you wish to use, including
-an environment with GCPy dependencies.
+If Conda is not already installed, you must use :program:`Miniconda`
+to install Conda 4.12.0. Miniconda is a minimal installer for Conda
+that generally includes many fewer packages in the base environment
+than are available for download. This provides a lightweight Conda
+installation from which you can create custom Python environments with
+whatever Python packages you wish to use, including an environment
+with GCPy dependencies.
 
@@ -103,10 +104,10 @@ If you need to install Conda 4.12.0, follow these steps:
 
       $ brew install wget
 
-   In the steps that follow, we will walk thorugh installation using
+   In the steps that follow, we will walk through installation using
    the Linux installer script. The steps are the same for MacOS; just
    substitute the appropriate MacOS script name for the Linux script
-   name. 
|br|
+   name in steps 2 and 3 below. |br|
    |br|
 
@@ -162,13 +163,13 @@ If you need to install Conda 4.12.0, follow these steps:
 
       Miniconda3 will now be installed into this location:
-      /home/bob/miniconda3
+      /home/YOUR-USERNAME/miniconda3
 
      - Press ENTER to confirm the location
     - Press CTRL-C to abort the installation
     - Or specify a different location below
 
-      [/home/bob/miniconda3] >>>
+      [/home/YOUR-USERNAME/miniconda3] >>>
 
    Press :literal:`ENTER` to continue, or specify a new path and then
    press :literal:`ENTER`.
@@ -186,8 +187,8 @@ If you need to install Conda 4.12.0, follow these steps:
 
 #. Specify post-installation options.
 
-   If installation is successful you will see this at the bottom of
-   the screen printout:
+   You will see this text at the bottom of the screen printout upon
+   successful installation:
 
    .. code-block:: console
 
@@ -228,7 +229,7 @@ If you need to install Conda 4.12.0, follow these steps:
 #. Disable the base Conda environment from being activated at startup
 
    Close the terminal window that you used to install Conda 4.12.0 and
-   open a new terminal window. You will this prompt
+   open a new terminal window. You will see this prompt:
 
    .. code-block:: console
 

From f6208015e084bb0b3fc0c99208dfb817284d7c52 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Thu, 9 Mar 2023 11:52:44 -0500
Subject: [PATCH 54/54] Update CHANGELOG for 1.3.3 release

CHANGELOG.md
- Updated with information about features & fixes for GCPy 1.3.3
- Added version number and release date

Signed-off-by: Bob Yantosca
---
 CHANGELOG.md | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f896ff8..e8737750 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,21 +4,25 @@
 All notable changes to GCPy will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## Unreleased
+## [1.3.3] -- 2023-03-09
 
 ### Added
 - Updated installation documentation, we now recommend users to create a conda environment using the `environment.yml` file
 - Benchmark summary table output (intended for 1hr & 1mo benchmarks)
 - Species/emissions/inventories that differ between Dev & Ref versions are now printed at the top of the benchmark emissions, inventory, and global mass tables. If there are too many species with diffs, an alternate message is printed.
 - New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions.
+- Added new RTD documentation for installing Conda 4.12.0 with Miniconda
+- Added GCHP regridding environment file `docs/environment_files/gchp_regridding.yml`
 
 ### Changed
 - Applied cleanup suggestions from pylint to `benchmark.py`, `util.py`, `plot.py`, `oh_metrics.py`, `ste_flux.py`
 - Replaced format with f-strings in `benchmark.py`, `util.py`, `plot.py`, `oh_metrics.py`, `ste_flux.py`
 - Abstract some common code in `benchmark.py` into functions
-- Replaced direct calls to `yaml.load` with `util.read_config.file` (mostly using `quiet=True`)
-- Restore tag information to refstr and devstr
-- Add a newline to diff-of-diffs refstr and devstr if the string is too long. This prevents plot titles in the top 2 rows of a six-panel plot from running together. 
+- Replaced direct calls to `yaml.load` with `util.read_config.file`
+- Restore tag information to benchmark `refstr` and `devstr` labels
+- Add a newline to diff-of-diffs refstr and devstr if the string is too long.
+- Updated GCHP regridding documentation
+- Restored `ipython` and `jupyter` to environment file `environment.yml`

## [1.3.2] -- 2022-10-25
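
For reference, the GCHP regridding workflow documented in the Regridding.rst
changes above can be exercised end-to-end in a single shell session. The
sketch below simply strings together the commands shown in the patches for
the standard C48-to-C60 example; the environment names
:literal:`gchp_regridding` and :literal:`gcpy_env` come from the environment
files referenced in patches 50 and 48, and the input restart file name
matches the documented example. The final :literal:`mv` target name is an
illustrative assumption, since :literal:`regrid_restart_file` always writes
its output to :literal:`new_restart_file.nc`.

.. code-block:: console

   # Create grid specifications for the source (C48) and target (C60) grids
   $ conda activate gchp_regridding
   $ gridspec-create gcs 48
   $ gridspec-create gcs 60

   # Generate conservative regridding weights with ESMF
   $ ESMF_RegridWeightGen \
       --source c48_gridspec.nc \
       --destination c60_gridspec.nc \
       --method conserve \
       --weight c48_to_c60_weights.nc

   # Apply the weights with GCPy (switch to the GCPy environment first)
   $ conda activate gcpy_env
   $ python -m gcpy.regrid_restart_file \
       GEOSChem.Restart.20190701_0000z.c48.nc4 \
       c48_to_c60_weights.nc \
       GEOSChem.Restart.20190701_0000z.c48.nc4

   # The output is always written to new_restart_file.nc; rename it to
   # include the new grid resolution for easy reference
   $ mv new_restart_file.nc GEOSChem.Restart.20190701_0000z.c60.nc4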