From 242a63604870a483adbe9b7a2646deb2461c90c1 Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:05:30 +0200 Subject: [PATCH 1/7] Fixed reading settings for map styles. --- src/cso/cso_catalogue.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/cso/cso_catalogue.py b/src/cso/cso_catalogue.py index 439538e..9ab0cbf 100644 --- a/src/cso/cso_catalogue.py +++ b/src/cso/cso_catalogue.py @@ -33,6 +33,7 @@ # 2025-09, Arjo Segers # Updated plotting of gridded fields. # Extended unit conversions. +# Fixed reading of settings for map styles. # @@ -362,7 +363,7 @@ class CSO_Catalogue(CSO_CatalogueBase): figsize = eval(self.GetSetting("figsize")) # no-data color: - color_nan = self.GetSetting("color_nan", default="0.80") + color_nan = self.GetSetting("color_nan", default="0.90") # extra map properties: bmp_kwargs = self.GetSetting("map", totype="dict", default=dict()) @@ -714,7 +715,7 @@ class CSO_SimCatalogue(CSO_CatalogueBase): figsize = eval(self.GetSetting("figsize")) # no-data color: - color_nan = self.GetSetting("color_nan", default="0.80") + color_nan = self.GetSetting("color_nan", default="0.90") # extra map properties: bmp_kwargs = self.GetSetting("map", totype="dict", default=dict()) @@ -788,20 +789,20 @@ class CSO_SimCatalogue(CSO_CatalogueBase): # settings for this variable: vkey = "var.%s" % varname # originating variable: - vsource = self.GetSetting("var.{varname}.source", default="data:" + varname) + vsource = self.GetSetting(f"var.{varname}.source", default="data:" + varname) # target units: - vunits = self.GetSetting("var.{varname}.units", default="None") + vunits = self.GetSetting(f"var.{varname}.units", default="None") # plot type: - ptype = self.GetSetting("var.{varname}.type", default="map") + ptype = self.GetSetting(f"var.{varname}.type", default="map") # long name used in labels: - long_name = self.GetSetting("var.{varname}.long_name", default=varname) + long_name = 
self.GetSetting(f"var.{varname}.long_name", default=varname) # switch: if ptype == "map": # style: - vmin = eval(self.GetSetting("var.{varname}.vmin", default="None")) - vmax = eval(self.GetSetting("var.{varname}.vmax", default="None")) - colors = eval(self.GetSetting("var.{varname}.colors", default="None")) + vmin = eval(self.GetSetting(f"var.{varname}.vmin", default="None")) + vmax = eval(self.GetSetting(f"var.{varname}.vmax", default="None")) + colors = eval(self.GetSetting(f"var.{varname}.colors", default="None")) # variable source: # [data:]vname -- GitLab From c4bb6f50e7745c1c472640eee492fad6cc2440ae Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:07:21 +0200 Subject: [PATCH 2/7] Added blacklist for problematic urls --- src/cso/cso_earthaccess.py | 41 +++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/cso/cso_earthaccess.py b/src/cso/cso_earthaccess.py index 0e82269..21c8c67 100644 --- a/src/cso/cso_earthaccess.py +++ b/src/cso/cso_earthaccess.py @@ -13,6 +13,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # +# 2025-09, Arjo Segers +# Added 'blacklist' for problematic URL's. +# ######################################################################## ### @@ -383,6 +386,15 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): # update record: listing.UpdateRecord(filename, data, indent=f"{indent} ") + ## ADHOC check on double records .. + #dlst = listing.Select( product=data["product"], start_time=data["start_time"], end_time=data["end_time"], processor_version=data["processor_version"] ) + #if len(dlst) > 1 : + # logging.error( f"record alredy exist?" ) + # logging.error( dlst.df ) + # logging.error( data ) + # raise Exception + ##endif + ## testing ... # if len(listing) >= 100 : # logging.warning( f"BREAK after {len(listing)} files ..." 
) @@ -460,23 +472,30 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): Define the processor version as a 6-digit number, as this is the format used in the inquiry table:: ! processor version "v2.0.0" - .processor_version : 020000 + .processor_version : 020000 + + Some URLs seem not to exist anymore; or rather, these files are available twice, from 2 different URLs of which 1 does not work anymore. + As a temporary solution these could be blacklisted:: + + ! skip some problematic URLs: + .blacklist : https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.0836.002.2024106154554.nc \ + https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.1348.002.2024106155539.nc Specify the directory where the input files are to be searched, or where to download them to if not present yet:: ! target dir for downloads, including templates for year and julian day: - .dir : /data/EarthData/AERDB_L2_VIIRS_SNPP/%Y/%j + .dir : /data/EarthData/AERDB_L2_VIIRS_SNPP/%Y/%j Optionally define a creation mode for the (parent) directories:: ! directory creation mode: - .dmode : 0o775 + .dmode : 0o775 Define if existing files need to be replaced:: ! download existing files again? - .renew : False + .renew : False """ @@ -566,8 +585,8 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): logging.info(f"{indent}selection:") logging.info(f"{indent} processor version: {processor_version}") - ## skip some? - # blacklist = self.GetSetting("blacklist", default="").split() + # skip some? + blacklist = self.GetSetting("blacklist", default="").split() # target directory, including time templates: arch_dir__template = self.GetSetting("dir") @@ -594,6 +613,14 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): # info ... logging.info(f"{indent}{basename} ...") + # check .. + if rec["href"] in blacklist: + # info ... 
+ logging.info(f"{indent} download url is blacklisted, skip ...") + # next record: + continue + #endif + # expand time templates arch_dir = rec["start_time"].strftime(arch_dir__template) # full path: @@ -700,7 +727,7 @@ class CSO_EarthAccess_Download_Listing(utopya.UtopyaRc): """ Create *listing* file for files downloaded from VIIRS data portals. - A *listing* file contains the names of the converted orbit files, + A *listing* file contains the names of orbit files, the time range of pixels in the file, and other information extracted from the filenames or file attributes:: filename ;start_time ;end_time ;orbit -- GitLab From 2df1685676790c18c324df64a234b2cbafb98e5e Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:09:02 +0200 Subject: [PATCH 3/7] Support masked arrays for selecting no-data values. --- src/cso/cso_plot.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/cso/cso_plot.py b/src/cso/cso_plot.py index 7a86e55..7227b14 100644 --- a/src/cso/cso_plot.py +++ b/src/cso/cso_plot.py @@ -36,6 +36,7 @@ # # 2025-09, Arjo Segers # Improved supported for defined number of color entries. +# Support masked arrays for selecting no-data values. # # @@ -1066,7 +1067,11 @@ class ColorbarFigure(Figure): # adhoc: add layer with no-data colors if self.cmap__color_bad is not None: # any nan values? - jj, ii = numpy.where(numpy.isnan(cc)) + if hasattr(cc,"mask"): + jj, ii = numpy.where(cc.mask) + else: + jj, ii = numpy.where(numpy.isnan(cc)) + #endif if len(jj) > 0: # init as full nan field: cc2 = numpy.full(cc.shape, numpy.nan) -- GitLab From 957a65a005f7573658a5ca4bcc30c055522be731 Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:09:52 +0200 Subject: [PATCH 4/7] Trap multiple selections from inquire table as problematic. 
--- src/cso/cso_s5p.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/cso/cso_s5p.py b/src/cso/cso_s5p.py index 01513f2..d9b4c04 100644 --- a/src/cso/cso_s5p.py +++ b/src/cso/cso_s5p.py @@ -70,6 +70,10 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # +# 2025-09, Arjo Segers +# Trap multiple selections from inquire table as this might indicate +# a problem in the table or the selection. +# ######################################################################## @@ -2863,6 +2867,11 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # no orbit found? next: if len(odf) == 0: continue + elif len(odf) > 1: + logging.error(f"found {len(odf)} records matching selection;" + + " use finer selection, or something wrong in inquiry table?" ) + raise Exception + #endif # selected record: rec = odf.GetRecord(0) -- GitLab From 2b2605d038236678fdf2cfa73ed353a77bf0e1d2 Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:11:54 +0200 Subject: [PATCH 5/7] Fix longitude/latitude arrays with no-data values, interpolate between surrounding locations. --- src/cso/cso_viirs.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/cso/cso_viirs.py b/src/cso/cso_viirs.py index 92b5714..07678e0 100644 --- a/src/cso/cso_viirs.py +++ b/src/cso/cso_viirs.py @@ -4,6 +4,9 @@ # 2025-02, Arjo Segers # Initial version based on "cso_viirs.py". # +# 2025-09, Arjo Segers +# Fix longitude/latitude arrays with no-data values, interpolate between surrounding locations. +# ######################################################################## @@ -166,12 +169,6 @@ class VIIRS_File(object): # store: self.filenames = filenames - ## check ... 
- # if not os.path.isfile(filename): - # logging.error("file not found : %s" % filename) - # raise Exception - ## endif - # open: with xarray.open_mfdataset( self.filenames, concat_dim="Idx_Atrack", combine="nested" @@ -545,7 +542,7 @@ class CSO_VIIRS_File(cso_file.CSO_File): * ``longitude``, ``latitude`` : pixel locations; the postions of the even secan lines are reset to have them in between the odd scan lines; - * ``longitude_bounds``, ``latitude_bounds`` : footprint bounds per pixel, guess by interpolation + * ``longitude_bounds``, ``latitude_bounds`` : footprint bounds per pixel, guess by interpolation and/or extrapolation of pixel centers; if pixel covers date line, corner values are reset to values outside [-180,+180] to ensure that the footprints remains convex with the center inside; @@ -1035,6 +1032,9 @@ class CSO_VIIRS_File(cso_file.CSO_File): Correct locations of the odd scan lines, which seem to become dis-located towards the edge of the swath. + Sometimes no-data values are found (-999), which is problematic when defining the footprint bounds. + Therefore, these are filled by interpolation. + Arguments: * ``values`` : array of shape ``(ny,nx)`` with longitudes or latitudes @@ -1050,6 +1050,28 @@ class CSO_VIIRS_File(cso_file.CSO_File): # shape: ny, nx = values.shape + # check ... + if numpy.any( values < -180.0 ): + # info .. + logging.warning(f" replace no-data values in pixel locations ...") + # modules; + import scipy + # loop over pixels: + for i in range(nx): + # pixels without data: + jj, = numpy.where( values[:,i] < -180.0 ) + # any? 
+ if len(jj) > 0: + # valid values: + jj1, = numpy.where( values[:,i] >= -180.0 ) + # interpolator, linear: + spl = scipy.interpolate.make_interp_spline( jj1, values[jj1,i], k=1 ) + # replace: + values[jj,i] = spl( jj ) + #endif + #endfor + #endif + # target arrays: xx = numpy.zeros((ny, nx)) -- GitLab From 5ea8c70b81a8419018e849810487af94d107622d Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:12:10 +0200 Subject: [PATCH 6/7] Corrected messages. --- src/cso/cso_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cso/cso_file.py b/src/cso/cso_file.py index 786c125..1eb4a7f 100644 --- a/src/cso/cso_file.py +++ b/src/cso/cso_file.py @@ -1683,7 +1683,7 @@ class CSO_Listing(object): # show selection? if verbose: # info ... - logging.info(f"{indent}available records(s):") + logging.info(f"{indent}available record(s):") # loop: for fname, row in df.iterrows(): line = fname -- GitLab From 7afe471cfa9fb2c60a8e20f11dce0eb33bc0eeb2 Mon Sep 17 00:00:00 2001 From: Arjo Segers Date: Tue, 16 Sep 2025 11:30:54 +0200 Subject: [PATCH 7/7] Updated template settings for VIIRS. --- config/VIIRS/cso-user-settings.rc | 3 +- config/VIIRS/cso-viirs.rc | 62 +++++++++++++++++-------------- config/VIIRS/cso.rc | 27 ++++++++------ 3 files changed, 50 insertions(+), 42 deletions(-) diff --git a/config/VIIRS/cso-user-settings.rc b/config/VIIRS/cso-user-settings.rc index 9616ec9..4fa434e 100644 --- a/config/VIIRS/cso-user-settings.rc +++ b/config/VIIRS/cso-user-settings.rc @@ -75,8 +75,7 @@ my.attr.institution : CSO my.attr.email : Your.Name@cso.org ! 
base location for work directories: -!my.work : /work/${USER}/CSO-Tutorial -my.work : /Scratch/${USER}/CSO-VIIRS +my.work : /work/${USER}/CSO-Tutorial-VIIRS !---------------------------------------------------------- diff --git a/config/VIIRS/cso-viirs.rc b/config/VIIRS/cso-viirs.rc index 52a02fa..da8420f 100644 --- a/config/VIIRS/cso-viirs.rc +++ b/config/VIIRS/cso-viirs.rc @@ -140,6 +140,12 @@ cso.download.processor_version : 020000 ! target directory, includiong time values: cso.download.dir : ${my.arch.dir}/${my.version}/%Y/%j +! ADHOC: some files are listed twice (from different URLs); +! should be checked in inquire? +! for the moment keep blacklist: +cso.download.blacklist : https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.0836.002.2024106154554.nc \ + https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.1348.002.2024106155539.nc + !----------------------------------------------------------- @@ -378,34 +384,34 @@ cso.convert.output.var.aot_WVLnm.attrs : { 'valid_range' : Non -!!====================================================================== -!!=== -!!=== listing -!!=== -!!====================================================================== -! -!! csv file that will hold records per file with: -!! - timerange of pixels in file -!! - orbit number -!cso.listing.file : ${my.work}/_PRODUCT_/data/${my.region}/${my.selection}__listing.csv -! -!! renew table if file already exists? -!cso.listing.renew : True -! -!! time range: -!cso.listing.timerange.start : ${my.timerange.start} -!cso.listing.timerange.end : ${my.timerange.end} -! -!! filename filters relative to listing file that should be scanned for orbit files; -!! names could include time templates ; -!! if same orbit is found in multiple directories, the first found is used; -!! remove existing table for safety to ensure that this is done correctly ... 
-!cso.listing.patterns : ${my.selection}/%Y/%m/_PRODUCT__*.nc -! -!! extra columns to be added, read from global attributes: -!cso.listing.xcolumns : orbit -! -! +!====================================================================== +!=== +!=== listing +!=== +!====================================================================== + +! csv file that will hold records per file with: +! - timerange of pixels in file +! - orbit number +cso.listing.file : ${my.work}/${MY_PRODUCT}/data/${my.region}/listing.csv + +! renew table if file already exists? +cso.listing.renew : True + +! time range: +cso.listing.timerange.start : ${my.timerange.start} +cso.listing.timerange.end : ${my.timerange.end} + +! filename filters relative to listing file that should be scanned for orbit files; +! names could include time templates ; +! if same orbit is found in multiple directories, the first found is used; +! remove existing table for safety to ensure that this is done correctly ... +cso.listing.patterns : %Y/%m/${MY_PRODUCT}_*.nc + +! extra columns to be added, read from global attributes: +cso.listing.xcolumns : + + !====================================================================== diff --git a/config/VIIRS/cso.rc b/config/VIIRS/cso.rc index a633c0f..dd0edc6 100644 --- a/config/VIIRS/cso.rc +++ b/config/VIIRS/cso.rc @@ -107,26 +107,26 @@ cso.copy.rcwrite : ${my.work}/cso.rc #for _PRODUCT_ in viirs1-aod-db viirs2-aod-db viirs1-aod-dt viirs2-aod-dt - ! class to create a job tree: cso._PRODUCT_.class : utopya.UtopyaJobTree !~ sub steps: -!cso._PRODUCT_.elements : inquire download download-listing convert +!cso._PRODUCT_.elements : inquire download download-listing convert listing !~ one by one ... !cso._PRODUCT_.elements : inquire !cso._PRODUCT_.elements : download !cso._PRODUCT_.elements : inquire download -cso._PRODUCT_.elements : download-listing +!cso._PRODUCT_.elements : download-listing !cso._PRODUCT_.elements : convert +cso._PRODUCT_.elements : listing + +! * ! 
inquire tasks: !cso._PRODUCT_.inquire.tasks : table-earthaccess plot cso._PRODUCT_.inquire.tasks : table-earthaccess !cso._PRODUCT_.inquire.tasks : plot - - ! single step: cso._PRODUCT_.inquire.class : utopya.UtopyaJobStep !~ inquire files downloaded from EarthAccess: @@ -140,6 +140,8 @@ cso._PRODUCT_.inquire.plot.args : '${my.work}/rc/cso-viirs.rc' rcbase='cso.inquire-plot', \ env={ 'MY_PRODUCT' : '_PRODUCT_' } +! * + !~ download data: ! single step: cso._PRODUCT_.download.class : utopya.UtopyaJobStep @@ -165,13 +167,14 @@ cso._PRODUCT_.convert.task.class : cso.CSO_VIIRS_Convert cso._PRODUCT_.convert.task.args : '${my.work}/rc/cso-viirs.rc', \ rcbase='cso.convert', \ env={ 'MY_PRODUCT' : '_PRODUCT_' } -!! single step: -!cso._PRODUCT_.listing.class : utopya.UtopyaJobStep -!! listing task: -!cso._PRODUCT_.listing.task.class : cso.CSO_S5p_Listing -!cso._PRODUCT_.listing.task.args : '${my.work}/rc/cso-s5p-ALL.rc', \ -! rcbase='cso._PRODUCT_.listing' -! +! single step: +cso._PRODUCT_.listing.class : utopya.UtopyaJobStep +! listing task: +cso._PRODUCT_.listing.task.class : cso.CSO_S5p_Listing +cso._PRODUCT_.listing.task.args : '${my.work}/rc/cso-viirs.rc', \ + rcbase='cso.listing', \ + env={ 'MY_PRODUCT' : '_PRODUCT_' } + #endfor -- GitLab