Loading src/cso/cso_earthaccess.py +34 −7 Original line number Diff line number Diff line Loading @@ -13,6 +13,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # # 2025-09, Arjo Segers # Added 'blacklist' for problematic URL's. # ######################################################################## ### Loading Loading @@ -383,6 +386,15 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): # update record: listing.UpdateRecord(filename, data, indent=f"{indent} ") ## ADHOC check on double records .. #dlst = listing.Select( product=data["product"], start_time=data["start_time"], end_time=data["end_time"], processor_version=data["processor_version"] ) #if len(dlst) > 1 : # logging.error( f"record alredy exist?" ) # logging.error( dlst.df ) # logging.error( data ) # raise Exception ##endif ## testing ... # if len(listing) >= 100 : # logging.warning( f"BREAK after {len(listing)} files ..." ) Loading Loading @@ -462,6 +474,13 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): ! processor version "v2.0.0" <rcbase>.processor_version : 020000 Some url's seem not to exist anymore, or actually, these are double available from 2 different url's of which 1 does not work anymore. As temporary solution these could be blacklisted:: ! skip some problematic url's: <rcbase>.blacklist : https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.0836.002.2024106154554.nc \ https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.1348.002.2024106155539.nc Specify the directory where the input files are to be searched, or where to download them to if not present yet:: Loading Loading @@ -566,8 +585,8 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): logging.info(f"{indent}selection:") logging.info(f"{indent} processor version: {processor_version}") ## skip some? # blacklist = self.GetSetting("blacklist", default="").split() # skip some? blacklist = self.GetSetting("blacklist", default="").split() # target directory, including time templates: arch_dir__template = self.GetSetting("dir") Loading @@ -594,6 +613,14 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): # info ... logging.info(f"{indent}{basename} ...") # check .. if rec["href"] in blacklist: # info ... logging.info(f"{indent} download url is blacklisted, skip ...") # next record: continue #endif # expand time templates arch_dir = rec["start_time"].strftime(arch_dir__template) # full path: Loading Loading @@ -700,7 +727,7 @@ class CSO_EarthAccess_Download_Listing(utopya.UtopyaRc): """ Create *listing* file for files downloaded from VIIRS data portals. A *listing* file contains the names of the converted orbit files, A *listing* file contains the names of orbit files, the time range of pixels in the file, and other information extracted from the filenames or file attributes:: filename ;start_time ;end_time ;orbit Loading Loading
src/cso/cso_earthaccess.py +34 −7 Original line number Diff line number Diff line Loading @@ -13,6 +13,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # # 2025-09, Arjo Segers # Added 'blacklist' for problematic URL's. # ######################################################################## ### Loading Loading @@ -383,6 +386,15 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): # update record: listing.UpdateRecord(filename, data, indent=f"{indent} ") ## ADHOC check on double records .. #dlst = listing.Select( product=data["product"], start_time=data["start_time"], end_time=data["end_time"], processor_version=data["processor_version"] ) #if len(dlst) > 1 : # logging.error( f"record alredy exist?" ) # logging.error( dlst.df ) # logging.error( data ) # raise Exception ##endif ## testing ... # if len(listing) >= 100 : # logging.warning( f"BREAK after {len(listing)} files ..." ) Loading Loading @@ -462,6 +474,13 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): ! processor version "v2.0.0" <rcbase>.processor_version : 020000 Some url's seem not to exist anymore, or actually, these are double available from 2 different url's of which 1 does not work anymore. As temporary solution these could be blacklisted:: ! skip some problematic url's: <rcbase>.blacklist : https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.0836.002.2024106154554.nc \ https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/AERDB_L2_VIIRS_SNPP/2024/103/AERDB_L2_VIIRS_SNPP.A2024103.1348.002.2024106155539.nc Specify the directory where the input files are to be searched, or where to download them to if not present yet:: Loading Loading @@ -566,8 +585,8 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): logging.info(f"{indent}selection:") logging.info(f"{indent} processor version: {processor_version}") ## skip some? # blacklist = self.GetSetting("blacklist", default="").split() # skip some? blacklist = self.GetSetting("blacklist", default="").split() # target directory, including time templates: arch_dir__template = self.GetSetting("dir") Loading @@ -594,6 +613,14 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc): # info ... logging.info(f"{indent}{basename} ...") # check .. if rec["href"] in blacklist: # info ... logging.info(f"{indent} download url is blacklisted, skip ...") # next record: continue #endif # expand time templates arch_dir = rec["start_time"].strftime(arch_dir__template) # full path: Loading Loading @@ -700,7 +727,7 @@ class CSO_EarthAccess_Download_Listing(utopya.UtopyaRc): """ Create *listing* file for files downloaded from VIIRS data portals. A *listing* file contains the names of the converted orbit files, A *listing* file contains the names of orbit files, the time range of pixels in the file, and other information extracted from the filenames or file attributes:: filename ;start_time ;end_time ;orbit Loading