Loading src/cso/cso_dataspace.py +10 −3 Original line number Diff line number Diff line Loading @@ -54,6 +54,7 @@ # 2026-04, Arjo Segers # Fixed definition of bounding box for global selection. # Added increasing delays to avoid rate limit errors from STAC catalogue inquiry. # Trap files that could not be downloaded. # Loading Loading @@ -754,7 +755,7 @@ class CSO_DataSpace_Downloader(object): # check ... if npfile != 1: print(f"ERROR - found {npfile} files in S3 bucket for product: {product}") raise Exception raise Exception(f"file not found in S3 bucket") #endif # loop over (single) files: Loading Loading @@ -864,8 +865,14 @@ class CSO_DataSpace_Downloader(object): # unknown ... except Exception as err: # check on known errors .. if "file not found in S3 bucket" in str(err): logging.warning(f"{indent} WARNING - could not download: {href}") break else: logging.error(f"{indent}unknown error:") logging.error(f"{indent} {str(err)}") #endif # endtry Loading src/cso/cso_s5p.py +31 −13 Original line number Diff line number Diff line Loading @@ -81,6 +81,9 @@ # 2026-03, Arjo Segers # Updated selection of download source for Copernicus Dataspace. # # 2026-04, Arjo Segers # Select latest input file with latest processor version if multiple are selected. # ######################################################################## ### Loading Loading @@ -3326,7 +3329,7 @@ class CSO_S5p_Download(utopya.UtopyaRc): df.sort_values("filename", inplace=True) # info ... logging.info(f"{indent}number of files : %i" % len(df)) logging.info(f"{indent}number of files : {len(df)}") # list of ';' seperated selection expression: # (%{processor_version} == '020400') & (%{processing} == 'RPRO') ; ... Loading @@ -3334,14 +3337,14 @@ class CSO_S5p_Download(utopya.UtopyaRc): # replace templates: # (xrec['processor_version'] == '020400') & (xrec['processing'] == 'RPRO') ; ... for key in df.keys(): line = line.replace("%{" + key + "}", "xrec['" + key + "']") line = line.replace(f"%{{{key}}}", f"xrec['{key}']") # endfor # split: selections = line.split(";") # info .. logging.info("selection criteria (first with matching orbit is used):") for selection in selections: logging.info(" %s" % selection.strip()) logging.info(" {selection.strip()}") # endif # skip some? Loading @@ -3356,7 +3359,9 @@ class CSO_S5p_Download(utopya.UtopyaRc): logging.info(f"{indent}found %i orbits with overlap of time range .." % len(xdf)) # orbit labels: orbits = xdf["orbit"].unique() orbits = list( xdf["orbit"].unique() ) # sorted version: orbits.sort() # no download initialized yet: downloader = None Loading @@ -3366,11 +3371,14 @@ class CSO_S5p_Download(utopya.UtopyaRc): # loop over orbits: for orbit in orbits: # info ... logging.info(indent + ' orbit "%s" ...' % orbit) logging.info(f"{indent} orbit '{orbit}'") # search for other records for same orbit: odf = xdf[xdf["orbit"] == orbit] # sort on processor version, newest first: odf.sort_values(by="processor_version",ascending=False) # storage for status label: "selected", "blacklisted", ... filestatus = {} # no match yet .. Loading @@ -3380,6 +3388,8 @@ class CSO_S5p_Download(utopya.UtopyaRc): for selection in selections: # make empty again: selected = [] # latest processor version: pversion = None # loop over records: for indx, xrec in odf.iterrows(): # skip? Loading @@ -3389,20 +3399,27 @@ class CSO_S5p_Download(utopya.UtopyaRc): # endif # evaluate expression including 'xrec[key]' values: if eval(selection): # already selected a record? then check on processor version: if len(selected) > 0: if xrec["processor_version"] < pversion: filestatus[xrec["filename"]] = "older processor version" continue #endif #endif # store: selected.append(xrec["filename"]) filestatus[xrec["filename"]] = "selected" rec = xrec pversion = xrec["processor_version"] # endif # endfor # records # exactly one? then leave: if len(selected) == 1: break elif len(selected) > 1: logging.error( "found more than one orbit file matching selection: %s" % selection ) logging.warning(f"{indent} found more than one orbit file matching selection: {selection}") for fname in selected: logging.error(" %s" % fname) logging.warning(f"{indent} {fname}") # endfor raise Exception # endif # number found Loading Loading @@ -3460,7 +3477,8 @@ class CSO_S5p_Download(utopya.UtopyaRc): # initialize download? if downloader is None: # init downloader based on url: if "dataspace.copernicus.eu" in href: # init downloader based on url: if href.startswith("s3://eodata/"): # download from Copernicus DataSpace: downloader = cso_dataspace.CSO_DataSpace_Downloader() # Loading @@ -3469,7 +3487,7 @@ class CSO_S5p_Download(utopya.UtopyaRc): downloader = cso_pal.CSO_PAL_Downloader() # else: logging.error("no downloader class defined for url: {href}") logging.error(f"no downloader class defined for url: {href}") raise Exception # endif # endif Loading @@ -3481,9 +3499,9 @@ class CSO_S5p_Download(utopya.UtopyaRc): # download might have failed .. if not os.path.isfile(input_file): logging.error(f"missing input file") logging.error(f" {input_file}") logging.error(f"missing input file: {input_file}") raise Exception #logging.warning(f"missing input file: {input_file}") # endif # endfor # input files Loading Loading
src/cso/cso_dataspace.py +10 −3 Original line number Diff line number Diff line Loading @@ -54,6 +54,7 @@ # 2026-04, Arjo Segers # Fixed definition of bounding box for global selection. # Added increasing delays to avoid rate limit errors from STAC catalogue inquiry. # Trap files that could not be downloaded. # Loading Loading @@ -754,7 +755,7 @@ class CSO_DataSpace_Downloader(object): # check ... if npfile != 1: print(f"ERROR - found {npfile} files in S3 bucket for product: {product}") raise Exception raise Exception(f"file not found in S3 bucket") #endif # loop over (single) files: Loading Loading @@ -864,8 +865,14 @@ class CSO_DataSpace_Downloader(object): # unknown ... except Exception as err: # check on known errors .. if "file not found in S3 bucket" in str(err): logging.warning(f"{indent} WARNING - could not download: {href}") break else: logging.error(f"{indent}unknown error:") logging.error(f"{indent} {str(err)}") #endif # endtry Loading
src/cso/cso_s5p.py +31 −13 Original line number Diff line number Diff line Loading @@ -81,6 +81,9 @@ # 2026-03, Arjo Segers # Updated selection of download source for Copernicus Dataspace. # # 2026-04, Arjo Segers # Select latest input file with latest processor version if multiple are selected. # ######################################################################## ### Loading Loading @@ -3326,7 +3329,7 @@ class CSO_S5p_Download(utopya.UtopyaRc): df.sort_values("filename", inplace=True) # info ... logging.info(f"{indent}number of files : %i" % len(df)) logging.info(f"{indent}number of files : {len(df)}") # list of ';' seperated selection expression: # (%{processor_version} == '020400') & (%{processing} == 'RPRO') ; ... Loading @@ -3334,14 +3337,14 @@ class CSO_S5p_Download(utopya.UtopyaRc): # replace templates: # (xrec['processor_version'] == '020400') & (xrec['processing'] == 'RPRO') ; ... for key in df.keys(): line = line.replace("%{" + key + "}", "xrec['" + key + "']") line = line.replace(f"%{{{key}}}", f"xrec['{key}']") # endfor # split: selections = line.split(";") # info .. logging.info("selection criteria (first with matching orbit is used):") for selection in selections: logging.info(" %s" % selection.strip()) logging.info(" {selection.strip()}") # endif # skip some? Loading @@ -3356,7 +3359,9 @@ class CSO_S5p_Download(utopya.UtopyaRc): logging.info(f"{indent}found %i orbits with overlap of time range .." % len(xdf)) # orbit labels: orbits = xdf["orbit"].unique() orbits = list( xdf["orbit"].unique() ) # sorted version: orbits.sort() # no download initialized yet: downloader = None Loading @@ -3366,11 +3371,14 @@ class CSO_S5p_Download(utopya.UtopyaRc): # loop over orbits: for orbit in orbits: # info ... logging.info(indent + ' orbit "%s" ...' % orbit) logging.info(f"{indent} orbit '{orbit}'") # search for other records for same orbit: odf = xdf[xdf["orbit"] == orbit] # sort on processor version, newest first: odf.sort_values(by="processor_version",ascending=False) # storage for status label: "selected", "blacklisted", ... filestatus = {} # no match yet .. Loading @@ -3380,6 +3388,8 @@ class CSO_S5p_Download(utopya.UtopyaRc): for selection in selections: # make empty again: selected = [] # latest processor version: pversion = None # loop over records: for indx, xrec in odf.iterrows(): # skip? Loading @@ -3389,20 +3399,27 @@ class CSO_S5p_Download(utopya.UtopyaRc): # endif # evaluate expression including 'xrec[key]' values: if eval(selection): # already selected a record? then check on processor version: if len(selected) > 0: if xrec["processor_version"] < pversion: filestatus[xrec["filename"]] = "older processor version" continue #endif #endif # store: selected.append(xrec["filename"]) filestatus[xrec["filename"]] = "selected" rec = xrec pversion = xrec["processor_version"] # endif # endfor # records # exactly one? then leave: if len(selected) == 1: break elif len(selected) > 1: logging.error( "found more than one orbit file matching selection: %s" % selection ) logging.warning(f"{indent} found more than one orbit file matching selection: {selection}") for fname in selected: logging.error(" %s" % fname) logging.warning(f"{indent} {fname}") # endfor raise Exception # endif # number found Loading Loading @@ -3460,7 +3477,8 @@ class CSO_S5p_Download(utopya.UtopyaRc): # initialize download? if downloader is None: # init downloader based on url: if "dataspace.copernicus.eu" in href: # init downloader based on url: if href.startswith("s3://eodata/"): # download from Copernicus DataSpace: downloader = cso_dataspace.CSO_DataSpace_Downloader() # Loading @@ -3469,7 +3487,7 @@ class CSO_S5p_Download(utopya.UtopyaRc): downloader = cso_pal.CSO_PAL_Downloader() # else: logging.error("no downloader class defined for url: {href}") logging.error(f"no downloader class defined for url: {href}") raise Exception # endif # endif Loading @@ -3481,9 +3499,9 @@ class CSO_S5p_Download(utopya.UtopyaRc): # download might have failed .. if not os.path.isfile(input_file): logging.error(f"missing input file") logging.error(f" {input_file}") logging.error(f"missing input file: {input_file}") raise Exception #logging.warning(f"missing input file: {input_file}") # endif # endfor # input files Loading