class CSO_PAL_Downloader(object):
    """
    Class to download a single file from the
    `Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

    The :py:meth:`DownloadFile` method should be used to actually download a file.

    Usage::

        # initialize downloader:
        downloader = CSO_PAL_Downloader()
        # download file, store in specified file:
        downloader.DownloadFile(
            "https://data-portal.s5p-pal.com/cat/sentinel-5p/download/88c15681-db43-4219-b391-c8567e39cccf",
            "orbit.nc",
        )

    """

    def __init__(self):
        """
        Initialize downloader.
        """

    # enddef __init__

    # *

    def DownloadFile(self, href, output_file, maxtry=10, nsec_wait=60, indent=""):
        """
        Download file from PAL.

        The content is streamed to a temporary file ``<output_file>.tmp`` next to
        the destination, which is moved to ``output_file`` only after the download
        completed; a partial temporary file is removed if an attempt fails.

        If a request fails with an HTTP error or a network error (connection
        problem, timeout, interrupted transfer) it is tried again, up to a total
        of ``maxtry`` attempts, with a delay of ``nsec_wait`` seconds between requests.

        Arguments:

        * ``href`` : download url, for example::

            https://data-portal.s5p-pal.com/cat/sentinel-5p/download/88c15681-db43-4219-b391-c8567e39cccf

        * ``output_file`` : target file

        Optional arguments:

        * ``maxtry`` : maximum number of download attempts; at least one attempt is always made
        * ``nsec_wait`` : delay in seconds between requests
        * ``indent`` : prefix for log messages

        Raises:

        * ``Exception`` if the download still failed after ``maxtry`` attempts;
          the last request error is attached as cause
        * ``MemoryError`` and any other unexpected exception are logged and re-raised
        """

        # modules:
        import logging
        import os
        import time
        import requests

        # tools:
        import cso_file

        # always make at least one attempt:
        maxtry = max(1, maxtry)

        # temporary target in the same directory as the destination, so that
        # the final move never has to cross a file system boundary:
        tmpfile = output_file + ".tmp"
        # name used in log messages:
        product_file = os.path.basename(output_file)

        # network problems that are worth a new attempt:
        transient_errors = (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.ChunkedEncodingError,
        )

        # retry loop ..
        ntry = 1
        while True:
            # try to download and save:
            try:
                # stream the response instead of loading the product in memory:
                with requests.get(href, stream=True) as r:
                    # check status, raise error if request failed:
                    r.raise_for_status()
                    # info ..
                    logging.info(f"{indent}write to {product_file} ...")
                    # create target dir if necessary
                    # (assumes CheckDir creates the directory holding the file - TODO confirm):
                    cso_file.CheckDir(output_file)
                    # open temporary file for binary write:
                    with open(tmpfile, "wb") as fd:
                        for chunk in r.iter_content(chunk_size=1024 * 1024):
                            fd.write(chunk)
                        # endfor
                    # endwith
                # endwith
                # download complete, move to destination:
                os.replace(tmpfile, output_file)
                # all ok, leave retry loop:
                break

            except requests.exceptions.HTTPError as err:
                last_error = err
                # info ..
                msg = str(err)
                logging.warning(f"{indent}exception from download; message received:")
                logging.warning(f"{indent}  %s" % msg)
                # catch known problem ...
                if msg.startswith("401 Client Error: Unauthorized for url:"):
                    # this class defines no 'CreateToken' method;
                    # only renew a token if a derived class provides one:
                    create_token = getattr(self, "CreateToken", None)
                    if callable(create_token):
                        logging.warning(f"{indent}renew token ...")
                        create_token(href, indent=indent)
                    # endif
                # endif

            except transient_errors as err:
                last_error = err
                # info ..
                logging.warning(f"{indent}exception from download; message received:")
                logging.warning(f"{indent}  %s" % str(err))
                logging.warning(f"{indent}maybe download was interrupted, try again ...")

            except MemoryError:
                logging.error("memory error from download; increase resources?")
                # quit with error:
                raise

            except Exception as err:
                # info ..
                logging.error("from download; message received:")
                logging.error("  %s" % str(err))
                # unknown problem, quit with error:
                raise

            finally:
                # never leave a partial download behind;
                # after a successful move the temporary file is already gone:
                if os.path.isfile(tmpfile):
                    os.remove(tmpfile)
                # endif

            # endtry

            # all attempts used?
            if ntry >= maxtry:
                logging.warning(f"{indent}tried {maxtry} times; exit ...")
                raise Exception(
                    f"failed to download '{href}' after {maxtry} attempts"
                ) from last_error
            # endif

            # next attempt:
            ntry += 1
            logging.warning(f"{indent}wait {nsec_wait} seconds ...")
            time.sleep(nsec_wait)
            logging.warning(f"{indent}attempt {ntry} / {maxtry} ...")

        # endwhile # retry

    # enddef DownloadFile


# endclass CSO_PAL_Downloader
class CSO_PAL_Downloader(object):
    """
    Class to download a single file from the
    `Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

    The :py:meth:`DownloadFile` method should be used to actually download a file.

    Usage::

        # initialize downloader:
        downloader = CSO_PAL_Downloader()
        # download file, store in specified file:
        downloader.DownloadFile(
            "https://data-portal.s5p-pal.com/cat/sentinel-5p/download/88c15681-db43-4219-b391-c8567e39cccf",
            "orbit.nc",
        )

    """

    def __init__(self):
        """
        Initialize downloader.
        """

    # enddef __init__

    # *

    def DownloadFile(self, href, output_file, maxtry=10, nsec_wait=60, indent=""):
        """
        Download file from PAL.

        The content is streamed to a temporary file ``<output_file>.tmp`` next to
        the destination, which is moved to ``output_file`` only after the download
        completed; a partial temporary file is removed if an attempt fails.

        If a request fails with an HTTP error or a network error (connection
        problem, timeout, interrupted transfer) it is tried again, up to a total
        of ``maxtry`` attempts, with a delay of ``nsec_wait`` seconds between requests.

        Arguments:

        * ``href`` : download url, for example::

            https://data-portal.s5p-pal.com/cat/sentinel-5p/download/88c15681-db43-4219-b391-c8567e39cccf

        * ``output_file`` : target file

        Optional arguments:

        * ``maxtry`` : maximum number of download attempts; at least one attempt is always made
        * ``nsec_wait`` : delay in seconds between requests
        * ``indent`` : prefix for log messages

        Raises:

        * ``Exception`` if the download still failed after ``maxtry`` attempts;
          the last request error is attached as cause
        * ``MemoryError`` and any other unexpected exception are logged and re-raised
        """

        # modules:
        import logging
        import os
        import time
        import requests

        # tools:
        import cso_file

        # always make at least one attempt:
        maxtry = max(1, maxtry)

        # temporary target in the same directory as the destination, so that
        # the final move never has to cross a file system boundary:
        tmpfile = output_file + ".tmp"
        # name used in log messages:
        product_file = os.path.basename(output_file)

        # network problems that are worth a new attempt:
        transient_errors = (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.ChunkedEncodingError,
        )

        # retry loop ..
        ntry = 1
        while True:
            # try to download and save:
            try:
                # stream the response instead of loading the product in memory:
                with requests.get(href, stream=True) as r:
                    # check status, raise error if request failed:
                    r.raise_for_status()
                    # info ..
                    logging.info(f"{indent}write to {product_file} ...")
                    # create target dir if necessary
                    # (assumes CheckDir creates the directory holding the file - TODO confirm):
                    cso_file.CheckDir(output_file)
                    # open temporary file for binary write:
                    with open(tmpfile, "wb") as fd:
                        for chunk in r.iter_content(chunk_size=1024 * 1024):
                            fd.write(chunk)
                        # endfor
                    # endwith
                # endwith
                # download complete, move to destination:
                os.replace(tmpfile, output_file)
                # all ok, leave retry loop:
                break

            except requests.exceptions.HTTPError as err:
                last_error = err
                # info ..
                msg = str(err)
                logging.warning(f"{indent}exception from download; message received:")
                logging.warning(f"{indent}  %s" % msg)
                # catch known problem ...
                if msg.startswith("401 Client Error: Unauthorized for url:"):
                    # this class defines no 'CreateToken' method;
                    # only renew a token if a derived class provides one:
                    create_token = getattr(self, "CreateToken", None)
                    if callable(create_token):
                        logging.warning(f"{indent}renew token ...")
                        create_token(href, indent=indent)
                    # endif
                # endif

            except transient_errors as err:
                last_error = err
                # info ..
                logging.warning(f"{indent}exception from download; message received:")
                logging.warning(f"{indent}  %s" % str(err))
                logging.warning(f"{indent}maybe download was interrupted, try again ...")

            except MemoryError:
                logging.error("memory error from download; increase resources?")
                # quit with error:
                raise

            except Exception as err:
                # info ..
                logging.error("from download; message received:")
                logging.error("  %s" % str(err))
                # unknown problem, quit with error:
                raise

            finally:
                # never leave a partial download behind;
                # after a successful move the temporary file is already gone:
                if os.path.isfile(tmpfile):
                    os.remove(tmpfile)
                # endif

            # endtry

            # all attempts used?
            if ntry >= maxtry:
                logging.warning(f"{indent}tried {maxtry} times; exit ...")
                raise Exception(
                    f"failed to download '{href}' after {maxtry} attempts"
                ) from last_error
            # endif

            # next attempt:
            ntry += 1
            logging.warning(f"{indent}wait {nsec_wait} seconds ...")
            time.sleep(nsec_wait)
            logging.warning(f"{indent}attempt {ntry} / {maxtry} ...")

        # endwhile # retry

    # enddef DownloadFile


# endclass CSO_PAL_Downloader
py/cso_s5p.py +20 −3 Original line number Diff line number Diff line Loading @@ -29,6 +29,9 @@ # 2023-12, Arjo Segers # Fixed bug in orbit selection. # # 2024-01, Arjo Segers # Switch between DataSpace and PAL downloader based on download link. # ######################################################################## ### Loading Loading @@ -2184,6 +2187,7 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # tools: import cso_file import cso_dataspace import cso_pal import utopya # info ... Loading Loading @@ -2523,13 +2527,26 @@ class CSO_S5p_Convert(utopya.UtopyaRc): if not os.path.isfile(input_file): # info .. logging.info(" not present yet, download ...") # download url: href = rec["href"] # initialize download? if downloader is None: # init download .. # init downloader based on url: if "dataspace.copernicus.eu" in href: # download from Copernicus DataSpace: downloader = cso_dataspace.CSO_DataSpace_Downloader() # elif "s5p-pal.com" in href: # download from PAL: downloader = cso_pal.CSO_PAL_Downloader() # else: logging.error("no downloader class defined for url: {href}") raise Exception # endif # endif # download ... downloader.DownloadFile(rec["href"], input_file, indent=" ") downloader.DownloadFile(href, input_file, indent=" ") # store name: downloads.append(input_file) # endif Loading