TNO Intern

Commit 66477802 authored by Arjo Segers's avatar Arjo Segers
Browse files

Introduced downloader class for PAL website.

parent 9e6159d7
Loading
Loading
Loading
Loading
+158 −1
Original line number Diff line number Diff line
@@ -4,6 +4,9 @@
# 2023-08, Arjo Segers
#   Reformatted using 'black'.
#
# 2024-01, Arjo Segers
#   Added 'CSO_PAL_Downloader' class.
#

########################################################################
###
@@ -34,7 +37,7 @@ PAL API
See the PAL `API info <https://data-portal.s5p-pal.com/cat-doc>`_ for latest info.

S5P-PAL product files can be selected and downloaded using the *Spatio Temporal Asset Catalog* (STAC).
The `PySTAC <https://pystac-client.readthedocs.io/en/latest/` Python interface is used for access.
The `PySTAC <https://pystac-client.readthedocs.io/en/latest/>`_ Python interface is used for access.


Class hierchy
@@ -46,6 +49,7 @@ The classes and are defined according to the following hierchy:
* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_PAL_Inquire`
  * :py:class:`.CSO_PAL_Downloader`


Classes
@@ -295,6 +299,159 @@ class CSO_PAL_Inquire(utopya.UtopyaRc):
# endclass CSO_PAL_Inquire


########################################################################
###
### PAL download
###
########################################################################


class CSO_PAL_Downloader(object):

    """
    Class to download single file from the `Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

    The :py:meth:`DownloadFile` method should be used to
    actually download a file.

    Usage::

        # initialize downloader:
        downloader = CSO_PAL_Downloader()
        # download file, store in specified file:
        downloader.DownloadFile( "https://data-portal.s5p-pal.com/cat/sentinel-5p/download/88c15681-db43-4219-b391-c8567e39cccf", "orbit.nc" )

    """

    def __init__(self):

        """
        Initialize downloader.
        """

    # enddef __init__

    # *

    def DownloadFile(self, href, output_file, maxtry=10, nsec_wait=60, indent=""):

        """
        Download file from PAL.

        If a request fails with an HTTP error it is tried again up to a maximum
        of ``maxtry`` attempts in total, with a delay of ``nsec_wait`` seconds
        between requests.

        The content is streamed to a temporary file ``<output_file>.tmp`` next to
        the target, which is renamed to ``output_file`` only after the download
        completed; an interrupted download therefore never leaves a partial
        file under the final name.

        Arguments:

        * ``href`` : download url, for example::

            https://data-portal.s5p-pal.com/cat/sentinel-5p/download/88c15681-db43-4219-b391-c8567e39cccf

        * ``output_file`` : target file

        Optional arguments:

        * ``maxtry`` : maximum number of download attempts
        * ``nsec_wait`` : delay in seconds between requests
        * ``indent`` : prefix for (most of) the log messages

        Raises:

        * ``MemoryError`` : re-raised after logging if the download runs out of memory
        * ``Exception`` : if all ``maxtry`` attempts failed; any other unexpected
          error during download or storage is logged and re-raised as it is

        """

        # modules:
        import os
        import time
        import requests

        # tools:
        import cso_file

        # retry loop ..
        ntry = 1
        while True:
            # try to download and save:
            try:

                # open connection; stream the content instead of loading
                # the entire product in memory:
                with requests.get(href, stream=True) as r:
                    # check status, raise error if request failed:
                    r.raise_for_status()

                    # info ..
                    logging.info(f"{indent}write to {os.path.basename(output_file)} ...")
                    # create target dir if necessary:
                    cso_file.CheckDir(output_file)
                    # write to temporary file in the target directory first,
                    # the final rename then never has to cross file systems:
                    tmpfile = output_file + ".tmp"
                    # open destination file for binary write:
                    with open(tmpfile, "wb") as fd:
                        # prefered way to write content following:
                        #   https://docs.python-requests.org/en/master/user/quickstart/
                        for chunk in r.iter_content(chunk_size=1024 * 1024):
                            fd.write(chunk)
                        # endfor
                    # endwith
                # endwith

                # download complete, move to destination:
                os.replace(tmpfile, output_file)

                # all ok, leave retry loop:
                break

            except requests.exceptions.HTTPError as err:
                # info ..
                msg = str(err)
                logging.warning(f"{indent}exception from download; message received:")
                logging.warning(f"{indent}  {msg}")
                # catch known problem ...
                if msg.startswith("401 Client Error: Unauthorized for url:"):
                    # this class has no token support itself;
                    # only renew if a derived class provides the method:
                    renew = getattr(self, "CreateToken", None)
                    if callable(renew):
                        logging.warning(f"{indent}renew token ...")
                        renew(href, indent=indent)
                    else:
                        logging.warning(f"{indent}unauthorized, no token renewal available; try again ...")
                    # endif
                # endif

            except MemoryError as err:
                logging.error("memory error from download; increase resources?")
                # quit with error:
                raise

            except Exception as err:
                # info ..
                msg = str(err)
                logging.error("from download; message received:")
                logging.error("  %s", msg)
                # catch known problem ...
                if msg.startswith("File is not a zip file"):
                    logging.warning(f"{indent}maybe download was interrupted, try again  ...")
                else:
                    # quit with error:
                    raise
                # endif

            # endtry

            # attempt failed; give up if the maximum number is reached:
            if ntry >= maxtry:
                logging.warning(f"{indent}tried {ntry} times; exit ...")
                raise Exception(f"download failed after {ntry} attempts: {href}")
            # endif

            # next attempt after delay:
            ntry += 1
            logging.warning(f"{indent}wait {nsec_wait} seconds ...")
            time.sleep(nsec_wait)
            logging.warning(f"{indent}attempt {ntry} / {maxtry} ...")

        # endwhile # retry

    # enddef DownloadFile


# endclass CSO_PAL_Downloader


########################################################################
###
### end
+20 −3
Original line number Diff line number Diff line
@@ -29,6 +29,9 @@
# 2023-12, Arjo Segers
#   Fixed bug in orbit selection.
#
# 2024-01, Arjo Segers
#   Switch between DataSpace and PAL downloader based on download link.
#

########################################################################
###
@@ -2184,6 +2187,7 @@ class CSO_S5p_Convert(utopya.UtopyaRc):
        # tools:
        import cso_file
        import cso_dataspace
        import cso_pal
        import utopya

        # info ...
@@ -2523,13 +2527,26 @@ class CSO_S5p_Convert(utopya.UtopyaRc):
                if not os.path.isfile(input_file):
                    # info ..
                    logging.info("      not present yet, download ...")
                    # download url:
                    href = rec["href"]
                    # initialize download?
                    if downloader is None:
                        # init download ..
                        # init downloader based on url:
                        if "dataspace.copernicus.eu" in href:
                            # download from Copernicus DataSpace:
                            downloader = cso_dataspace.CSO_DataSpace_Downloader()
                        #
                        elif "s5p-pal.com" in href:
                            # download from PAL:
                            downloader = cso_pal.CSO_PAL_Downloader()
                        #
                        else:
                            logging.error("no downloader class defined for url: {href}")
                            raise Exception
                        # endif
                    # endif
                    # download ...
                    downloader.DownloadFile(rec["href"], input_file, indent="        ")
                    downloader.DownloadFile(href, input_file, indent="        ")
                    # store name:
                    downloads.append(input_file)
                # endif