TNO Intern

Commit c8a14d51 authored by Arjo Segers's avatar Arjo Segers
Browse files

Store access token in object to avoid server errors. Retry if downloaded zipfile is corrupted.

parent a97b0ae0
Loading
Loading
Loading
Loading
+138 −63
Original line number Diff line number Diff line
@@ -6,6 +6,8 @@
#
# 2023-11, Arjo Segers
#   Extended error traps.
#   Store access token in object to avoid server errors.
#   Retry if downloaded zipfile is corrupted.
#

########################################################################
@@ -212,9 +214,6 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
        # combine into search url:
        search_url = f"{api_url}/collections/{collection}/search.json"

        ## authorization is done by header dict:.
        # headers = { "Authorization" : f"access_token {access_token}" }

        # time range:
        t1 = self.GetSetting("timerange.start", totype="datetime")
        t2 = self.GetSetting("timerange.end", totype="datetime")
@@ -306,15 +305,15 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
                    r.raise_for_status()
                except Exception as err:
                    msg = str(err)
                    logging.error(f"{indent}    from query; message received:")
                    logging.error(f"{indent}      %s" % msg)
                    logging.warning(f"{indent}    from query; message received:")
                    logging.warning(f"{indent}%s" % msg)
                    if ntry == maxtry:
                        logging.error(f"{indent}    tried {ntry} times now, exit ...")
                        raise Exception
                    else:
                        logging.error(f"{indent}    wait {nsec_wait} seconds ..")
                        logging.warning(f"{indent}    wait {nsec_wait} seconds ..")
                        time.sleep(nsec_wait)
                        logging.error(f"{indent}    try again ...")
                        logging.warning(f"{indent}    attempt {ntry} / {maxtry} ...")
                        ntry += 1
                        continue
                    # endif
@@ -459,7 +458,7 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
        # endwhile # pages

        # info ..
        logging.info("save to: %s ..." % output_file)
        logging.info(f"{indent}save to: %s ..." % output_file)
        # create directory:
        dirname = os.path.dirname(output_file)
        if len(dirname) > 0:
@@ -520,42 +519,47 @@ class NullAuth(requests.auth.AuthBase):
# *


class CSO_DataSpace_DownloadFile(object):
class CSO_DataSpace_Downloader(object):

    """
    Download single file from *Copernicus DataSpace*.
    Class to download a single file from *Copernicus DataSpace*.
    An instance of the class stores an access token
    that is re-used until it has expired.

    Arguments:
    The :py:meth:`DownloadFile` method should be used to
    actually download a file.

    * ``href`` : download url, for example::
    Usage::

        https://zipper.dataspace.copernicus.eu/odata/v1/Products('d483baa0-3a61-4985-aa0c-5642a83c9214')/$value
        # initialize downloader:
        downloader = CSO_DataSpace_Downloader()
        # download file:
        downloader.DownloadFile( "https://zipper.dataspace.copernicus.eu/odata/v1/Products('d483baa0-3a61-4985-aa0c-5642a83c9214')/$value", "orbit.nc" )

    * ``output_file`` : target file
    """

    Optional arguments:
    def __init__(self):

    * ``maxtry`` : number of times to try again if download fails
    * ``timeout`` : delay in seconds between requests
        """
        Initialize downloader.
        """

    def __init__(self, href, output_file, maxtry=10, timeout=60, indent=""):
        # no token yet:
        self.access_token = None

    # enddef __init__

    # *

    def CreateToken(self, href, indent=""):

        """
        Download file.
        Create access token.
        """

        # modules:
        import sys
        import os
        import time
        import urllib.parse
        import requests
        import zipfile
        import shutil

        # tools:
        import cso_file

        # number of seconds to wait in retry loop:
        nsec_wait = 10
@@ -596,7 +600,7 @@ class CSO_DataSpace_DownloadFile(object):
        domain = "identity.dataspace.copernicus.eu"
        url = f"https://{domain}/auth/realms/CDSE/protocol/openid-connect/token"
        # retry loop ..
        ntry = 0
        ntry = 1
        while True:
            # try to obtain token:
            try:
@@ -605,7 +609,7 @@ class CSO_DataSpace_DownloadFile(object):
                # check status, raise error if request failed:
                r.raise_for_status()
                # extract token from response:
                access_token = r.json()["access_token"]
                self.access_token = r.json()["access_token"]
                # all ok, leave try loop:
                break
            except requests.exceptions.HTTPError as err:
@@ -617,14 +621,27 @@ class CSO_DataSpace_DownloadFile(object):
                if msg.startswith("401 Client Error: Unauthorized for url:"):
                    logging.error(f"{indent}Possible causes:")
                    logging.error(f"{indent} * Just a random failure ...")
                    logging.error(f"{indent}      * The (login,password) pair received from your '~/.netrc' file are incorrect.")
                    logging.error(f"{indent}        For the Copernicus DataSpace, the file should contain:")
                    logging.error(
                        f"{indent} * The (login,password) pair received from your '~/.netrc' file are incorrect."
                    )
                    logging.error(
                        f"{indent}   For the Copernicus DataSpace, the file should contain:"
                    )
                    logging.error(f"{indent}     machine {p.netloc}   login ****  password ****")
                    logging.error(f"{indent}        If the machine was not found, a default might have been received.")
                    logging.error(
                        f"{indent}   If the machine was not found, a default might have been received."
                    )
                    logging.error(f"{indent}   Login received: {username}")
                    logging.error(f"{indent}      * System maintenance? Check the Copernicus DataSpace website.")
                    logging.error(
                        f"{indent} * Too many logins? Try to run single processing only."
                    )
                    logging.error(
                        f"{indent} * System maintenance? Check the Copernicus DataSpace website."
                    )
                else:
                    logging.error(f"{indent}      Access token creation failed; server response: {r.json()}")
                    logging.error(
                        f"{indent}Access token creation failed; server response: {r.json()}"
                    )
                # endif
            except:
                # info ...
@@ -637,21 +654,69 @@ class CSO_DataSpace_DownloadFile(object):
                logging.warning(f"{indent}tried {maxtry} times; exit ...")
                raise Exception
            else:
                logging.warning(f"{indent}      exception from token creation; wait {nsec_wait} seconds ...")
                logging.warning(
                    f"{indent}exception from token creation; wait {nsec_wait} seconds ..."
                )
                time.sleep(nsec_wait)
                logging.warning(f"{indent}      try again ...")
                logging.warning(f"{indent}attempt {ntry} / {maxtry} ...")
                continue  # while-loop
            # endif
        # endwhile # retry

    # enddef CreateToken

    # *

    def DownloadFile(self, href, output_file, maxtry=10, timeout=60, indent=""):

        """
        Download file from DataSpace.

        Arguments:

        * ``href`` : download url, for example::
    
            https://zipper.dataspace.copernicus.eu/odata/v1/Products('d483baa0-3a61-4985-aa0c-5642a83c9214')/$value

        * ``output_file`` : target file
    
        Optional arguments:
    
        * ``maxtry`` : number of times to try again if download fails
        * ``timeout`` : delay in seconds between requests

        """

        # modules:
        import sys
        import os
        import time
        import requests
        import zipfile
        import shutil

        # tools:
        import cso_file

        # number of seconds to wait in retry loop:
        nsec_wait = 10

        # no token yet?
        if self.access_token is None:
            # info ..
            logging.info(f"{indent}create token ...")
            # create token, re-use until error is received ...
            self.CreateToken(href, indent=indent)
        # endif

        # retry loop ..
        ntry = 0
        ntry = 1
        while True:
            # try to download and save:
            try:

                # fill authorization token in header:
                headers = {"Authorization": f"Bearer {access_token}"}
                headers = {"Authorization": f"Bearer {self.access_token}"}
                # ensure that "~/.netrc" is ignored by passing null-authorization,
                # otherwise the token in the header is overwritten by a token formed
                # from the login/password in the rcfile if that is found:
@@ -716,8 +781,13 @@ class CSO_DataSpace_DownloadFile(object):
            except requests.exceptions.HTTPError as err:
                # info ..
                msg = str(err)
                logging.error("exception from download; message received:")
                logging.error("  %s" % msg)
                logging.warning(f"{indent}exception from download; message received:")
                logging.warning(f"{indent}  %s" % msg)
                # catch known problem ...
                if msg.startswith("401 Client Error: Unauthorized for url:"):
                    logging.warning(f"{indent}renew token ...")
                    self.CreateToken(href, indent=indent)
                # endif

            except MemoryError as err:
                logging.error("memory error from download; increase resources?")
@@ -728,8 +798,13 @@ class CSO_DataSpace_DownloadFile(object):
                # info ..
                logging.error("from download; message received:")
                logging.error("  %s" % str(err))
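                # catch known problem ...
                # NOTE(review): `msg` appears to be assigned only in the HTTPError
                # handler; presumably this test should use str(err) -- confirm.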
                if msg.startswith("File is not a zip file"):
                    logging.warning(f"{indent}maybe download was interrupted, try again  ...")
                else:
                    # quit with error:
                    raise
                # endif

            # endtry

@@ -740,18 +815,18 @@ class CSO_DataSpace_DownloadFile(object):
                logging.warning(f"{indent}tried {maxtry} times; exit ...")
                raise Exception
            else:
                logging.warning(f"{indent}      exception from download; wait {nsec_wait} seconds ...")
                logging.warning(f"{indent}wait {nsec_wait} seconds ...")
                time.sleep(nsec_wait)
                logging.warning(f"{indent}      try again ...")
                logging.warning(f"{indent}attempt {ntry} / {maxtry} ...")
                continue  # while-loop
            # endif

        # endwhile # retry

    # enddef __init__
    # enddef DownloadFile


# endclass CSO_DataSpace_DownloadFile
# endclass CSO_DataSpace_Downloader


########################################################################
+83 −71

File changed.

Preview size limit exceeded, changes collapsed.