TNO Intern

Commit 1fbe2fc8 authored by Arjo Segers
Browse files

Inquire Dataspace per month after change in allowed maximum number of records....

Inquire Dataspace per month after change in allowed maximum number of records. Sort listing files by processing and processor version.
parent d392965f
Loading
Loading
Loading
Loading
+209 −173
Original line number Diff line number Diff line
@@ -25,6 +25,10 @@
#   Trap "404 Client Error", display message that inquiry table might need an update.
#   Sort listing inplace.
#
# 2024-11, Arjo Segers
#   Inquire Dataspace per month after change in allowed maximum number of records.
#   Sort listing files by processing and processor version.
#

########################################################################
###
@@ -277,36 +281,53 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
        # info ...
        logging.info(f"{indent}search all items in timerange ...")

        # search query could only return a maximum number of records;
        # a 'page' of records is requested using a row offset and the number of rows:
        row0 = 0
        nrow = 100

        # initialize search parameters;
        # for possible content, see:
        #   https://documentation.dataspace.copernicus.eu/APIs/OpenSearch.html
        params = {}
        # fill maximum time range:
        tfmt = "%Y-%m-%dT%H:%M:%SZ"
        params["startDate"] = t1.strftime(tfmt)
        params["completionDate"] = t2.strftime(tfmt)
        # fill domain:
        if box is not None:
            params["box"] = box
        # endif
        # fill product type:
        params["productType"] = producttype

        # search query could only return a maximum number of records:
        nrow = 100
        # fill paging info:
        params["maxRecords"] = nrow

        # init counter:
        # start of first month:
        tm = datetime.datetime(t1.year,t1.month,1)
        # loop over months:
        while tm < t2 :
            # info ...
            logging.info(f"{indent}  {tm.year}-{tm.month:0>2} ...")
            
            # month length:
            weekday,nday = calendar.monthrange(tm.year,tm.month)
            # start of month or overall range:
            tm1 = max( t1, tm )
            # end of month or overall range:
            tm2 = min( tm + datetime.timedelta(nday), t2 )

            # fill time range:
            tfmt = "%Y-%m-%dT%H:%M:%SZ"
            params["startDate"] = tm1.strftime(tfmt)
            params["completionDate"] = tm2.strftime(tfmt)

            # a 'page' of records is requested using a row offset and the number of rows;
            # init page counter:
            ipage = 0
            # init row offset used for messages:
            row0 = 0

            # loop over pages of query result:
            while True:
                # increase counter:
                ipage += 1
                # info ...
            logging.info(f"{indent}  page {ipage} (entries {row0+1},..,{row0+nrow})")
                logging.info(f"{indent}    page {ipage} ...")

                # fill page number:
                params["page"] = ipage
@@ -383,6 +404,13 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):

                # count:
                nrec = len(data["features"])
                # info ...
                if nrec == 0 :
                    logging.info(f"{indent}      no records found ...")
                else :
                    logging.info(f"{indent}      entries {row0+1:>4},..,{row0+nrec:>4}")
                # endif

                # loop over features:
                for feature in data["features"]:
                    # check ...
@@ -475,6 +503,11 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):

            # endwhile # pages
            
            # next:
            tm = tm2
            
        #endwhile # month loop

        # info ..
        logging.info(f"{indent}save to: %s ..." % output_file)
        # create directory:
@@ -485,7 +518,7 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
            # endif
        # endif
        # sort the output_df by increasing orbit number, then by processing and processor version
        output_df.sort_values(by="orbit", inplace=True)
        output_df.sort_values(by=["orbit","processing","processor_version"], inplace=True)
        # write:
        output_df.to_csv(output_file, sep=";", index=False)

@@ -808,9 +841,12 @@ class CSO_DataSpace_Downloader(object):
                    logging.warning(f"{indent}renew token ...")
                    self.CreateToken(href, indent=indent)
                elif msg.startswith("404 Client Error: Not Found for url:"):
                    logging.error(f"url seems not available anymore; maybe need to update inquiry table?")
                    # quit with error:
                    raise
                    # # info ...
                    # logging.error(f"url seems not available anymore; maybe need to update inquiry table?")
                    # # quit with error:
                    # raise
                    # warning ...
                    logging.warning(f"url seems not available, try again ...")
                # endif

            except MemoryError as err: