TNO Intern

Commit d38265a6 authored by Arjo Segers's avatar Arjo Segers
Browse files

Update DataSpace inquire to ensure that results are the same when repeated.

parent 45374b1f
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -459,3 +459,10 @@ v2.9.6
Changed insert of new columns to dataframe after efficiency warning.
  py/cso_superobs.py

v2.9.7
~~~~~~

Increased maximum number of records and introduced sort order to ensure that
a search request returns the same result when repeated.
  py/cso_dataspace.py
+12 −5
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
# 2024-11, Arjo Segers
#   Inquire Dataspace per month after change in allowed maximum number of records.
#   Sort listing files by processing and processor version.
#   Increased maximum number of records and introduced sort order to ensure that
#   a search request returns the same result when repeated.
#

########################################################################
@@ -292,10 +294,15 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
        # fill product type:
        params["productType"] = producttype

        # search query could only return a maximum number of records:
        nrow = 100
        # a search query can only return a limited number of records;
        # use a large value to obtain the full result in a single page,
        # since the catalogue might return different pages due to inconsistent sorting ...
        nrow = 2000
        # fill paging info:
        params["maxRecords"] = nrow
        # sort on publication date to ensure that pages are the same when requested again:
        params["sortParam"] = "published"
        params["sortOrder"] = "ascending"

        # start of first month:
        tm = datetime.datetime(t1.year,t1.month,1)
@@ -379,7 +386,7 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
                #  }
                #
                # save result?
                if True:
                if False:
                    # target file:
                    qfile = "query.json"
                    # save:
@@ -463,8 +470,8 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc):
                    if len(output_df) > 0:
                        # same href already stored?
                        if href in output_df["href"].values:
                            ## testing ...
                            # logging.warning(f"ignore double product_id: {product_id}")
                            # testing ...
                            logging.warning(f"{indent}      ignore double product_id: {product_id}")
                            # ignore record:
                            continue
                        # endif