Loading py/cso_dataspace.py +209 −173 Original line number Diff line number Diff line Loading @@ -25,6 +25,10 @@ # Trap "404 Client Error", display messate that inquire table might need an update. # Sort listing inplace. # # 2024-11, Arjo Segers # Inquire Dataspace per month after change in allowed maximum number of records. # Sort listing files by processing and processor version. # ######################################################################## ### Loading Loading @@ -277,36 +281,53 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # info ... logging.info(f"{indent}search all items in timerange ...") # search query could only return a maximum number of records; # a 'page' of records is requested using a row offset and the number of rows: row0 = 0 nrow = 100 # initialize search parameters; # for possible content, see: # https://documentation.dataspace.copernicus.eu/APIs/OpenSearch.html params = {} # fill maximum time range: tfmt = "%Y-%m-%dT%H:%M:%SZ" params["startDate"] = t1.strftime(tfmt) params["completionDate"] = t2.strftime(tfmt) # fill domain: if box is not None: params["box"] = box # endif # fill product type: params["productType"] = producttype # search query could only return a maximum number of records: nrow = 100 # fill paging info: params["maxRecords"] = nrow # init counter: # start of first month: tm = datetime.datetime(t1.year,t1.month,1) # loop over months: while tm < t2 : # info ... logging.info(f"{indent} {tm.year}-{tm.month:0>2} ...") # month length: weekday,nday = calendar.monthrange(tm.year,tm.month) # start of month or overall range: tm1 = max( t1, tm ) # end of month or overall range: tm2 = min( tm + datetime.timedelta(nday), t2 ) # fill time range: tfmt = "%Y-%m-%dT%H:%M:%SZ" params["startDate"] = tm1.strftime(tfmt) params["completionDate"] = tm2.strftime(tfmt) # a 'page' of records is requested using a row offset and the number of rows; # init page counter: ipage = 0 # init row offset used for messages: row0 = 0 # loop over pages of query result: while True: # increase counter: ipage += 1 # info ... logging.info(f"{indent} page {ipage} (entries {row0+1},..,{row0+nrow})") logging.info(f"{indent} page {ipage} ...") # fill page number: params["page"] = ipage Loading Loading @@ -383,6 +404,13 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # count: nrec = len(data["features"]) # info ... if nrec == 0 : logging.info(f"{indent} no records found ...") else : logging.info(f"{indent} entries {row0+1:>4},..,{row0+nrec:>4}") # endif # loop over features: for feature in data["features"]: # check ... Loading Loading @@ -475,6 +503,11 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # endwhile # pages # next: tm = tm2 #endwhile # month loop # info .. logging.info(f"{indent}save to: %s ..." % output_file) # create directory: Loading @@ -485,7 +518,7 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # endif # endif # sort the output_df by increasing orbit number output_df.sort_values(by="orbit", inplace=True) output_df.sort_values(by=["orbit","processing","processor_version"], inplace=True) # write: output_df.to_csv(output_file, sep=";", index=False) Loading Loading @@ -808,9 +841,12 @@ class CSO_DataSpace_Downloader(object): logging.warning(f"{indent}renew token ...") self.CreateToken(href, indent=indent) elif msg.startswith("404 Client Error: Not Found for url:"): logging.error(f"url seems not available anymore; maybe need to update inquiry table?") # quit with error: raise # # info ... # logging.error(f"url seems not available anymore; maybe need to update inquiry table?") # # quit with error: # raise # warning ... logging.warning(f"url seems not available, try again ...") # endif except MemoryError as err: Loading Loading
py/cso_dataspace.py +209 −173 Original line number Diff line number Diff line Loading @@ -25,6 +25,10 @@ # Trap "404 Client Error", display messate that inquire table might need an update. # Sort listing inplace. # # 2024-11, Arjo Segers # Inquire Dataspace per month after change in allowed maximum number of records. # Sort listing files by processing and processor version. # ######################################################################## ### Loading Loading @@ -277,36 +281,53 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # info ... logging.info(f"{indent}search all items in timerange ...") # search query could only return a maximum number of records; # a 'page' of records is requested using a row offset and the number of rows: row0 = 0 nrow = 100 # initialize search parameters; # for possible content, see: # https://documentation.dataspace.copernicus.eu/APIs/OpenSearch.html params = {} # fill maximum time range: tfmt = "%Y-%m-%dT%H:%M:%SZ" params["startDate"] = t1.strftime(tfmt) params["completionDate"] = t2.strftime(tfmt) # fill domain: if box is not None: params["box"] = box # endif # fill product type: params["productType"] = producttype # search query could only return a maximum number of records: nrow = 100 # fill paging info: params["maxRecords"] = nrow # init counter: # start of first month: tm = datetime.datetime(t1.year,t1.month,1) # loop over months: while tm < t2 : # info ... logging.info(f"{indent} {tm.year}-{tm.month:0>2} ...") # month length: weekday,nday = calendar.monthrange(tm.year,tm.month) # start of month or overall range: tm1 = max( t1, tm ) # end of month or overall range: tm2 = min( tm + datetime.timedelta(nday), t2 ) # fill time range: tfmt = "%Y-%m-%dT%H:%M:%SZ" params["startDate"] = tm1.strftime(tfmt) params["completionDate"] = tm2.strftime(tfmt) # a 'page' of records is requested using a row offset and the number of rows; # init page counter: ipage = 0 # init row offset used for messages: row0 = 0 # loop over pages of query result: while True: # increase counter: ipage += 1 # info ... logging.info(f"{indent} page {ipage} (entries {row0+1},..,{row0+nrow})") logging.info(f"{indent} page {ipage} ...") # fill page number: params["page"] = ipage Loading Loading @@ -383,6 +404,13 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # count: nrec = len(data["features"]) # info ... if nrec == 0 : logging.info(f"{indent} no records found ...") else : logging.info(f"{indent} entries {row0+1:>4},..,{row0+nrec:>4}") # endif # loop over features: for feature in data["features"]: # check ... Loading Loading @@ -475,6 +503,11 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # endwhile # pages # next: tm = tm2 #endwhile # month loop # info .. logging.info(f"{indent}save to: %s ..." % output_file) # create directory: Loading @@ -485,7 +518,7 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # endif # endif # sort the output_df by increasing orbit number output_df.sort_values(by="orbit", inplace=True) output_df.sort_values(by=["orbit","processing","processor_version"], inplace=True) # write: output_df.to_csv(output_file, sep=";", index=False) Loading Loading @@ -808,9 +841,12 @@ class CSO_DataSpace_Downloader(object): logging.warning(f"{indent}renew token ...") self.CreateToken(href, indent=indent) elif msg.startswith("404 Client Error: Not Found for url:"): logging.error(f"url seems not available anymore; maybe need to update inquiry table?") # quit with error: raise # # info ... # logging.error(f"url seems not available anymore; maybe need to update inquiry table?") # # quit with error: # raise # warning ... logging.warning(f"url seems not available, try again ...") # endif except MemoryError as err: Loading