diff --git a/CHANGELOG b/CHANGELOG index 96decfe671451df3d271b5818cc4da4f53c669ed..3e48756a27c2f43f9e9d2d907550349bfb9cf3e1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -549,3 +549,25 @@ Plot pixels als polygons in case track information is not present. src/cso/cso_plot.py +v2.15 +----- + +Updates for access to Copernicus DataSpace: use STAC API to inquire, and download from S3 buckets. + src/cso/cso_dataspace.py + src/cso/cso_s5p.py + config/tutorial/tutorial.rc + pyproject.toml + +Updated documentation. + doc/source/s5p-no2.rst + src/cso/cso_colocate.py + src/cso/cso_file.py + src/cso/cso_gridded.py + src/cso/cso_mapping.py + src/cso/cso_pal.py + src/cso/cso_regions.py + src/cso/cso_superobs.py + src/cso/cso_tools.py + src/cso/cso_viirs.py + src/rc.py + src/utopya/utopya_tools.py diff --git a/config/tutorial/tutorial.rc b/config/tutorial/tutorial.rc index 8af4063d05a89473931e3242389bce5f5f1ead92..c3d988d28ae784aa8df6ef2148b1fb5c6b080c09 100644 --- a/config/tutorial/tutorial.rc +++ b/config/tutorial/tutorial.rc @@ -301,21 +301,13 @@ VIRTUAL_ENV : ! full time range: cso.tutorial.inquire-table-dataspace.timerange.start : 2018-01-01 00:00:00 cso.tutorial.inquire-table-dataspace.timerange.end : 2025-01-01 00:00:00 -!! ... testing ... -!cso.tutorial.inquire-table-dataspace.timerange.start : 2018-06-01 00:00:00 -!cso.tutorial.inquire-table-dataspace.timerange.end : 2018-07-01 00:00:00 ! API url: -cso.tutorial.inquire-table-dataspace.url : https://catalogue.dataspace.copernicus.eu/resto/api +cso.tutorial.inquire-table-dataspace.url : https://stac.dataspace.copernicus.eu/v1 ! collection name: -cso.tutorial.inquire-table-dataspace.collection : Sentinel5P - -! product type, always 10 characters! -! L2__NO2___ -! L2__CO____ -! ... -cso.tutorial.inquire-table-dataspace.producttype : L2__NO2___ +cso.tutorial.inquire-table-dataspace.collections : sentinel-5p-l2-no2-rpro \ + sentinel-5p-l2-no2-offl ! 
target area; !!~ empty for no limitation: @@ -323,9 +315,6 @@ cso.tutorial.inquire-table-dataspace.producttype : L2__NO2___ !~ domain specified as: west,south,east,north cso.tutorial.inquire-table-dataspace.area : ${my.region.west},${my.region.south},${my.region.east},${my.region.north} -! template for download url given "{product_id}": -cso.tutorial.inquire-table-dataspace.download_url : https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value - ! output table, date of today: cso.tutorial.inquire-table-dataspace.output.file : ${my.work}/Copernicus/Copernicus_S5p_NO2_dataspace__%Y-%m-%d.csv @@ -339,7 +328,7 @@ cso.tutorial.inquire-plot.renew : True cso.tutorial.inquire-plot.file : ${cso.tutorial.inquire-table-dataspace.output.file} !!~ specify dates ("yyyy-mm-dd") to use historic tables, !! default is table of today: -!cso.tutorial.inquire-plot.filedate : 2022-01-18 +!cso.tutorial.inquire-plot.filedate : 2026-03-11 ! annote: cso.tutorial.inquire-plot.title : S5p/NO2 %Y-%m-%d @@ -371,7 +360,7 @@ cso.tutorial.convert.timerange.end : ${my.timerange.end} ! listing of available source files, created by 'inquire' job: cso.tutorial.convert.inquire.file : ${my.work}/Copernicus/Copernicus_S5p_NO2_dataspace__%Y-%m-%d.csv !!~ historic inquire ... -!cso.tutorial.convert.inquire.filedate : 2024-01-18 +!cso.tutorial.convert.inquire.filedate : 2026-03-25 ! selection keyword: my.tutorial.selection : C03 diff --git a/doc/source/history.rst b/doc/source/history.rst index 4e999294353c1c9326a02f764d1219a2a2bba61f..491444974f2a6a05a764bf244689023bae2da7e0 100644 --- a/doc/source/history.rst +++ b/doc/source/history.rst @@ -124,6 +124,9 @@ A summary of the versions and changes. * | *v2.14* | Plot pixels als polygons in case track information is not present. + +* | *v2.15* + | Updates for access to Copernicus DataSpace: use STAC API to inquire, and download from S3 buckets. 
To be done diff --git a/doc/source/s5p-no2.rst b/doc/source/s5p-no2.rst index b876d7e41cc6aaf0f011199ba709b578222bb47a..69e525cfcf9d42afb36dbab58be568e7247c97d8 100644 --- a/doc/source/s5p-no2.rst +++ b/doc/source/s5p-no2.rst @@ -112,6 +112,8 @@ This is used to obtain the alternative retrieval and tropospheric averaging kern scalled versions of the original variables: .. math:: + :nowrap: + \begin{eqnarray} \hat{\mathbf{y}}_r(\hat{\mathbf{x}}_a) &=& \frac{M^{trop}}{\hat{M}^{trop}(\hat{\mathbf{x}}_a)}\ \mathbf{y}_r \\ \hat{\mathbf{A}}^{trop}(\hat{\mathbf{x}}_a) &=& \frac{M^{trop}}{\hat{M}^{trop}(\hat{\mathbf{x}}_a)}\ \mathbf{A}^{trop} \\ @@ -121,6 +123,8 @@ scalled versions of the original variables: A simulation of the retrieval from the same model concentrations becomes: .. math:: + :nowrap: + \begin{eqnarray} \hat{\mathbf{y}}_s(\hat{\mathbf{x}}_a) &=& \hat{\mathbf{A}}^{trop}(\hat{\mathbf{x}}_a)\ \mathbf{V}\mathbf{G}\ \hat{\mathbf{x}}_a \\ diff --git a/pyproject.toml b/pyproject.toml index d52ded81ee906d352e34f72452be2ab2f3918c33..aba050fe4181037702d3a0141782d674ece4a124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "cso" -version = "2.14" +version = "2.15" authors = [ { name = "Arjo Segers" }, { name = "Lewis Blake" }, @@ -38,6 +38,9 @@ dependencies = [ "scipy >=1.1.0", "earthaccess", "requests", + "pystac", + "pystac_client", + "boto3", "typer", "python-magic", ] diff --git a/src/cso/cso_colocate.py b/src/cso/cso_colocate.py index 53da05cdf71e56c7ecf4eac33f3b43a44721a869..d2551d0bff842b36d60b80e7b9ce09587f6b474f 100644 --- a/src/cso/cso_colocate.py +++ b/src/cso/cso_colocate.py @@ -14,6 +14,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. 
+# ######################################################################## @@ -282,7 +285,7 @@ class CSO_CoLocate(utopya.UtopyaRc): loc_lat = self.GetSetting("locations.latitude") # read: locations = pandas.read_csv( - locations_file, sep="\s*" + sep + "\s*", engine="python", comment=comment + locations_file, sep=f"\\s*{sep}\\s*", engine="python", comment=comment ) # count: nloc = len(locations) diff --git a/src/cso/cso_dataspace.py b/src/cso/cso_dataspace.py index 4a595ea7f627e1f996cd32170972847460a4f44b..11a33c216aae29a8d29004db25f20649eb223a76 100644 --- a/src/cso/cso_dataspace.py +++ b/src/cso/cso_dataspace.py @@ -46,6 +46,11 @@ # 2025-10, Arjo Segers # Use mimetype of downloaded product to decide on postprocessing. # +# 2026-03, Arjo Segers +# Use STAC API to inquire Copernicus Dataspace. +# Files are downloaded from S3 buckets. +# Save credentials needed to download from S3 bucket to file in home directory. +# ######################################################################## @@ -79,7 +84,7 @@ On a Linux system, login/passwords for websites can be stored in the users ``.ne in the home directory. Create this file if it does not exist yet, and add the following line with the login name of the account (your email) and the chosen password:: - machine zipper.dataspace.copernicus.eu login Your.Name@institute.org password *********** + machine eodata.dataspace.copernicus.eu login Your.Name@institute.org password *********** The file should be readible and writable for you only:: @@ -94,13 +99,8 @@ DataSpace API's The *DataSpace* could be access with a number of different `APIs `_. -Currently the `OpenSearch API `_ -is used as that was the first that worked as needed. - -In future the `STAC API `_ might be used, +Currently the `STAC API `_ is used, as this is becoming more and more the standard in the Earth Observation community. -Within CSO it was already used by for example :ref:`pal-api`, but could not get working yet -for the *DataSpace*. 
See the `STAC product catalog `_ for more information. @@ -116,7 +116,6 @@ The classes and are defined according to the following hierchy: * :py:class:`.CSO_DataSpace_Inquire` * :py:class:`CSO_DataSpace_Downloader` -* :py:class:`NullAuth` Classes @@ -146,25 +145,41 @@ import utopya ### ######################################################################## +# +# Use the browser: +# https://browser.stac.dataspace.copernicus.eu/ +# Select for example "list view", and scroll down to reach: +# Sentinel-5P Level 2 Nitrogen Dioxide (RPRO) +# This points to: +# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-5p-l2-no2-rpro +# which reveals the name of the "collection": +# sentinel-5p-l2-no2-rpro +# +# Click on the Collection link. +# Click on one of the "Items": https://browser.stac.dataspace.copernicus.eu/collections/sentinel-5p-l2-no2-rpro/items/S5P_RPRO_L2__NO2____20251007T075904_20251007T094231_41370_03_020901_20260204T205955 +# Click on the "Source" button on the top right +# Follow the link to the STAC metadata file: https://stac.dataspace.copernicus.eu/v1/collections/sentinel-5p-l2-no2-rpro/items/S5P_RPRO_L2__NO2____20251007T075904_20251007T094231_41370_03_020901_20260204T205955 +# Page shows dict (use "pretty print" option) with content, +# for example the download link: +# { ..., +# "assets": { +# "netcdf": { +# "href": "s3://eodata/Sentinel-5P/TROPOMI/L2__NO2___/2025/10/06/S5P_RPRO_L2__NO2____20251006T150410_20251006T164737_41360_03_020901_20260203T132453.nc", +# ... +# } +# ... +# } +# } +# class CSO_DataSpace_Inquire(utopya.UtopyaRc): """ - Inquire available Sentinel data from the - `Copernicus DataSpace `_. - - Before data could be downloaded from the *DataSpace*, setup your :ref:`dataspace-account`. + Inquire available Sentinel data from the `Copernicus DataSpace `_. - Currently the `OpenSearch API `_ - is used as that was the first that worked as needed; - in future, the `STAC product catalog `_ - might be used.
- See also the `OpenSearch Description `_ - for the various parameters that the API accepts. - - A query is sent to search for products that are available + This class sends a query to the server to search for data files that are available for a certain time and overlap with a specified region. - The result is a list with orbit files and instructions on how to download them. + The result is a csv table with orbit files and instructions on how to download them. In the settings, specify the time range over which files should be searched:: @@ -173,16 +188,33 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): Specify the base url of the API:: - .url : https://catalogue.dataspace.copernicus.eu/resto/api + .url : https://stac.dataspace.copernicus.eu/v1 + + The `STAC product catalog `_ is used to list the available data. + Different data sets are stored as *collections*. + Use the `STAC Browser `_ for a first overview. + Select for example ``list view`` and scroll down to reach: + + * `Sentinel-5P Level 2 Nitrogen Dioxide (RPRO) `_ + + On the top right is an information button ``[i Source]`` ; open this to see that the *collection id* is ``sentinel-5p-l2-no2-rpro``. + This id describes that the data set: + + * holds data out of the *Sentinel-5P* mission (TROPOMI instrument); + * has level-2 data (retrieved product); + * provides a NO\\ :sub:`2` product; + * was produced in a *reprocessing* stream. - Define the collection name with:: + To obtain a full time series it is probably necessary to combine different processing streams, for example: - .collection : Sentinel5P + * near-real-time data that is available within a few hours after observation; + * offline data that is available within a few weeks; + * reprocessed data for historical observations that have been processed again using a more recent version of the retrieval software. - Provide a product type:: + The STAC Browser could be used to search for collection names corresponding to the various processing streams.
+ Specify a list of collections that should be inquired; in this example, inquire only data from the *offline* and *reprocessing* streams:: - ! product type (always 10 characters!): - .producttype : L2__NO2___ + .collections : sentinel-5p-l2-no2-rpro sentinel-5p-l2-no2-offl Eventually specify a target area, only orbits with some pixels within the defined box will be downloaded:: @@ -190,22 +222,16 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): .area : !.area : -30,30,35,76 - The table will also create the url's to download a file; - specifity the template that should be used: - - ! template for download url given "{product_id}": - .download_url : https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value - - Name of output csv file:: + Specify the name of the output csv file:: ! output table, here including date of today: - .output.file : ${my.work}/PAL_S5P_NO2_%Y-%m-%d.csv + .output.file : /work/Copernicus/Copernicus_S5p_NO2_dataspace__%Y-%m-%d.csv Example records (with extra whitespace to show the columns):: orbit;start_time ;end_time ;processing;collection;processor_version;filename ;href - 11488;2020-01-01 02:34:16;2020-01-01 04:15:46;RPRO ;03 ;020400 ;S5P_RPRO_L2__CH4____20200101T023416_20200101T041546_11488_03_020400_20221120T003820.nc;https://zipper.dataspace.copernicus.eu/odata/v1/Products(b3f240e6-505d-4cae-97ea-43a8778a318d)/$value - 11487;2020-01-01 00:52:46;2020-01-01 02:34:16;RPRO ;03 ;020400 ;S5P_RPRO_L2__CH4____20200101T005246_20200101T023416_11487_03_020400_20221120T003818.nc;https://zipper.dataspace.copernicus.eu/odata/v1/Products(a3d40f81-6c86-44bc-bc4b-457ff069b121)/$value + 03285;2018-06-01 22:19:51;2018-06-02 00:03:19;RPRO ;01 ;010202 ;S5P_RPRO_L2__NO2____20180601T221951_20180602T000319_03285_01_010202_20190207T004558.nc;s3://eodata/Sentinel-5P/TROPOMI/L2__NO2___/2018/06/01/S5P_RPRO_L2__NO2____20180601T221951_20180602T000319_03285_01_010202_20190207T004558.nc + 03285;2018-06-01 22:20:50;2018-06-02 00:02:20;RPRO ;03 ;020400 
;S5P_RPRO_L2__NO2____20180601T222050_20180602T000220_03285_03_020400_20221129T161641.nc;s3://eodata/Sentinel-5P/TROPOMI/L2__NO2___/2018/06/01/S5P_RPRO_L2__NO2____20180601T222050_20180602T000220_03285_03_020400_20221129T161641.nc : """ @@ -221,7 +247,8 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): import datetime import calendar import time - import requests + import pystac + import pystac_client import pandas # info ... @@ -239,21 +266,16 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): api_url = self.GetSetting("url") # info ... logging.info(f"{indent}API url : {api_url}") + # get client: + client = pystac_client.Client.open(api_url) - # template url for downloads: - download_url = self.GetSetting("download_url") + # collections: + collections = self.GetSetting("collections").split() # info ... - logging.info(f"{indent}download url : {download_url}") - - # collection: - collection = self.GetSetting("collection") - # info ... - logging.info(f"{indent}collection : {collection}") - - # combine into search url: - search_url = f"{api_url}/collections/{collection}/search.json" - # info ... - logging.info(f"{indent}search url : {search_url}") + logging.info(f"{indent}collections:") + for collection in collections: + logging.info(f"{indent} {collection}") + #endfor # time range: t1 = self.GetSetting("timerange.start", totype="datetime") @@ -262,12 +284,6 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): tfmt = "%Y-%m-%d %H:%M" logging.info(f"{indent}timerange : [{t1.strftime(tfmt)},{t2.strftime(tfmt)}") - # product type (always 10 characters!): - # L2__NO2___ - producttype = self.GetSetting("producttype") - # info ... - logging.info(f"{indent}product type : {producttype}") - # area of interest: west,south:east,north area = self.GetSetting("area") # defined? @@ -276,11 +292,9 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # west,south:east,north west, south, east, north = map(float, area.replace(":", " ").replace(",", " ").split()) # info ... 
- logging.info( - f"{indent}area : [{west:.2f},{east:.2f}] x [{south:.2f},{north:.2f}]" - ) - # box parameter: - box = f"{west},{south},{east},{north}" + logging.info(f"{indent}area : [{west:.2f},{east:.2f}] x [{south:.2f},{north:.2f}]" ) + # bounding box parameter: + bbox = [west, south, east, north] else: # info ... logging.info(f"{indent}area : no") @@ -299,241 +313,127 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): # info ... logging.info(f"{indent}search all items in timerange ...") - # initialize search parameters; - # for possible content, see: - # https://documentation.dataspace.copernicus.eu/APIs/OpenSearch.html - params = {} - # fill domain: - if box is not None: - params["box"] = box - # endif - # fill product type: - params["productType"] = producttype - - # search query could only return a maximum number of records; - # use large value to have single page with full request, - # since the catalogue might return different pages due to inconsistent sorting ... - nrow = 2000 - # fill paging info: - params["maxRecords"] = nrow - # sort on publication date to ensure that pages are the same when requested again: - params["sortParam"] = "published" - params["sortOrder"] = "ascending" - - # start of first month: - tm = datetime.datetime(t1.year, t1.month, 1) - # loop over months: - while tm < t2: + # start of first day: + tx = datetime.datetime(t1.year, t1.month, t1.day) + # loop over inquiry periods: + while tx < t2: # info ... 
- logging.info(f"{indent} {tm.year}-{tm.month:0>2} ...") - - # month length: - weekday, nday = calendar.monthrange(tm.year, tm.month) - # start of month or overall range: - tm1 = max(t1, tm) - # end of month or overall range: - tm2 = min(tm + datetime.timedelta(nday), t2) - - # fill time range: - tfmt = "%Y-%m-%dT%H:%M:%SZ" - params["startDate"] = tm1.strftime(tfmt) - params["completionDate"] = tm2.strftime(tfmt) - - # a 'page' of records is requested using a row offset and the number of rows; - # init page counter: - ipage = 0 - # init row offset used for messages: - row0 = 0 - - # loop over pages of query result: - while True: - # increase counter: - ipage += 1 - # info ... - logging.info(f"{indent} page {ipage} ...") - - # fill page number: - params["page"] = ipage - - # number of tries: - ntry = 1 - maxtry = 500 - # repeat a few times if necessary: - while ntry <= maxtry: - try: - # send query to search page; no authorization is needed ... - r = requests.get(search_url, params=params) - # check status, raise error if request failed: - r.raise_for_status() - except Exception as err: - msg = str(err) - logging.warning(f"{indent} from query; message received:") - logging.warning(f"{indent}%s" % msg) - logging.warning(f"{indent} current time:{time.ctime()}") - if ntry == maxtry: - logging.error(f"{indent} tried {ntry} times now, exit ...") - raise Exception - else: - logging.warning(f"{indent} wait {nsec_wait} seconds ..") - time.sleep(nsec_wait) - logging.warning(f"{indent} attempt {ntry} / {maxtry} ...") - ntry += 1 - continue - # endif - # endtry - # no error, leave: - break - # endwhile - - # While testing: save the result as a json file, and load it into a browser. - # This shows a dict with among others the fields: - # - # { .. - # 'features' : [ # list of orbits, in browser named: '0','1',... 
- # { 'id' : '0f318743-8bb9-55ed-b42d-7721b24f7ede', # download id - # 'properties' : { - # 'title' : "S5P_OFFL_L2__CH4____20220531T224613_20220601T002743_23999_02_020301_20220602T143707.nc", - # ... - # } - # ... - # }, - # ... - # ] - # } - # - # save result? - if False: - # targefile: - qfile = "query.json" - # save: - with open(qfile, "w") as f: - f.write(r.text) - # endwith - # endif + logging.info(f"{indent} {tx.year}-{tx.month:0>2}-{tx.day:0>2} ...") - # convert response to json dict: - data = r.json() + # inquire time range (1 day): + tx1 = tx + tx2 = tx + datetime.timedelta(1) - # check ... - if type(data) != dict: - logging.error( - f"request response should be a json dict, found type: {type(data)}" - ) - raise Exception - # endif - # check ... - if "features" not in data.keys(): - logging.error(f"element 'features' not found in response") - raise Exception - # endif - - # count: - nrec = len(data["features"]) - # info ... - if nrec == 0: - logging.info(f"{indent} no records found ...") - else: - logging.info(f"{indent} entries {row0+1:>4},..,{row0+nrec:>4}") - # endif - - # loop over features: - for feature in data["features"]: - # check ... - if type(feature) != dict: - logging.error(f"feature should be a dict, found type: {type(feature)}") - raise Exception - # endif + # info .. + logging.info(f"{indent} search in catalog ...") + # search, return item generator; + # do not generate before actual use, otherwise items are lost! + search_result = client.search( + collections=collections, + bbox=bbox, + datetime=(tx1, tx2), + #max_items=30 # seems maximum allowed? + ) - # check ... 
- if "id" not in feature.keys(): - logging.error(f"element 'id' not found in feature") - raise Exception - # endif - # get product id: - product_id = feature["id"] + # retry parameters: + maxtry = 10 + ntry = 1 + nsec_wait = 5 + # loop until success or too many errors: + while True: + # until success: + try: + + # loop over pages: + for ipage,page in enumerate(search_result.pages()): + # info ... + logging.info(f"{indent} page {ipage+1} with {len(page)} items ..") + + # loop over items: + for n,item in enumerate(page.items): + # extract: + product_id = item.id + # info ... + logging.info(f"{indent} item {n+1:2} : {product_id}") + + # + # S5P_OFFL_L2__NO2____20180701T005930_20180701T024100_03698_01_010002_20180707T022838 + # plt proc [product-] [starttime....] [endtime......] orbit cl procrv [prodtime.....] + # + # split: + platform_name, processing, rest = product_id.split("_", 2) + product_type = rest[0:10] + parts = rest[11:].split("_") + start_time, end_time, orbit, collection, processor_version, prod_time = parts + + # convert: + tfmt = "%Y%m%dT%H%M%S" + ts = datetime.datetime.strptime(start_time, tfmt) + te = datetime.datetime.strptime(end_time, tfmt) + + # extract download url: + href = item.assets['netcdf'].href + # filename: + filename = f"{product_id}.nc" + + # strange, sometimes records seem double ... + # already records present? + if len(output_df) > 0: + # same href already stored? + if href in output_df["href"].values: + # testing ... + logging.warning(f"{indent} already used, ignore ...") + # ignore record: + continue + # endif + # endif - # check ... - if "properties" not in feature.keys(): - logging.error(f"element 'properties' not found in feature") - raise Exception - # endif - # check ... 
- if "title" not in feature["properties"].keys(): - logging.error(f"element 'properties/title' not found in feature") - raise Exception - # endif - # get full filename: - filename = feature["properties"]["title"] - # - # S5P_OFFL_L2__NO2____20180701T005930_20180701T024100_03698_01_010002_20180707T022838.nc - # plt proc [product-] [starttime....] [endtime......] orbit cl procrv [prodtime.....] - # - bname = os.path.basename(filename).replace(".nc", "") - # split: - platform_name, processing, rest = bname.split("_", 2) - product_type = rest[0:10] - parts = rest[11:].split("_") - start_time, end_time, orbit, collection, processor_version, prod_time = parts - - # convert: - tfmt = "%Y%m%dT%H%M%S" - ts = datetime.datetime.strptime(start_time, tfmt) - te = datetime.datetime.strptime(end_time, tfmt) - - # fill download href: - href = download_url.format(product_id=product_id) - - # strange, sometimes records seem double ... - # already records present? - if len(output_df) > 0: - # same href already stored? - if href in output_df["href"].values: - # testing ... 
- logging.warning( - f"{indent} ignore double product_id: {product_id}" + # fill record, values should be lists for concatenation below: + rec = { + "orbit": [orbit], + "start_time": [ts], + "end_time": [te], + "processing": [processing], + "collection": [collection], + "processor_version": [processor_version], + "filename": [filename], + "href": [href], + } + # add record: + output_df = pandas.concat( + (output_df, pandas.DataFrame(rec)), ignore_index=True ) - # ignore record: - continue - # endif - # endif - - # fill record, values should be lists for concatenation below: - rec = { - "orbit": [orbit], - "start_time": [ts], - "end_time": [te], - "processing": [processing], - "collection": [collection], - "processor_version": [processor_version], - "filename": [filename], - "href": [href], - } - # add record: - output_df = pandas.concat( - (output_df, pandas.DataFrame(rec)), ignore_index=True - ) - # endfor features + #endfor # items + ## testing ... + #break + #endfor # pages - ## testing... - # if ipage == 9 : - # logging.warning( f"break after page {ipage} ..." ) - # break - ## endif - - # not a full page? then end is reached ... - if nrec < nrow: - # leave loop over pages: + # no errors, leave retry loop: break - # endif - # increse row offset: - row0 += nrow + except Exception as err: + msg = str(err) + logging.warning(f"{indent} from query; message received:") + logging.warning(f"{indent} {msg}") + if ntry == maxtry: + logging.error(f"{indent} tried {ntry} times now, exit ...") + raise Exception + else: + logging.warning(f"{indent} wait {nsec_wait} seconds ..") + time.sleep(nsec_wait) + ntry += 1 + logging.warning(f"{indent} attempt {ntry} / {maxtry} ...") + # endif + # endtry - # endwhile # pages + #endwhile # retry loop # next: - tm = tm2 + tx = tx2 + + ## testing ... 
+ #break # endwhile # month loop @@ -569,54 +469,24 @@ class CSO_DataSpace_Inquire(utopya.UtopyaRc): ######################################################################## -class NullAuth(requests.auth.AuthBase): - - """ - Force requests to ignore the ``~/.netrc`` file. - - Some sites do not support regular authentication, but we still - want to store credentials in the ``~/.netrc`` file and submit them - as form elements. Without this, requests would otherwise use the - ``~/.netrc`` which leads, on some sites, to a 401 error. - - Use with:: - - requests.get( url, auth=NullAuth() ) - - Source: - - ``_ - - """ - - def __call__(self, r): - return r - - # enddef __call__ - - -# endclass NullAuth - - -# * - - class CSO_DataSpace_Downloader(object): """ Class to download single file from *Copernicus DataSpace*. The object defined by the class is used to store an - access token that is re-used until it is expired. + access token and S3 download credentials that are re-used until it is expired. + + The :py:class:`DownloadFile` method should be used to actually download a file. - The :py:class:`DownloadFile` method should be used to - actually download a file. + An account is needed to download data; see :ref:`dataspace-account` + on how to obtain and store your login information. 
Usage:: # initialize downloader: downloader = CSO_DataSpace_Downloader() # download file: - downloader.DownloadFile( "https://zipper.dataspace.copernicus.eu/odata/v1/Products('d483baa0-3a61-4985-aa0c-5642a83c9214')/$value", "orbit.nc" ) + downloader.DownloadFile( "s3://eodata/Sentinel-5P/TROPOMI/L2__NO2___/2018/06/30/S5P_RPRO_L2__NO2____20180630T231701_20180701T010029_03697_01_010202_20190211T183716.nc", "orbit.nc" ) """ @@ -627,12 +497,14 @@ class CSO_DataSpace_Downloader(object): # no token yet: self.access_token = None + # no credentials yet: + self.s3_credentials = None # enddef __init__ # * - def CreateToken(self, href, indent=""): + def Create_Token(self, href, indent=""): """ Create access token. """ @@ -647,7 +519,7 @@ class CSO_DataSpace_Downloader(object): # # On linux system, login/passwords for websites and ftp can be stored in "~/.netrc" file: # ---[~/.netrc]----------------------------------------------- - # machine zipper.dataspace.copernicus.eu login Your.Name@institute.org password *********** + # machine eodata.dataspace.copernicus.eu login Your.Name@institute.org password *********** # ------------------------------------------------------------ # Retrieve the login/password from ~/.netrc to avoid hardcoding them in a script. # @@ -743,7 +615,145 @@ class CSO_DataSpace_Downloader(object): # endif # endwhile # retry - # enddef CreateToken + # enddef Create_Token + + # * + + def Create_S3_Credentials( self, renew=False, indent="" ): + + """ + Create temporary S3 credentials by calling the S3 keys manager API. + """ + + # modules: + import os + import pathlib + import requests + + # tools: + from . import cso_file + + # local file with credentials: + credfile = pathlib.Path.home() / ".cso" / "dataspace-credentials" + + # create new? + if not os.path.isfile(credfile): + + # info ... 
+ logging.info(f"{indent}create S3 credentials ...") + + # fill authorization token in header: + headers = { + "Authorization": f"Bearer {self.access_token}", + "Accept": "application/json" + } + + # request new credentials: + r = requests.post("https://s3-keys-manager.cloudferro.com/api/user/credentials", headers=headers) + # check response: + if r.status_code == 200: + # evaluate: + self.s3_credentials = r.json() + # info ... + logging.info(f"{indent} temporary S3 credentials created successfully.") + ## testing .. + #logging.info(f"{indent} access: {self.s3_credentials['access_id']}") + #logging.info(f"{indent} secret: {self.s3_credentials['secret']}") + # + # 403: Max number of credentials reached. + elif r.status_code == 403: + # info ... + logging.error(f"{indent} maximum number of credential exceeded, not deleted ...?") + logging.error(f"{indent} try to delete manually on:") + logging.error(f"{indent} https://eodata-s3keysmanager.dataspace.copernicus.eu/panel/s3-credentials") + # + else: + logging.error(f"ERROR - failed to create temporary S3 credentials:") + logging.error(f"ERROR - status code: {r.status_code}") + logging.error(f"ERROR - {r.text}") + raise Exception + #endif + + # info ... + logging.info(f"{indent} store in {credfile} ...") + # create target dir if necessary: + cso_file.CheckDir( credfile ) + # write: + with open(credfile,"w") as f: + f.write( f"access_id = {self.s3_credentials['access_id']}\n" ) + f.write( f"secret = {self.s3_credentials['secret']}\n" ) + #endwith + + #endif # (re)new + + # read content: + with open(credfile,"r") as f: + lines = f.readlines() + #endwith + # target values: + access_id = None + secret = None + # extract content: + for line in lines: + line = line.strip() + if "=" in line: + key,value = map( str.strip, line.split("=",1) ) + if key == "access_id": + access_id = value + elif key == "secret": + secret = value + #endif + #endif + #endif + # check .. 
+ if access_id is None: + logging.error(f"no 'access_id' found in credentials file: {credfile}") + raise Exception + #endif + # check .. + if secret is None: + logging.error(f"no 'secret' found in credentials file: {credfile}") + raise Exception + #endif + # store: + self.s3_credentials = {} + self.s3_credentials['access_id'] = access_id + self.s3_credentials['secret'] = secret + + #enddef Create_S3_Credentials + + # * + + def Download_S3_Product( self, bucket, product: str, target: str ) -> None: + """ + Download product file in bucket and save to target. + An error is raised if not exactly one file matching the ``product`` name is found. + + Arguments: + + * ``bucket``: boto3 Resource bucket object + * ``product``: path to product + * ``target``: target file, sub-directories in path are assumed to be present already + """ + + # search files for this product: + pfiles = bucket.objects.filter( Prefix=product ) + + # count: + npfile = len(list(pfiles)) + # check ... + if npfile != 1: + print(f"ERROR - found {npfile} files in S3 bucket for product: {product}") + raise Exception + #endif + + # loop over (single) files: + for pfile in pfiles: + # download: + bucket.download_file( pfile.key, target ) + #endfor + + #enddef Download_S3_Product @@ -758,9 +768,9 @@ class CSO_DataSpace_Downloader(object): Arguments: - * ``href`` : download url, for example:: + * ``href`` : download url, for example for file in S3 bucket:: - https://zipper.dataspace.copernicus.eu/odata/v1/Products('d483baa0-3a61-4985-aa0c-5642a83c9214')/$value + s3://eodata/Sentinel-5P/TROPOMI/L2__NO2___/2018/06/30/S5P_RPRO_L2__NO2____20180630T231701_20180701T010029_03697_01_010202_20190211T183716.nc * ``output_file`` : target file @@ -777,160 +787,100 @@ class CSO_DataSpace_Downloader(object): import sys import os import time - import requests - import magic - import zipfile - import shutil + import urllib + import boto3 # tools: from . import cso_file - # no token yet?
- if self.access_token is None: - # info .. - logging.info(f"{indent}create token ...") - # create token, re-use until error is received ... - self.CreateToken(href, indent=indent) - # endif + # + # example data link: + # + # s3://eodata/Sentinel-5P/TROPOMI/L2__NO2___/2018/06/30/S5P_RPRO_L2__NO2____20180630T231701_20180701T010029_03697_01_010202_20190211T183716.nc + # [] [----][-------------------------------------------------------------------------------------------------------------------------------] + # scheme netloc path + # + # extract parts: + p = urllib.parse.urlparse(href) + # switch: + if p.scheme == "s3": - # info .. - logging.info(f"{indent}download ...") - # retry loop .. - ntry = 1 - while True: - # try to download and save: - try: - # fill authorization token in header: - headers = {"Authorization": f"Bearer {self.access_token}"} - # ensure that "~/.netrc" is ignored by passing null-authorization, - # otherwise the token in the header is overwritten by a token formed - # from the login/password in the netrc file if that is found: - r = requests.get(href, auth=NullAuth(), headers=headers) - # check status, raise error if request failed: - r.raise_for_status() + # known end points ... + if p.netloc == "eodata": + # set end point: + s3_endpoint_url = "https://eodata.dataspace.copernicus.eu" + else: + logging.error(f"could not define S3 endpoint url for net location '{p.netloc}'") + raise Exception + #endif - # product is either a zip-file or a netcdf file ... - product_file = "product.dat" + # no token yet? + if self.access_token is None: # info .. - logging.info(f"{indent}write to {product_file} ...") - # write to temporary target first .. 
- tmpfile = product_file + ".tmp" - # open destination file for binary write: - with open(tmpfile, "wb") as fd: - # prefered way to write content following: - # https://docs.python-requests.org/en/master/user/quickstart/ - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) - # endfor - # endwith - # rename: - os.rename(tmpfile, product_file) - - # file type: - mimetype = magic.from_file( product_file, mime=True ) - # switch: - #~ nc file: - if mimetype == "application/x-hdf": + logging.info(f"{indent}create token ...") + # create token, re-use until error is received ... + self.Create_Token(s3_endpoint_url, indent=indent) + # endif + + # info .. + logging.info(f"{indent}download file from S3 bucket ...") + # retry loop .. + ntry = 1 + while True: + # try to download and save: + try: + # create new or re-use "s3_credentials" attribute: + self.Create_S3_Credentials( indent=indent ) + + ## info .. + #logging.info(f"{indent} setup S3 resource ..") + # set up S3 client and resource with temporary credentials + s3_resource = boto3.resource( 's3', + endpoint_url=s3_endpoint_url, + aws_access_key_id=self.s3_credentials["access_id"], + aws_secret_access_key=self.s3_credentials["secret"]) - # info .. - logging.info(f"{indent}product is netcdf file, store ...") - # this is the target netcdf file already; # create target dir if necessary: cso_file.CheckDir(output_file, dmode=dmode) - # rename to destination: - shutil.move(product_file, output_file) - - #~ zip file: - elif mimetype == "application/zip": - - # info .. - logging.info(f"{indent}product is zip file, unpack ...") - # open as zipfile: - arch = zipfile.ZipFile(product_file, mode="r") - # loop over members, probably two files in a directory: - # S5P_RPRO_L2__CH4____20200101T005246_etc/S5P_RPRO_L2__CH4____20200101T005246_etc.cdl - # S5P_RPRO_L2__CH4____20200101T005246_etc.nc - for member in arch.namelist(): - # ncfile? - if member.endswith(".nc"): - # this should be the target file .. 
- if os.path.basename(member) != os.path.basename(output_file): - logging.error(f"member of archive file: {member}") - logging.error(f"differs from target name: {output_file}") - raise Exception - # endif - # info .. - logging.info(f"{indent}extract {member} ...") - # extract here, including leading directory: - arch.extract(member) - # info .. - logging.info(f"{indent}store ...") - # create target dir if necessary: - cso_file.CheckDir(output_file, dmode=dmode) - # move to destination: - shutil.move(member, output_file) - # remove directory tree: - shutil.rmtree(os.path.dirname(member)) - # only one file in package; leave loop over members - break - # endif - # endfor # members - # info .. - logging.info(f"{indent}remove product file ...") - # remove package: - os.remove(product_file) - - #~ unknown ... - else: - logger.error( f"unsupported mimetype '{mimetype}'" ) - raise Exception - #endif - # all ok, leave retry loop: - break + # download file: + # - create bucket "eodata" + # - provide path in bucket (without leading path seperator) + # - write to target file + self.Download_S3_Product( s3_resource.Bucket(p.netloc), p.path.lstrip("/"), output_file ) - except requests.exceptions.HTTPError as err: - # info .. - msg = str(err) - logging.warning(f"{indent}exception from download; message received:") - logging.warning(f"{indent} %s" % msg) - # catch known problems ... - if msg.startswith("401 Client Error: Unauthorized for url:"): - logging.warning(f"{indent}renew token ...") - self.CreateToken(href, indent=indent) - elif msg.startswith("404 Client Error: Not Found for url:"): - # # info ... - # logging.error(f"url seems not available anymore; maybe need to update inquiry table?") - # # quit with error: - # raise - # warning ... 
- logging.warning(f"url seems not available, try again ...") - # endif + # all ok, leave retry loop: + break - except MemoryError as err: - logging.error("memory error from download; increase resources?") - # quit with error: - raise + # unknown ... + except Exception as err: + logging.error(f"{indent}unknown error:") + logging.error(f"{indent} {str(err)}") - # endtry + # endtry - # increase counter: - ntry += 1 - # switch: - if ntry == maxtry: - # Otherwise raise an Exception and exit. - logging.warning(f"{indent}tried {maxtry} times; exit ...") - raise Exception - else: - logging.warning(f"{indent}wait {nsec_wait} seconds ...") - time.sleep(nsec_wait) - # next time, wait a bit longer, but not too long ... - nsec_wait = min(nsec_wait * 2, nsec_wait_max) - logging.warning(f"{indent}attempt {ntry} / {maxtry} ...") - continue # while-loop - # endif + # increase counter: + ntry += 1 + # switch: + if ntry == maxtry: + # Otherwise raise an Exception and exit. + logging.warning(f"{indent}tried {maxtry} times; exit ...") + raise Exception + else: + logging.warning(f"{indent}wait {nsec_wait} seconds ...") + time.sleep(nsec_wait) + # next time, wait a bit longer, but not too long ... + nsec_wait = min(nsec_wait * 2, nsec_wait_max) + logging.warning(f"{indent}attempt {ntry} / {maxtry} ...") + continue # while-loop + # endif - # endwhile # retry + # endwhile # retry + + else: + logging.error(f"ERROR - unsupported scheme '{p.scheme}' for href: {href}") + raise Exception + #endif # enddef DownloadFile diff --git a/src/cso/cso_file.py b/src/cso/cso_file.py index 0da54bd91cef0d1eb495d56638b38c78ea6b2cfe..b638e39e3631c8467eb34d8336e49fb7a02fce77 100644 --- a/src/cso/cso_file.py +++ b/src/cso/cso_file.py @@ -63,6 +63,7 @@ # # 2026-01, Arjo Segers # Updated use of Dataset.dims following deprication warning. +# Removed whitespace from template replacement. 
# ######################################################################## @@ -1666,7 +1667,7 @@ class CSO_Listing(object): # to: # df['orbit'] == '12345' for key in self.df.keys(): - selection = selection.replace(f"%{{{key}}}", f"df['{key}']") + selection = selection.replace(f"%{{{key}}}", f"df['{key}']").strip() # endfor # testing ... logging.info(f"{indent}selection `{selection}` ...") diff --git a/src/cso/cso_gridded.py b/src/cso/cso_gridded.py index cd54bdbc3d8f4fd207097d89b364da44cffe49ec..7dcdd3cf31a924d608f7c97530e1ea638c440e12 100644 --- a/src/cso/cso_gridded.py +++ b/src/cso/cso_gridded.py @@ -41,6 +41,9 @@ # Updated calculation of temporal means for files with time records. # # 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# +# 2026-01, Arjo Segers # Fixed timerange setup. # @@ -113,15 +116,15 @@ class CSO_GriddedAverage(utopya.UtopyaRc): using the total overlapping area: .. math:: - x(i_k,j_k) ~=~ \left(\ \sum\limits_{p\in P_k} y_p\ w_{p,k}\ \\right)\ /\ \sum\limits_{p\in P_k}\ w_{p,k} + x(i_k,j_k) ~=~ \\left(\\ \\sum\\limits_{p\\in P_k} y_p\\ w_{p,k}\\ \\right)\\ /\\ \\sum\\limits_{p\\in P_k}\\ w_{p,k} where: * :math:`i_k,j_k` are the indices of grid cell :math:`k`; * :math:`P_k` is the set of pixels that overlap with cell :math:`k`; * :math:`y_p` is the data value of pixel :math:`p`; - * :math:`w_p` is the footprint area [m\ :sup:`2`] of pixel :math:`p` - * :math:`w_{p,k}` is the area [m\ :sup:`2`] of pixel :math:`p` that overlaps with the cell :math:`k`. + * :math:`w_p` is the footprint area [m\\ :sup:`2`] of pixel :math:`p` + * :math:`w_{p,k}` is the area [m\\ :sup:`2`] of pixel :math:`p` that overlaps with the cell :math:`k`. The overlapping area is computed using the :py:meth:`LonLatPolygonCentroids ` method. 
This fractions a footprint area into a large number of triangles, and returns @@ -630,7 +633,6 @@ class CSO_GriddedAverage(utopya.UtopyaRc): # array with total weights ww_out = numpy.zeros((nrec, nlat, nlon), dtype="f4") - logging.info(f"{ww_out.shape=}") # loop over output records: for irec in range(nrec): @@ -746,7 +748,7 @@ class CSO_GriddedAverage(utopya.UtopyaRc): # endfor # hours # info .. logging.info( - f"{indent} found {len(sourcefiles)} file(s) matching pattern ..." + f"{indent} found {len(sourcefiles)} file(s) matching pattern ..." ) # endif # listing or filenames diff --git a/src/cso/cso_inquire.py b/src/cso/cso_inquire.py index 70c9792e64db42b9213d9e03644de17c50977e3e..0944609ae60d64dddc2168da95f128efa4d94714 100644 --- a/src/cso/cso_inquire.py +++ b/src/cso/cso_inquire.py @@ -31,6 +31,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # +# 2026-03, Arjo Segers +# Fixed sorting of collections and processors for latests versions of pandas. +# ######################################################################## ### @@ -267,8 +270,11 @@ class CSO_Inquire_Plot(utopya.UtopyaRc): # switch: if table_type == "S5p": - # collections: - collections = df["collection"].unique() + # list of collections ; + # newer pandas versions return a 'StringArray', + # convert to numpy.array to ensure that "sort()" method is available: + collections = numpy.array( df["collection"].unique() ) + # sort in-place: collections.sort() # adhoc: skip test collections "90", "91", .. @@ -305,8 +311,10 @@ class CSO_Inquire_Plot(utopya.UtopyaRc): # endif # table type - # procesors: '010101', ... - procs = df["processor_version"].unique() + # procesors: '010101', ... 
; + # convert to numpy.array to have "sort()" methode: + procs = numpy.array( df["processor_version"].unique() ) + # sort in-place: procs.sort() # count: nproc = len(procs) diff --git a/src/cso/cso_mapping.py b/src/cso/cso_mapping.py index fe95bb71d062253aaff32679bdc8b5c9c2ebbdae..b807acd503d4258045d698879f64372d7f40d8a1 100644 --- a/src/cso/cso_mapping.py +++ b/src/cso/cso_mapping.py @@ -17,6 +17,9 @@ # 2026-01, Arjo Segers # Changed imports for python packaging. # +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# ######################################################################## ### @@ -165,13 +168,13 @@ def LonLatTrianglesArea(xx, yy): Using the radius :math:`R` of Earth (m) the area is: .. math:: - A ~=~ R^2 \int\limits_{x,y} \cos(y)\ dx\ dy + A ~=~ R^2 \\int\\limits_{x,y} \\cos(y)\\ dx\\ dy Approximate this by first computing the area in degrees2 and use the average latitude: .. math:: - A ~=~ R^2 \int\limits_{x,y} dx\ dy\ \cos(y_{aver}) + A ~=~ R^2 \\int\\limits_{x,y} dx\\ dy\\ \\cos(y_{aver}) """ @@ -279,9 +282,9 @@ def LonLatPolygonCentroids(xx, yy, maxlevel=5, _level=0, indent=""): from with the sides of the polygon as base and the centroid as top:: o---o - |\ /| + |\\ /| | * | - |/ \| + |/ \\| o---o Each triangle is devided into 2 new triangles with the middle of the longest side as their top @@ -289,8 +292,8 @@ def LonLatPolygonCentroids(xx, yy, maxlevel=5, _level=0, indent=""): 2 o - 0 / | \ 2 - / | \ + 0 / | \\ 2 + / | \\ o-----*-----o 0 1 1 diff --git a/src/cso/cso_pal.py b/src/cso/cso_pal.py index e5132a7f49c81f6c43899f971a258c67260dabcd..6daf02f561db281eda5b872038d290aeb0caff25 100644 --- a/src/cso/cso_pal.py +++ b/src/cso/cso_pal.py @@ -22,6 +22,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # +# 2026-03, Arjo Segers +# Updated comment. 
+# ######################################################################## ### diff --git a/src/cso/cso_regions.py b/src/cso/cso_regions.py index 89492e87d3f76abe783cf042a8c54c889d732378..f0e7ae1a03c03b22a884c83ffc2311c3575173c0 100644 --- a/src/cso/cso_regions.py +++ b/src/cso/cso_regions.py @@ -26,6 +26,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# ######################################################################## ### @@ -1306,7 +1309,7 @@ class RegionMap(object): # endif # read: df_to = pandas.read_csv( - tonew, sep="\s*;", skipinitialspace=True, comment="#", na_filter=False, engine="python" + tonew, sep="\\s*;", skipinitialspace=True, comment="#", na_filter=False, engine="python" ) # check .. for key in ["code", "newcode"]: @@ -1345,7 +1348,7 @@ class RegionMap(object): # endif # read: df_new = pandas.read_csv( - new, sep="\s*;", skipinitialspace=True, comment="#", na_filter=False, engine="python" + new, sep="\\s*;", skipinitialspace=True, comment="#", na_filter=False, engine="python" ) # check .. for key in ["code", "name"]: @@ -2738,7 +2741,7 @@ class CSO_Statistics_RegionsTables(utopya.UtopyaRc): ctab_file = self.GetSetting("ctab.file") # read: ctab = pandas.read_csv( - ctab_file, sep="\s*;", skipinitialspace=True, na_filter=False, engine="python" + ctab_file, sep="\\s*;", skipinitialspace=True, na_filter=False, engine="python" ) # info .. diff --git a/src/cso/cso_s5p.py b/src/cso/cso_s5p.py index e3a6d77116670602563a8dfc008cb79654016c00..f6ef687c4a1c4b9e08fcae81ce965e31f6ef3b1e 100644 --- a/src/cso/cso_s5p.py +++ b/src/cso/cso_s5p.py @@ -78,7 +78,9 @@ # Updated use of Dataset.dims following deprication warning. # Explicitly defined data type in encoding of time array folowing warning. # - +# 2026-03, Arjo Segers +# Updated selection of download source for Copernicus Dataspace. 
+# ######################################################################## ### @@ -3030,7 +3032,7 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # initialize download? if downloader is None: # init downloader based on url: - if "dataspace.copernicus.eu" in href: + if href.startswith("s3://eodata/"): # download from Copernicus DataSpace: downloader = cso_dataspace.CSO_DataSpace_Downloader() # diff --git a/src/cso/cso_superobs.py b/src/cso/cso_superobs.py index aec6d443cac3ae870aa1d7f8bc0ed811b95d8c83..f33b64c278d056b4e5cc8cd49600c6050b9dfc05 100644 --- a/src/cso/cso_superobs.py +++ b/src/cso/cso_superobs.py @@ -30,6 +30,9 @@ # 2026-01, Alessandro D'ausilio # Trap case that no pixels are selected. # +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# ######################################################################## @@ -101,15 +104,15 @@ class CSO_SuperObs(utopya.UtopyaRc): using the total overlapping area: .. math:: - x(i_k,j_k) ~=~ \left(\ \sum\limits_{p\in P_k} y_p\ w_{p,k}\ \\right)\ /\ \sum\limits_{p\in P_k}\ w_{p,k} + x(i_k,j_k) ~=~ \\left(\\ \\sum\\limits_{p\\in P_k} y_p\\ w_{p,k}\\ \\right)\\ /\\ \\sum\\limits_{p\\in P_k}\\ w_{p,k} where: * :math:`i_k,j_k` are the indices of grid cell :math:`k`; * :math:`P_k` is the set of pixels that overlap with cell :math:`k`; * :math:`y_p` is the data value of pixel :math:`p`; - * :math:`w_p` is the footprint area [m\ :sup:`2`] of pixel :math:`p` - * :math:`w_{p,k}` is the area [m\ :sup:`2`] of pixel :math:`p` that overlaps with the cell :math:`k`. + * :math:`w_p` is the footprint area [m\\ :sup:`2`] of pixel :math:`p` + * :math:`w_{p,k}` is the area [m\\ :sup:`2`] of pixel :math:`p` that overlaps with the cell :math:`k`. The overlapping area is computed using the :py:meth:`LonLatPolygonCentroids ` method. 
This fractions a footprint area into a large number of triangles, and returns diff --git a/src/cso/cso_tools.py b/src/cso/cso_tools.py index c3c0dbbc7adbed1e918d613f2c3e218d42349821..2fce5a9b6efa8c48ce539473cec8b9944fcb0cd1 100644 --- a/src/cso/cso_tools.py +++ b/src/cso/cso_tools.py @@ -7,6 +7,9 @@ # 2024-03, Arjo Segers # Added `mid2bounds`, `mid2corners`, `GetCornerGridX`, and `GetCornerGridY` methods. # +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# ######################################################################## ### @@ -330,32 +333,34 @@ def linearize_avg_kernel(AK, xa, xa_ratio_max): Based on Appendix A of: - Zhang et al., Intercomparison methods for satellite measurements of atmospheric composition, ACP, 2010 + | Zhang, L., Jacob, D. J., Liu, X., Logan, J. A., Chance, K., Eldering, A., and Bojkov, B. R.: + | Intercomparison methods for satellite measurements of atmospheric composition: application to tropospheric ozone from TES and OMI, + | Atmos. Chem. Phys., 10, 4725-4739, `doi:10.5194/acp-10-4725-2010 `_, 2010. - * in log space with :math:`\mathbf{A}` in log(VMR)-based: + * in log space with :math:`\\mathbf{A}` in log(VMR)-based: .. math:: - \ln(\mathbf{x}_{est}) = \ln(\mathbf{x}_a) + \mathbf{\mathbf{A}} ( \ln(\mathbf{x}) - \ln(\mathbf{x}_a) ) + \\ln(\\mathbf{x}_{est}) = \\ln(\\mathbf{x}_a) + \\mathbf{\\mathbf{A}} ( \\ln(\\mathbf{x}) - \\ln(\\mathbf{x}_a) ) - * We want to convert :math:`\mathbf{A}` into :math:`\mathbf{A}^T` in VMR-based. Thus by definition: + * We want to convert :math:`\\mathbf{A}` into :math:`\\mathbf{A}^T` in VMR-based. Thus by definition: .. math:: - \mathbf{x}_{ext} = \mathbf{x}_a + \mathbf{A}^T( \mathbf{x} - \mathbf{x}_a) + \\mathbf{x}_{ext} = \\mathbf{x}_a + \\mathbf{A}^T( \\mathbf{x} - \\mathbf{x}_a) - * Let :math:`\mathbf{x}(i)` be the VMR for layer/level :math:`i`, which is the i-th element - of the vector :math:`\mathbf{x}`. 
Like above, assuming the difference between :math:`\mathbf{x}` - and :math:`\mathbf{x}_a` is relatively small so that: + * Let :math:`\\mathbf{x}(i)` be the VMR for layer/level :math:`i`, which is the i-th element + of the vector :math:`\\mathbf{x}`. Like above, assuming the difference between :math:`\\mathbf{x}` + and :math:`\\mathbf{x}_a` is relatively small so that: .. math:: - ln(\mathbf{x}) - ln(\mathbf{x}_a) ~= (\mathbf{x} - \mathbf{x}_a) / \mathbf{x}_a + ln(\\mathbf{x}) - ln(\\mathbf{x}_a) ~= (\\mathbf{x} - \\mathbf{x}_a) / \\mathbf{x}_a - with the same relationship holding between :math:`\mathbf{x}_{est}` and :math:`\mathbf{x}_a`. + with the same relationship holding between :math:`\\mathbf{x}_{est}` and :math:`\\mathbf{x}_a`. - * The elements :math:`\mathbf{A}^T(i,j)` of the converted matrix :math:`\mathbf{A}^T` are then related - to the elements :math:`\mathbf{A}(i,j)` of the averaging kernel :math:`\mathbf{A}` by: + * The elements :math:`\\mathbf{A}^T(i,j)` of the converted matrix :math:`\\mathbf{A}^T` are then related + to the elements :math:`\\mathbf{A}(i,j)` of the averaging kernel :math:`\\mathbf{A}` by: .. math:: - \mathbf{A}^T(i,j) = (\mathbf{x}_a(i)/\mathbf{x}_a(j))\ \mathbf{A}(i,j) + \\mathbf{A}^T(i,j) = (\\mathbf{x}_a(i)/\\mathbf{x}_a(j))\\ \\mathbf{A}(i,j) """ # modules: diff --git a/src/cso/cso_viirs.py b/src/cso/cso_viirs.py index f2e7bfeb2608b61e76539735d8831ede4a936495..24137bf40a908e91e95e66cbd207254e0530b71a 100644 --- a/src/cso/cso_viirs.py +++ b/src/cso/cso_viirs.py @@ -1260,7 +1260,7 @@ class CSO_VIIRS_Convert(utopya.UtopyaRc): import numpy # tools: - import cso_file + from . import cso_file # info ... logging.info(f"{indent}") diff --git a/src/rc.py b/src/rc.py index badc2bf6138673a0b8a03739ffbfdbb401e21d1e..cbdc1777ec29b44388992e164a67978080e3f2c7 100644 --- a/src/rc.py +++ b/src/rc.py @@ -15,6 +15,9 @@ # in same directory as current file. # When writing rcfile content, create directory if necessary. 
# +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# # ------------------------------------------------ # doc @@ -71,7 +74,7 @@ readible and understandable. To have a value including exclamation marks, use an escaped version '``\\!``':: my.value : -999 ! just an integer value - my.message : This value has 64 characters \! Count if you don't believe it ... + my.message : This value has 64 characters \\! Count if you don't believe it ... Note that currently the remainder of the value is not scanned for comment. @@ -798,8 +801,8 @@ class RcFile(object): import re # ensure that common marks are evaluated correctly: - start_mark = marks[0].replace("{", "\{").replace("<", "\<").replace("$", "\$") - close_mark = marks[1].replace("}", "\}").replace(">", "\>") + start_mark = marks[0].replace("{", "\\{").replace("<", "\\<").replace("$", "\\$") + close_mark = marks[1].replace("}", "\\}").replace(">", "\\>") # set syntax of keywords to be matched, e.g. '${...}' : pattern = start_mark + "[A-Za-z0-9_.-]+" + close_mark @@ -1203,8 +1206,8 @@ class _RcLine(object): # not hold and not resolved yet ? if (not self._comment) and (not self._hold) and (not self._resolved): # ensure that common marks are evaluated correctly: - start_mark = marks[0].replace("{", "\{").replace("<", "\<").replace("$", "\$") - close_mark = marks[1].replace("}", "\}").replace(">", "\>") + start_mark = marks[0].replace("{", "\\{").replace("<", "\\<").replace("$", "\\$") + close_mark = marks[1].replace("}", "\\}").replace(">", "\\>") # set syntax of keywords to be matched, e.g. '${...}' : pattern = start_mark + "[A-Za-z0-9_.-]+" + close_mark @@ -1317,7 +1320,7 @@ class _RcLine(object): # not hold, but all substitutions resolved ? if (not self._comment) and (not self._hold) and self._resolved: # set syntax of keywords to be matched, e.g. 
'$((...))' : - pattern = "\$\(\(.*\)\)" + pattern = "\\$\\(\\(.*\\)\\)" # make a regular expression that matches all variables: rc_varpat = re.compile(pattern) @@ -1425,10 +1428,10 @@ class _RcLine(object): # remove comment from value: if "!" in val: # not if '\!' is in the value ... - if not "\!" in val: + if not "\\!" in val: val, comment = val.split("!") # replace all slash-comments: - val = val.replace("\!", "!") + val = val.replace("\\!", "!") # endif # remove spaces: diff --git a/src/utopya/utopya_tools.py b/src/utopya/utopya_tools.py index eb6b71acd1d718a9a192936f152fb876e5af3a03..0943f5bcd48ba42a94e3dc8cec5b940cc41cb28f 100644 --- a/src/utopya/utopya_tools.py +++ b/src/utopya/utopya_tools.py @@ -19,6 +19,9 @@ # 2025-01, Arjo Segers # Use "importlib" again as "imp" seems depricated. # +# 2026-01, Arjo Segers +# Fixed escape characters in (document) strings. +# # ------------------------------------------------- # help @@ -438,7 +441,7 @@ def GetValuesFunction(part): # default kwargs for "read_csv": kwargs = dict( - sep="\s*;", dtype="str", skipinitialspace=True, na_filter=False, engine="python" + sep="\\s*;", dtype="str", skipinitialspace=True, na_filter=False, engine="python" ) # convert extra arguments: try: