########################################################################
###
### help
###
########################################################################

"""
.. _cso-pal:

******************
``cso_pal`` module
******************

The :py:mod:`cso_pal` module provides classes for accessing data from the
`Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

Data is available from a special portal:
`S5P-PAL Data Portal <https://data-portal.s5p-pal.com/>`_.

To browse through the data, use the
`browser interface <https://data-portal.s5p-pal.com/browser/>`_.


.. _pal-api:

PAL API
=======

See the PAL `API info <https://data-portal.s5p-pal.com/cat-doc>`_ for latest info.

S5P-PAL product files can be selected and downloaded using the
*Spatio Temporal Asset Catalog* (STAC).
The `PySTAC <https://pystac-client.readthedocs.io/en/latest/>`_ Python interface
is used for access.


Class hierarchy
===============

The classes are defined according to the following hierarchy:

* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_PAL_Inquire`


Classes
=======

"""


########################################################################
###
### modules
###
########################################################################

# modules:
import logging

# tools:
import utopya


########################################################################
###
### STAC inquire
###
########################################################################

class CSO_PAL_Inquire( utopya.UtopyaRc ) :

    """
    Inquire available Sentinel data from the
    `Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

    A query is sent to search for products that are available
    for a certain time and overlap with a specified region.
    The result is a list with orbit files and instructions on how to download them.

    In the settings, specify the url of the portal::

        <rcbase>.url               :  https://data-portal.s5p-pal.com/cat/sentinel-5p/catalog.json

    Specify the time range over which files should be downloaded::

        <rcbase>.timerange.start   :  2018-07-01 00:00
        <rcbase>.timerange.end     :  2018-07-01 23:59

    Provide a product type::

        ! product type (always 10 characters!):
        <rcbase>.producttype       :  L2__NO2___

    Eventually specify a target area;
    only orbits with some pixels within the defined box will be downloaded::

        ! target area, leave empty for globe; format:  west,south:east,north
        <rcbase>.area              :
        !<rcbase>.area             :  -30,30:35,76

    Name of output csv file::

        ! output table, date of today:
        <rcbase>.output.file       :  ${my.work}/PAL_S5P_NO2_%Y-%m-%d.csv

    Example records::

        orbit;start_time;end_time;processing;collection;processor_version;filename;href
        02832;2018-05-01 00:00:52;2018-05-01 01:42:22;PAL_;01;020301;S5P_PAL__L2__NO2____20180501T000052_20180501T014222_02832_01_020301_20211108T132200.nc;https://data-portal.s5p-pal.com/cat/sentinel-5p/download/c3e90b62-c1f6-47b6-9ed7-bd70aa6f46ce
        02833;2018-05-01 01:42:22;2018-05-01 03:23:52;PAL_;01;020301;S5P_PAL__L2__NO2____20180501T014222_20180501T032352_02833_01_020301_20211108T163508.nc;https://data-portal.s5p-pal.com/cat/sentinel-5p/download/a08cda20-0018-434e-bdcc-800dd580db0f
        :

    """

    def __init__( self, rcfile, rcbase='', env=None, indent='' ) :

        """
        Inquire orbit files and save the result as a ';'-separated table.

        Arguments:

        * ``rcfile`` : settings file, passed to :py:class:`.UtopyaRc`
        * ``rcbase`` : prefix of the setting keys, passed to :py:class:`.UtopyaRc`
        * ``env``    : optional dictionary passed to :py:class:`.UtopyaRc`;
          an empty dictionary is used if not provided
        * ``indent`` : text prepended to every log message

        The settings ``url``, ``timerange.start``, ``timerange.end``,
        ``producttype``, ``area``, and ``output.file`` are read.
        The time range is searched month by month, and one record per
        file is written to the output table.
        """

        # modules:
        import os
        import datetime
        import calendar
        import pystac
        import pystac_client
        import pandas

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** Inquire files available on PAL' )
        logging.info( indent+'' )

        # a new dictionary per call; a '{}' default in the signature
        # would be one object shared between all calls:
        if env is None :
            env = {}
        #endif

        # init base object:
        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase, env=env )

        # domain:
        url = self.GetSetting( 'url' )
        # info ...
        logging.info( indent+'url : %s' % url )

        # time range:
        t1 = self.GetSetting( 'timerange.start', totype='datetime' )
        t2 = self.GetSetting( 'timerange.end'  , totype='datetime' )
        # info ...
        tfmt = '%Y-%m-%d %H:%M'
        logging.info( indent+'timerange: [%s,%s]' % (t1.strftime(tfmt),t2.strftime(tfmt)) )

        # product type (always 10 characters!):
        #   L2__NO2___
        producttype = self.GetSetting( 'producttype' )
        # filter for ItemSearch:
        sfilter = "s5p:file_type='%s'" % producttype

        # area of interest: west,south:east,north
        area = self.GetSetting( 'area' )
        # defined?
        if len(area) > 0 :
            # convert from format for "dhusget.sh":
            #   west,south:east,north
            west,south,east,north = map( float, area.replace(':',' ').replace(',',' ').split() )
            # info ...
            logging.info( indent+'area : [%8.2f,%8.2f] x [%8.2f,%8.2f]' % (west,east,south,north) )
            # intersection description, closed ring around the box:
            intersects = { 'type' : 'Polygon',
                           'coordinates' : [[[west,south],[east,south],[east,north],[west,north],[west,south]]] }
        else :
            # info ...
            logging.info( indent+'area : no' )
            # no intersection:
            intersects = None
        #endif

        # target file, might include time templates:
        output_file__template = self.GetSetting( 'output.file' )
        # current time:
        output_file = datetime.datetime.now().strftime( output_file__template )

        # columns of output table, in order of appearance:
        columns = [ 'orbit', 'start_time', 'end_time', 'processing',
                    'collection', 'processor_version', 'filename', 'href' ]
        # records are collected in a list and converted to a table once;
        # appending to a data frame per record copies the table every time,
        # and 'DataFrame.append' is not available anymore in recent pandas:
        records = []

        # the catalogue does not depend on the month, read it only once:
        catalog = pystac.Collection.from_file( url )
        # end point for item search:
        endpoint_url = catalog.get_single_link("search").target

        # format of time stamps in filenames:
        fname_tfmt = '%Y%m%dT%H%M%S'

        # info ...
        logging.info( indent+'search all items in timerange ...' )
        # loop over months to follow progress ..
        xt2 = t1
        while xt2 < t2 :
            # start:
            xt1 = xt2
            # number of days in this month:
            nday = calendar.monthrange(xt1.year,xt1.month)[1]
            # start of next month:
            xt2 = datetime.datetime(xt1.year,xt1.month,1) + datetime.timedelta(nday)
            # limit:
            xt2 = min( xt2, t2 )

            # info ..
            logging.info( indent+' %s ..' % xt1.strftime('%Y-%m') )

            # search:
            item_collection = pystac_client.ItemSearch( endpoint_url, datetime=(xt1,xt2),
                                                        intersects=intersects,
                                                        filter=sfilter ).get_all_items()
            # info ..
            logging.info( indent+' found %i items' % len(item_collection) )

            # loop over items:
            for item in item_collection :

                # target file:
                filename = item.properties["physical_name"]

                #
                # filenames:
                #
                #   S5P_OFFL_L2__NO2____20180701T005930_20180701T024100_03698_01_010002_20180707T022838.nc
                #   plt proc [product-] [starttime....] [endtime......] orbit cl prvers [prodtime.....]
                #
                bname = os.path.basename(filename).replace('.nc','')
                # split off the platform name:
                _,rest = bname.split('_',1)
                # processing stream has fixed width:
                processing = rest[0:4]
                # remaining fields; the production time is not used:
                start_time,end_time,orbit,collection_number,processor_version,_ = rest[16:].split('_')

                # convert:
                ts = datetime.datetime.strptime( start_time, fname_tfmt )
                te = datetime.datetime.strptime( end_time  , fname_tfmt )

                # extract download link:
                href = item.assets['download'].href

                # add record:
                records.append( { 'orbit'             : orbit,
                                  'start_time'        : ts,
                                  'end_time'          : te,
                                  'processing'        : processing,
                                  'collection'        : collection_number,
                                  'processor_version' : processor_version,
                                  'filename'          : filename,
                                  'href'              : href } )

            #endfor # entries

        #endwhile # months

        # fill output table; explicit columns ensure that a header
        # is written even if nothing was found:
        output_df = pandas.DataFrame( records, columns=columns )
        # consecutive month windows share their boundary time,
        # so a file could have been found twice; keep first occurrence:
        output_df = output_df.drop_duplicates( subset='filename' )

        # info ..
        logging.info( indent+'save to: %s ...' % output_file )
        # create directory if necessary:
        dirname = os.path.dirname( output_file )
        if len(dirname) > 0 :
            os.makedirs( dirname, exist_ok=True )
        #endif
        # write:
        output_df.to_csv( output_file, sep=';', index=False )

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** end inquire' )
        logging.info( indent+'' )

    #enddef __init__

#endclass CSO_PAL_Inquire


########################################################################
###
### end
###
########################################################################
########################################################################
###
### help
###
########################################################################

"""
.. _cso-pal:

******************
``cso_pal`` module
******************

The :py:mod:`cso_pal` module provides classes for accessing data from the
`Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

Data is available from a special portal:
`S5P-PAL Data Portal <https://data-portal.s5p-pal.com/>`_.

To browse through the data, use the
`browser interface <https://data-portal.s5p-pal.com/browser/>`_.


.. _pal-api:

PAL API
=======

See the PAL `API info <https://data-portal.s5p-pal.com/cat-doc>`_ for latest info.

S5P-PAL product files can be selected and downloaded using the
*Spatio Temporal Asset Catalog* (STAC).
The `PySTAC <https://pystac-client.readthedocs.io/en/latest/>`_ Python interface
is used for access.


Class hierarchy
===============

The classes are defined according to the following hierarchy:

* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_PAL_Inquire`


Classes
=======

"""


########################################################################
###
### modules
###
########################################################################

# modules:
import logging

# tools:
import utopya


########################################################################
###
### STAC inquire
###
########################################################################

class CSO_PAL_Inquire( utopya.UtopyaRc ) :

    """
    Inquire available Sentinel data from the
    `Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

    A query is sent to search for products that are available
    for a certain time and overlap with a specified region.
    The result is a list with orbit files and instructions on how to download them.

    In the settings, specify the url of the portal::

        <rcbase>.url               :  https://data-portal.s5p-pal.com/cat/sentinel-5p/catalog.json

    Specify the time range over which files should be downloaded::

        <rcbase>.timerange.start   :  2018-07-01 00:00
        <rcbase>.timerange.end     :  2018-07-01 23:59

    Provide a product type::

        ! product type (always 10 characters!):
        <rcbase>.producttype       :  L2__NO2___

    Eventually specify a target area;
    only orbits with some pixels within the defined box will be downloaded::

        ! target area, leave empty for globe; format:  west,south:east,north
        <rcbase>.area              :
        !<rcbase>.area             :  -30,30:35,76

    Name of output csv file::

        ! output table, date of today:
        <rcbase>.output.file       :  ${my.work}/PAL_S5P_NO2_%Y-%m-%d.csv

    Example records::

        orbit;start_time;end_time;processing;collection;processor_version;filename;href
        02832;2018-05-01 00:00:52;2018-05-01 01:42:22;PAL_;01;020301;S5P_PAL__L2__NO2____20180501T000052_20180501T014222_02832_01_020301_20211108T132200.nc;https://data-portal.s5p-pal.com/cat/sentinel-5p/download/c3e90b62-c1f6-47b6-9ed7-bd70aa6f46ce
        02833;2018-05-01 01:42:22;2018-05-01 03:23:52;PAL_;01;020301;S5P_PAL__L2__NO2____20180501T014222_20180501T032352_02833_01_020301_20211108T163508.nc;https://data-portal.s5p-pal.com/cat/sentinel-5p/download/a08cda20-0018-434e-bdcc-800dd580db0f
        :

    """

    def __init__( self, rcfile, rcbase='', env=None, indent='' ) :

        """
        Inquire orbit files and save the result as a ';'-separated table.

        Arguments:

        * ``rcfile`` : settings file, passed to :py:class:`.UtopyaRc`
        * ``rcbase`` : prefix of the setting keys, passed to :py:class:`.UtopyaRc`
        * ``env``    : optional dictionary passed to :py:class:`.UtopyaRc`;
          an empty dictionary is used if not provided
        * ``indent`` : text prepended to every log message

        The settings ``url``, ``timerange.start``, ``timerange.end``,
        ``producttype``, ``area``, and ``output.file`` are read.
        The time range is searched month by month, and one record per
        file is written to the output table.
        """

        # modules:
        import os
        import datetime
        import calendar
        import pystac
        import pystac_client
        import pandas

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** Inquire files available on PAL' )
        logging.info( indent+'' )

        # a new dictionary per call; a '{}' default in the signature
        # would be one object shared between all calls:
        if env is None :
            env = {}
        #endif

        # init base object:
        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase, env=env )

        # domain:
        url = self.GetSetting( 'url' )
        # info ...
        logging.info( indent+'url : %s' % url )

        # time range:
        t1 = self.GetSetting( 'timerange.start', totype='datetime' )
        t2 = self.GetSetting( 'timerange.end'  , totype='datetime' )
        # info ...
        tfmt = '%Y-%m-%d %H:%M'
        logging.info( indent+'timerange: [%s,%s]' % (t1.strftime(tfmt),t2.strftime(tfmt)) )

        # product type (always 10 characters!):
        #   L2__NO2___
        producttype = self.GetSetting( 'producttype' )
        # filter for ItemSearch:
        sfilter = "s5p:file_type='%s'" % producttype

        # area of interest: west,south:east,north
        area = self.GetSetting( 'area' )
        # defined?
        if len(area) > 0 :
            # convert from format for "dhusget.sh":
            #   west,south:east,north
            west,south,east,north = map( float, area.replace(':',' ').replace(',',' ').split() )
            # info ...
            logging.info( indent+'area : [%8.2f,%8.2f] x [%8.2f,%8.2f]' % (west,east,south,north) )
            # intersection description, closed ring around the box:
            intersects = { 'type' : 'Polygon',
                           'coordinates' : [[[west,south],[east,south],[east,north],[west,north],[west,south]]] }
        else :
            # info ...
            logging.info( indent+'area : no' )
            # no intersection:
            intersects = None
        #endif

        # target file, might include time templates:
        output_file__template = self.GetSetting( 'output.file' )
        # current time:
        output_file = datetime.datetime.now().strftime( output_file__template )

        # columns of output table, in order of appearance:
        columns = [ 'orbit', 'start_time', 'end_time', 'processing',
                    'collection', 'processor_version', 'filename', 'href' ]
        # records are collected in a list and converted to a table once;
        # appending to a data frame per record copies the table every time,
        # and 'DataFrame.append' is not available anymore in recent pandas:
        records = []

        # the catalogue does not depend on the month, read it only once:
        catalog = pystac.Collection.from_file( url )
        # end point for item search:
        endpoint_url = catalog.get_single_link("search").target

        # format of time stamps in filenames:
        fname_tfmt = '%Y%m%dT%H%M%S'

        # info ...
        logging.info( indent+'search all items in timerange ...' )
        # loop over months to follow progress ..
        xt2 = t1
        while xt2 < t2 :
            # start:
            xt1 = xt2
            # number of days in this month:
            nday = calendar.monthrange(xt1.year,xt1.month)[1]
            # start of next month:
            xt2 = datetime.datetime(xt1.year,xt1.month,1) + datetime.timedelta(nday)
            # limit:
            xt2 = min( xt2, t2 )

            # info ..
            logging.info( indent+' %s ..' % xt1.strftime('%Y-%m') )

            # search:
            item_collection = pystac_client.ItemSearch( endpoint_url, datetime=(xt1,xt2),
                                                        intersects=intersects,
                                                        filter=sfilter ).get_all_items()
            # info ..
            logging.info( indent+' found %i items' % len(item_collection) )

            # loop over items:
            for item in item_collection :

                # target file:
                filename = item.properties["physical_name"]

                #
                # filenames:
                #
                #   S5P_OFFL_L2__NO2____20180701T005930_20180701T024100_03698_01_010002_20180707T022838.nc
                #   plt proc [product-] [starttime....] [endtime......] orbit cl prvers [prodtime.....]
                #
                bname = os.path.basename(filename).replace('.nc','')
                # split off the platform name:
                _,rest = bname.split('_',1)
                # processing stream has fixed width:
                processing = rest[0:4]
                # remaining fields; the production time is not used:
                start_time,end_time,orbit,collection_number,processor_version,_ = rest[16:].split('_')

                # convert:
                ts = datetime.datetime.strptime( start_time, fname_tfmt )
                te = datetime.datetime.strptime( end_time  , fname_tfmt )

                # extract download link:
                href = item.assets['download'].href

                # add record:
                records.append( { 'orbit'             : orbit,
                                  'start_time'        : ts,
                                  'end_time'          : te,
                                  'processing'        : processing,
                                  'collection'        : collection_number,
                                  'processor_version' : processor_version,
                                  'filename'          : filename,
                                  'href'              : href } )

            #endfor # entries

        #endwhile # months

        # fill output table; explicit columns ensure that a header
        # is written even if nothing was found:
        output_df = pandas.DataFrame( records, columns=columns )
        # consecutive month windows share their boundary time,
        # so a file could have been found twice; keep first occurrence:
        output_df = output_df.drop_duplicates( subset='filename' )

        # info ..
        logging.info( indent+'save to: %s ...' % output_file )
        # create directory if necessary:
        dirname = os.path.dirname( output_file )
        if len(dirname) > 0 :
            os.makedirs( dirname, exist_ok=True )
        #endif
        # write:
        output_df.to_csv( output_file, sep=';', index=False )

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** end inquire' )
        logging.info( indent+'' )

    #enddef __init__

#endclass CSO_PAL_Inquire


########################################################################
###
### end
###
########################################################################