TNO Intern

Commit d6317b83 authored by Arjo Segers's avatar Arjo Segers
Browse files

Support ESA `PAL` archive.

parent 40100cc7
Loading
Loading
Loading
Loading

py/cso_pal.py

0 → 100644
+280 −0
Original line number Diff line number Diff line

########################################################################
###
### help
###
########################################################################

"""
.. _cso-pal:

******************
``cso_pal`` module
******************

The :py:mod:`cso_pal` module provides classes for accessing  data from the 
`Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

Data is available from a special portal: `S5P-PAL Data Portal <https://data-portal.s5p-pal.com/>`_.

To browse through the data, use the `browser interface <https://data-portal.s5p-pal.com/browser/>`_.


.. _pal-api:

PAL API
=======

See the PAL `API info <https://data-portal.s5p-pal.com/cat-doc>`_ for latest info.

S5P-PAL product files can be selected and downloaded using the *Spatio Temporal Asset Catalog* (STAC).
The `PySTAC <https://pystac-client.readthedocs.io/en/latest/>`_ Python interface is used for access.


Class hierarchy
===============

The classes are defined according to the following hierarchy:


* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_PAL_Inquire`


Classes
=======


"""


########################################################################
###
### modules
###
########################################################################

# modules:
import logging

# tools:
import utopya


########################################################################
###
### STAC inquire
###
########################################################################

class CSO_PAL_Inquire( utopya.UtopyaRc ) :

    """
    Inquire available Sentinel data from the `Product Algorithm Laboratory <https://www.s5p-pal.com/>`_.

    A query is sent to search for products that are available
    for a certain time and overlap with a specified region.
    The result is a table with orbit files and the links from which
    they can be downloaded; the table is saved as a csv file.

    In the settings, specify the url of the portal::

      <rcbase>.url              :  https://data-portal.s5p-pal.com/cat/sentinel-5p/catalog.json

    Specify the time range over which files should be downloaded::

      <rcbase>.timerange.start  :  2018-07-01 00:00
      <rcbase>.timerange.end    :  2018-07-01 23:59

    Provide a product type::

        ! product type (always 10 characters!):
        <rcbase>.producttype            :  L2__NO2___

    Eventually specify a target area, only orbits with some pixels within the defined box will be downloaded::

      ! target area, leave empty for globe; format:  west,south:east,north
      <rcbase>.area             :
      !<rcbase>.area             :  -30,30:35,76

    Name of output csv file::

        ! output table, date of today:
        <rcbase>.output.file            :  ${my.work}/PAL_S5P_NO2_%Y-%m-%d.csv

    Example records::

        orbit;         start_time;           end_time;processing;collection;processor_version;filename                                                                              ;href
        02832;2018-05-01 00:00:52;2018-05-01 01:42:22;PAL_      ;01        ;020301           ;S5P_PAL__L2__NO2____20180501T000052_20180501T014222_02832_01_020301_20211108T132200.nc;https://data-portal.s5p-pal.com/cat/sentinel-5p/download/c3e90b62-c1f6-47b6-9ed7-bd70aa6f46ce
        02833;2018-05-01 01:42:22;2018-05-01 03:23:52;PAL_      ;01        ;020301           ;S5P_PAL__L2__NO2____20180501T014222_20180501T032352_02833_01_020301_20211108T163508.nc;https://data-portal.s5p-pal.com/cat/sentinel-5p/download/a08cda20-0018-434e-bdcc-800dd580db0f
        :

    The header line is always written, also if no files were found.

    """

    @staticmethod
    def _ParseFilename( filename ) :

        """
        Split the name of a product file into its elements.

        The filenames are formatted as::

            S5P_OFFL_L2__NO2____20180701T005930_20180701T024100_03698_01_010002_20180707T022838.nc
            plt proc [product-] [starttime....] [endtime......] orbit cl prvers [prodtime.....]

        Returns a dictionary with keys ``orbit``, ``start_time``, ``end_time``,
        ``processing``, ``collection``, and ``processor_version``;
        the times are :py:class:`datetime.datetime` objects, the other values are strings.

        Raises a ``ValueError`` if the time-and-orbit part does not consist
        of 6 elements or if the times could not be converted.
        """

        # modules:
        import os
        import datetime

        # name without directory:
        bname = os.path.basename( filename )
        # remove the extension, but only at the end of the name:
        if bname.endswith('.nc') : bname = bname[:-3]

        # skip platform name ("S5P"):
        rest = bname.split('_',1)[1]
        # fixed-width parts: processing stream (4 chars), then the product type
        # (10 chars, not needed here), then the underscore separated remainder:
        processing = rest[0:4]
        # the last element (production time) is not stored:
        start_time,end_time,orbit,collection,processor_version,_ = rest[16:].split('_')

        # time format in filenames:
        tfmt = '%Y%m%dT%H%M%S'

        # collect:
        return { 'orbit'             : orbit,
                 'start_time'        : datetime.datetime.strptime(start_time,tfmt),
                 'end_time'          : datetime.datetime.strptime(end_time  ,tfmt),
                 'processing'        : processing,
                 'collection'        : collection,
                 'processor_version' : processor_version }

    #enddef _ParseFilename

    def __init__( self, rcfile, rcbase='', env={}, indent='' ) :

        """
        Inquire orbit files and save the table with results.

        Arguments:

        * ``rcfile``, ``rcbase``, ``env`` : passed to the initialization of the
          :py:class:`.UtopyaRc` base class (``env`` is only passed on, not changed here);
        * ``indent`` : prefix for the log messages.

        Raises a ``ValueError`` if the ``area`` setting or the name of
        a product file does not have the expected format.
        """

        # modules:
        import os
        import datetime
        import calendar
        import pystac
        import pystac_client
        import pandas

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** Inquire files available on PAL' )
        logging.info( indent+'' )

        # init base object:
        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase, env=env )

        # url of catalog:
        url = self.GetSetting( 'url' )
        # info ...
        logging.info( indent+'url          : %s' % url )

        # time range:
        t1 = self.GetSetting( 'timerange.start', totype='datetime' )
        t2 = self.GetSetting( 'timerange.end'  , totype='datetime' )
        # info ...
        tfmt = '%Y-%m-%d %H:%M'
        logging.info( indent+'timerange: [%s,%s]' % (t1.strftime(tfmt),t2.strftime(tfmt)) )

        # product type (always 10 characters!):
        #    L2__NO2___
        producttype = self.GetSetting( 'producttype' )
        # filter for ItemSearch:
        sfilter = "s5p:file_type='%s'" % producttype

        # area of interest: west,south:east,north
        area = self.GetSetting( 'area' )
        # defined?
        if len(area.strip()) > 0 :
            # split at the separators:
            #    west,south:east,north
            values = area.replace(':',' ').replace(',',' ').split()
            # check ..
            if len(values) != 4 :
                raise ValueError( 'area should be formatted as "west,south:east,north", found "%s"' % area )
            #endif
            # convert:
            west,south,east,north = map( float, values )
            # info ...
            logging.info( indent+'area         : [%8.2f,%8.2f] x [%8.2f,%8.2f]' % (west,east,south,north) )
            # intersection description, closed ring around the box:
            intersects = { 'type' : 'Polygon',
                           'coordinates' : [[[west,south],[east,south],[east,north],[west,north],[west,south]]] }
        else :
            # info ...
            logging.info( indent+'area         : no' )
            # no intersection:
            intersects = None
        #endif

        # target file, might include time templates:
        output_file__template = self.GetSetting( 'output.file' )
        # filled for current time:
        output_file = datetime.datetime.now().strftime( output_file__template )

        # columns of output table, in the order in which they are written:
        columns = [ 'orbit', 'start_time', 'end_time', 'processing',
                    'collection', 'processor_version', 'filename', 'href' ]
        # records are collected in a list and converted to a table at the end;
        # adding rows to a DataFrame one by one is slow, and "DataFrame.append"
        # is not available anymore in recent pandas versions:
        records = []
        # filenames that are already in the table:
        seen = set()

        # catalog entry, read only once since it is the same for all months:
        catalog = pystac.Collection.from_file( url )
        # end point for item search:
        endpoint_url = catalog.get_single_link("search").target

        # info ...
        logging.info( indent+'search all items in timerange ...' )

        # loop over months to follow progress ..
        xt2 = t1
        while xt2 < t2 :
            # start:
            xt1 = xt2
            # start of next month:
            _,nday = calendar.monthrange( xt1.year, xt1.month )
            xt2 = datetime.datetime(xt1.year,xt1.month,1) + datetime.timedelta(nday)
            # limit:
            xt2 = min( xt2, t2 )
            # info ..
            logging.info( indent+'  %s ..' % xt1.strftime('%Y-%m') )

            # search:
            item_collection = pystac_client.ItemSearch( endpoint_url,
                                                        datetime=(xt1,xt2),
                                                        intersects=intersects,
                                                        filter=sfilter ).get_all_items()
            # info ..
            logging.info( indent+'    found %i items' % len(item_collection) )

            # loop over items:
            for item in item_collection :

                # target file:
                filename = item.properties["physical_name"]

                # the end of a month is also the start of the next search,
                # an item at the boundary could therefore be returned twice:
                if filename in seen : continue
                seen.add( filename )

                # elements extracted from filename:
                rec = self._ParseFilename( filename )
                # add original name and download link:
                rec['filename'] = filename
                rec['href'    ] = item.assets['download'].href
                # store:
                records.append( rec )

            #endfor # items

        #endwhile  # months

        # convert to table; the explicit columns ensure
        # that a header is written even without records:
        output_df = pandas.DataFrame( records, columns=columns )

        # info ..
        logging.info( indent+'save to: %s ...' % output_file )
        # create directory if necessary:
        dirname = os.path.dirname( output_file )
        if len(dirname) > 0 : os.makedirs( dirname, exist_ok=True )
        # write:
        output_df.to_csv( output_file, sep=';', index=False )

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** end inquire' )
        logging.info( indent+'' )

    #enddef __init__

#endclass CSO_PAL_Inquire


########################################################################
###
### end
###
########################################################################