TNO Intern

Commit 2c9a4eac authored by Arjo Segers's avatar Arjo Segers
Browse files

Introduced tools to inquire S5p archives.

parent d6317b83
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -23,8 +23,9 @@ Actual implementations can be found in submodules:
.. toctree::
   :maxdepth: 1

   pymod-cso_inquire
   pymod-cso_scihub

   pymod-cso_pal

.. Label, use :ref:`text <label>` for reference
.. _cso-classes:

py/cso_inquire.py

0 → 100644
+365 −0
Original line number Diff line number Diff line

########################################################################
###
### help
###
########################################################################

"""
.. _cso-inquire:

**********************
``cso_inquire`` module
**********************

The :py:mod:`cso_inquire` module provides some general classes to support
inquiries of data portals.


Class hierarchy
===============

The classes are defined according to the following hierarchy:

* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_Inquire_Plot`



Classes
=======


"""


########################################################################
###
### modules
###
########################################################################

# modules:
import logging

# tools:
import utopya


########################################################################
###
### classes
###
########################################################################


class CSO_Inquire_Plot( utopya.UtopyaRc ) :

    """
    Create plot of processing version versus time to indicate the available orbits in the SciHub archive.
    
    The information on orbits is taken from a csv table created by :py:class:`CSO_SciHub_Inquire` class.
    Specify the name of the table file in the settings::
    
        ! listing file:
        cso.tutorial.inquire-s5phub-plot.file     :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv
        
    The date templates are by default filled for the current day.
    Alternatively, specify an explicit date::
    
        !~ specify dates ("yyyy-mm-dd") to use historic table:
        cso.tutorial.inquire-s5phub-plot.filedate  :  2022-01-28
    
    The plot could also be created by combining multiple tables;
    use a semi-colon to separate the file names (and eventually the dates)::
    
        ! listing files:
        cso.tutorial.inquire-s5phub-plot.file       :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv ; \\
                                                       ${my.work}/Copernicus/Copernicus_S5p_NO2_pal_%Y-%m-%d.csv
        !~ specify dates ("yyyy-mm-dd") to use historic tables:
        !cso.tutorial.inquire-s5phub-plot.filedate  :  2022-01-28 ; 2022-01-28

    If dates are specified, there should be at least one entry per file;
    an empty entry means that the current day is used for that file.

    The tables are read as ';'-separated files and should at least contain the columns
    ``start_time``, ``end_time``, ``processing``, ``processor_version``, and ``orbit``.

    The created plot shows a time line and on the vertical axis the processor versions;
    a bar indicates when a certain version was used to process orbits:
    
    .. figure:: figs/NO2/Copernicus_S5p_NO2.png
       :scale: 50 %
       :align: center
       :alt: Overview of available NO2 processings.
       
    Specify the name of the target plot::
    
        ! output table, date of today:
        <rcbase>.output.file            :  ${my.work}/Copernicus_S5P_NO2_%Y-%m-%d.png

    The following flag is used to ensure that the plot is renewed::
    
        ! renew existing plots?
        cso.tutorial.inquire-s5phub-plot.renew                    :  True

    """
    
    def __init__( self, rcfile, rcbase='', env=None, indent='' ) :
    
        """
        Read the settings, and create the inquire plot if it is not
        present yet or should be renewed.

        Arguments:

        * ``rcfile``, ``rcbase``, ``env`` : passed to the :py:class:`.UtopyaRc` initialization;
          if ``env`` is not provided, a new empty dictionary is passed
        * ``indent`` : text inserted in front of the info messages

        The settings ``renew``, ``file``, ``filedate`` (optional),
        and ``output.file`` are read; all work is done on initialization.
        """
                        
        # modules:
        import os
        import datetime
        
        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** create inquire plot' )
        logging.info( indent+'' )

        # new dictionary per call, a default "{}" would be shared between calls:
        if env is None :
            env = {}
        #endif

        # init base object:
        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase, env=env )
        
        # renew output?
        renew = self.GetSetting( 'renew', totype='bool' )

        # table file(s) to be used:
        filename_templates = self.GetSetting( 'file' ).split(';')
        # count:
        nfile = len(filename_templates)
        # time stamp per file? default is an empty date for each of the files:
        filedates = self.GetSetting( 'filedate', default=(';'*(nfile-1)) ).split(';')
        # list with time expanded:
        filenames = self._inquire_filenames( filename_templates, filedates )
        
        # target file template:
        figfile_template = self.GetSetting( 'output.file' )
        # target file, templates are filled for the current time:
        fig_file = datetime.datetime.now().strftime( figfile_template )

        # create?
        if (not os.path.isfile(fig_file)) or renew :
        
            # info ..
            logging.info( indent+'create %s ...' % fig_file )
            
            # combined table:
            df = self._inquire_read_tables( filenames, indent=indent )

            # title is formed from the name of the first table:
            title = os.path.basename( filenames[0] ).replace('.csv','').replace('_',' ')

            # create figure and save:
            self._inquire_plot( df, title, fig_file )
            
        else :
        
            # info ..
            logging.info( indent+'keep  %s ...' % fig_file )
            
        #endif # renew

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** end inquire plot' )
        logging.info( indent+'' )
        
    #enddef __init__

    # *

    @staticmethod
    def _inquire_filenames( filename_templates, filedates ) :

        """
        Return list of filenames formed by expanding the time templates
        in each of the ``filename_templates`` for the corresponding
        entry of ``filedates`` ("yyyy-mm-dd"); for an empty (or blank)
        date the current time is used.

        Raises a ``ValueError`` if less dates than templates are provided.
        """

        # modules:
        import datetime

        # check ..
        if len(filedates) < len(filename_templates) :
            msg = 'number of file dates (%i) is less than number of files (%i)' \
                    % (len(filedates),len(filename_templates))
            logging.error( msg )
            raise ValueError( msg )
        #endif

        # init result:
        filenames = []
        # loop over templates and corresponding dates:
        for filename_template,filedate in zip(filename_templates,filedates) :
            # remove surrounding whitespace first, then a blank entry counts as empty:
            filedate = filedate.strip()
            # time?
            if len(filedate) > 0 :
                t0 = datetime.datetime.strptime( filedate, '%Y-%m-%d' )
            else :
                t0 = datetime.datetime.now()
            #endif
            # expand time templates:
            filenames.append( t0.strftime( filename_template.strip() ) )
        #endfor

        # ok
        return filenames

    #enddef _inquire_filenames

    # *

    @staticmethod
    def _inquire_read_tables( filenames, indent='' ) :

        """
        Read the ';'-separated inquire tables in ``filenames``
        and return the combined data frame.
        All columns are read as strings, except ``start_time``
        and ``end_time`` which are parsed as dates.

        Raises a ``FileNotFoundError`` if a table is missing,
        and a ``ValueError`` if the combined table has no records.
        """

        # modules:
        import os
        import pandas

        # collect tables:
        tables = []
        # loop:
        for filename in filenames :
            # check ..
            if not os.path.isfile(filename) :
                msg = 'inquire table not found: %s' % filename
                logging.error( msg )
                raise FileNotFoundError( msg )
            #endif
            # info ..
            logging.info( indent+'read inquire table: %s' % filename )
            # read:
            xdf = pandas.read_csv( filename, sep=';', skip_blank_lines=True,
                                    parse_dates=['start_time','end_time'],
                                    dtype='str' )
            # store:
            tables.append( xdf )
        #endfor

        # combine; "DataFrame.append" is not available anymore in recent pandas versions:
        df = pandas.concat( tables, ignore_index=True )

        # check, no time range could be defined without records ...
        if len(df) == 0 :
            msg = 'no records found in inquire table(s): %s' % ' ; '.join(filenames)
            logging.error( msg )
            raise ValueError( msg )
        #endif

        # ok
        return df

    #enddef _inquire_read_tables

    # *

    @staticmethod
    def _inquire_plot( df, title, fig_file ) :

        """
        Create the time line plot from the combined inquire table ``df``
        and save it to ``fig_file``; the ``title`` is put above the axes.

        For each processor version and stream for which orbits are found
        a row of markers is added, with one marker per week in which
        at least one orbit is available.
        """

        # modules:
        import numpy
        import pandas
        import matplotlib.pyplot as plt
        import matplotlib.dates  as mdates

        # frequency used to group the orbits: weekly
        freq = 'W'

        # time range:
        t1 = df['start_time'].min()
        t2 = df['start_time'].max()

        # full years, extra space for text:
        t1 = pandas.Timestamp( year=t1.year  , month=1, day=1 )
        t2 = max( t2+pandas.Timedelta(180,'days'), pandas.Timestamp( year=t2.year+1, month=1, day=1 ) )
        # location of annotation, just before end of time axis:
        t2x = t2-pandas.Timedelta(20,'days')

        # streams in fixed order, also used for the legend;
        # the collection number is ignored, this is mainly a change in L1B data ..
        streams = ['NRTI','OFFL','RPRO','PAL_']

        # color per stream:
        colors = { 'NRTI' : 'blue',
                   'OFFL' : 'orange',
                   'RPRO' : 'green',
                   'PAL_' : 'lightgreen' }

        # processors: '010101', ...
        procs = df['processor_version'].unique()
        procs.sort()

        # convert processor labels '010203' to version 'v1.2.3':
        proclabs = { proc : 'v%i.%i.%i' % (int(proc[0:2]),int(proc[2:4]),int(proc[4:6]))
                        for proc in procs }

        # storage for handles used for legend:
        streamh = {}

        # processor of previous row:
        proc_prev = ''
        # row index in plot:
        irow = 0

        # new:
        fig = plt.figure( figsize=(12,4) )
        # ensure that figure is closed, also in case of errors:
        try :

            # add axes:
            ax = fig.add_axes( [0.03,0.07,0.94,0.85] )

            # loop over processors:
            for proc in procs :
                # loop over streams:
                for stream in streams :

                    # select:
                    df2 = df[ (df['processing'       ] == stream) & \
                              (df['processor_version'] == proc  )   ]
                    # no orbits? then no row needed:
                    if len(df2) == 0 : continue

                    # next row:
                    irow += 1
                    # separation line?
                    if (irow > 1) and (proc != proc_prev) :
                        # style depends on what is changed:
                        if proc[0:2] != proc_prev[0:2] :
                            linestyle = '-'
                        elif proc[2:4] != proc_prev[2:4] :
                            linestyle = '--'
                        else :
                            linestyle = ':'
                        #endif
                        # horizontal line:
                        ax.plot( [t1,t2], [irow-0.5,irow-0.5], color='0.5', linestyle=linestyle )
                    #endif
                    # label only the first row of a processor:
                    if proc != proc_prev :
                        ax.text( t2x, irow, proclabs[proc],
                                   horizontalalignment='right', verticalalignment='center' )
                    #endif
                    # store for next row:
                    proc_prev = proc

                    # group by period, count number of orbits:
                    nn = df2.set_index('start_time').groupby(pandas.Grouper(freq=freq))['orbit'].count()
                    # periods with at least one orbit:
                    xvalues = nn[nn>0].index
                    yvalues = numpy.zeros(len(xvalues)) + irow
                    # style:
                    color = colors[stream]
                    style = dict( linestyle='None', marker='s', color=color, markerfacecolor=color )
                    # markers:
                    p = ax.plot( xvalues, yvalues, **style )

                    # store first handle per stream for legend:
                    if stream not in streamh :
                        streamh[stream] = p[0]
                    #endif

                #endfor # streams
            #endfor  # processors

            # time axis:
            ax.set_xlim((t1,t2))
            ax.xaxis.set_minor_locator( mdates.MonthLocator() )
            ax.xaxis.set_major_locator( mdates.YearLocator() )
            ax.xaxis.set_major_formatter( mdates.DateFormatter('%Y'))
            ax.grid(axis='x')
            # y-axis:
            ax.set_yticks([])
            ax.set_ylim([0.5,irow+0.5])
            # annote:
            ax.set_title( title )
            # legend for the streams that were found, in fixed order:
            labs = [ stream for stream in streams if stream in streamh ]
            hh = [ streamh[stream] for stream in labs ]
            ax.legend( hh, labs, loc='upper left' )

            # save:
            fig.savefig( fig_file )

        finally :

            # release figure:
            plt.close( fig )

        #endtry

    #enddef _inquire_plot
    
#endclass CSO_Inquire_Plot



########################################################################
###
### end
###
########################################################################
+876 −75

File changed.

Preview size limit exceeded, changes collapsed.