########################################################################
###
### help
###
########################################################################

"""
.. _cso-inquire:

**********************
``cso_inquire`` module
**********************

The :py:mod:`cso_inquire` module provides some general classes
to support inquiries of data portals.

Class hierarchy
===============

The classes are defined according to the following hierarchy:

* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_Inquire_Plot`

Classes
=======

"""

########################################################################
###
### modules
###
########################################################################

# modules:
import logging

# tools:
import utopya


########################################################################
###
### classes
###
########################################################################

class CSO_Inquire_Plot( utopya.UtopyaRc ) :

    """
    Create plot of processing version versus time to indicate the
    available orbits in the SciHub archive.

    The information on orbits is taken from a csv table created by the
    :py:class:`CSO_SciHub_Inquire` class.
    Specify the name of the table file in the settings::

        ! listing file:
        cso.tutorial.inquire-s5phub-plot.file  :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv

    The date templates are by default filled for the current day.
    Alternatively, specify an explicit date::

        !~ specify dates ("yyyy-mm-dd") to use historic table:
        cso.tutorial.inquire-s5phub-plot.filedate  :  2022-01-28

    The plot could also be created by combining multiple tables;
    use a semi-colon to separate the file names (and eventually the dates)::

        ! listing files:
        cso.tutorial.inquire-s5phub-plot.file  :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv ; \\
                                                  ${my.work}/Copernicus/Copernicus_S5p_NO2_pal_%Y-%m-%d.csv
        !~ specify dates ("yyyy-mm-dd") to use historic tables:
        !cso.tutorial.inquire-s5phub-plot.filedate  :  2022-01-28 ; 2022-01-28

    The created plot shows a time line and on the vertical ax the processor
    versions; a bar indicates when a certain version was used to process orbits:

    .. figure:: figs/NO2/Copernicus_S5p_NO2.png
       :scale: 50 %
       :align: center
       :alt: Overview of available NO2 processings.

    Specify the name of the target plot::

        ! output table, date of today:
        <rcbase>.output.file      :  ${my.work}/Copernicus_S5P_NO2_%Y-%m-%d.png

    The following flag is used to ensure that the plot is renewed::

        ! renew existing plots?
        cso.tutorial.inquire-s5phub-plot.renew   :  True

    """

    def __init__( self, rcfile, rcbase='', env=None, indent='' ) :

        """
        Read the inquire table(s), create the version-versus-time plot,
        and save it to the configured figure file.

        Arguments:

        * ``rcfile``  : settings file
        * ``rcbase``  : prefix of settings keys
        * ``env``     : optional dictionary with environment variables
          used for expansion of the settings (default: empty)
        * ``indent``  : prefix for log messages
        """

        # modules:
        import os
        import numpy
        import datetime
        import pandas
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** create inquire plot' )
        logging.info( indent+'' )

        # init base object; avoid a shared mutable default for "env":
        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase,
                                    env=({} if env is None else env) )

        # renew output?
        renew = self.GetSetting( 'renew', totype='bool' )

        # table file(s) to be used:
        filename_templates = self.GetSetting( 'file' ).split(';')
        # count:
        nfile = len(filename_templates)

        # time stamp in file? default is one empty entry per file:
        filedates = self.GetSetting( 'filedate', default=(';'*(nfile-1)) ).split(';')
        # pad in case fewer dates than files were specified;
        # an empty entry means "use today":
        filedates += [''] * ( nfile - len(filedates) )

        # list with time templates expanded:
        filenames = []
        for ifile in range(nfile) :
            # current:
            filename_template = filename_templates[ifile]
            # explicit date specified? otherwise fill for today:
            filedate = filedates[ifile].strip()
            if len(filedate) > 0 :
                t0 = datetime.datetime.strptime( filedate, '%Y-%m-%d' )
            else :
                t0 = datetime.datetime.now()
            #endif
            # expand time templates:
            filenames.append( t0.strftime( filename_template.strip() ) )
        #endfor

        # target file template:
        figfile_template = self.GetSetting( 'output.file' )
        # target file, time templates filled for today:
        t = datetime.datetime.now()
        fig_file = t.strftime( figfile_template )

        # create?
        if (not os.path.isfile(fig_file)) or renew :

            # info ..
            logging.info( indent+'create %s ...' % fig_file )

            # combine tables:
            df = None
            # loop:
            for filename in filenames :
                # check ..
                if not os.path.isfile(filename) :
                    msg = 'inquire table not found: %s' % filename
                    logging.error( msg )
                    raise FileNotFoundError( msg )
                #endif
                # info ..
                logging.info( indent+'read inquire table: %s' % filename )
                # read; the time columns are parsed as dates,
                # all other columns are kept as str:
                xdf = pandas.read_csv( filename, sep=';', skip_blank_lines=True,
                                          parse_dates=['start_time','end_time'],
                                          dtype='str' )
                # combine; note that "DataFrame.append" was removed
                # in pandas 2, use "concat" instead:
                if df is None :
                    df = xdf
                else :
                    df = pandas.concat( [df,xdf], ignore_index=True )
                #endif
            #endfor

            # first file is used for annotation ...
            filename = filenames[0]
            # title derived from file name:
            title = os.path.basename( filename ).replace('.csv','').replace('_',' ')

            # frequency used to group orbit counts:
            #freq = 'MS'   # month
            freq = 'W'     # week

            # time range covered by the orbits:
            t1 = df['start_time'].min()
            t2 = df['start_time'].max()
            # full years, extra space at the end for text:
            t1 = pandas.Timestamp( year=t1.year, month=1, day=1 )
            t2 = max( t2 + pandas.Timedelta(180,'days'),
                      pandas.Timestamp( year=t2.year+1, month=1, day=1 ) )
            # location of version annotation:
            t2x = t2 - pandas.Timedelta(20,'days')

            # streams in fixed plot order:
            streams = ['NRTI','OFFL','RPRO','PAL_']
            # color per stream:
            colors = { 'NRTI' : 'blue'      ,
                       'OFFL' : 'orange'    ,
                       'RPRO' : 'green'     ,
                       'PAL_' : 'lightgreen' }

            # processors: '010101', ...
            procs = df['processor_version'].unique()
            procs.sort()

            # convert processor labels '010203' to version 'v1.2.3':
            proclabs = {}
            for proc in procs :
                proclabs[proc] = 'v%i.%i.%i' % (int(proc[0:2]),int(proc[2:4]),int(proc[4:6]))
            #endfor

            # storage for handles used for legend:
            streamh = {}
            # previous processor version:
            proc_prev = ''
            # row index in plot:
            irow = 0

            # new figure:
            fig = plt.figure( figsize=(12,4) )
            ax = fig.add_axes( [0.03,0.07,0.94,0.85] )

            # loop over processor versions;
            # collections are ignored, these are mainly a change in L1B data:
            for proc in procs :
                # loop over streams:
                for stream in streams :
                    # select rows for this stream/processor combination:
                    df2 = df[ (df['processing'       ] == stream) & \
                              (df['processor_version'] == proc  ) ]
                    # any?
                    if len(df2) > 0 :
                        # next row:
                        irow += 1
                        # separation line?
                        if (irow > 1) and (proc != proc_prev) :
                            # style depends on which part of the version changed;
                            # major, minor, or (the only remaining option) patch:
                            if proc[0:2] != proc_prev[0:2] :
                                linestyle = '-'
                            elif proc[2:4] != proc_prev[2:4] :
                                linestyle = '--'
                            else :
                                linestyle = ':'
                            #endif
                            # horizontal line:
                            ax.plot( [t1,t2], [irow-0.5,irow-0.5], color='0.5', linestyle=linestyle )
                        #endif
                        # label?
                        if proc != proc_prev :
                            # add version text:
                            ax.text( t2x, irow, proclabs[proc],
                                       horizontalalignment='right', verticalalignment='center' )
                        #endif
                        # store for next line:
                        proc_prev = proc
                        # group by period, count number of orbits:
                        nn = df2.set_index('start_time').groupby(pandas.Grouper(freq=freq))['orbit'].count()
                        # periods with orbits:
                        xvalues = nn[nn>0].index
                        yvalues = numpy.zeros(len(xvalues)) + irow
                        # style:
                        color = colors[stream]
                        style = dict( linestyle='None', marker='s', color=color, markerfacecolor=color )
                        # markers:
                        p = ax.plot( xvalues, yvalues, **style )
                        # store handle for legend:
                        if stream not in streamh.keys() : streamh[stream] = p[0]
                    #endif # orbits found
                #endfor # streams
            #endfor # processors

            # time axis:
            ax.set_xlim((t1,t2))
            ax.xaxis.set_minor_locator( mdates.MonthLocator() )
            ax.xaxis.set_major_locator( mdates.YearLocator() )
            ax.xaxis.set_major_formatter( mdates.DateFormatter('%Y') )
            ax.grid(axis='x')
            # y-axis:
            ax.set_yticks([])
            ax.set_ylim([0.5,irow+0.5])
            # annotate:
            ax.set_title( title )

            # legend for streams that actually occur:
            labs = []
            hh = []
            for stream in streams :
                if stream in streamh.keys() :
                    labs.append( stream )
                    hh.append( streamh[stream] )
                #endif
            #endfor
            ax.legend( hh, labs, loc='upper left' )

            # save:
            fig.savefig( fig_file )

        else :

            # info ..
            logging.info( indent+'keep %s ...' % fig_file )

        #endif # renew

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** end inquire plot' )
        logging.info( indent+'' )

    #enddef __init__

#endclass CSO_Inquire_Plot


########################################################################
###
### end
###
########################################################################
########################################################################
###
### help
###
########################################################################

"""
.. _cso-inquire:

**********************
``cso_inquire`` module
**********************

The :py:mod:`cso_inquire` module provides some general classes
to support inquiries of data portals.

Class hierarchy
===============

The classes are defined according to the following hierarchy:

* :py:class:`.UtopyaRc`

  * :py:class:`.CSO_Inquire_Plot`

Classes
=======

"""

########################################################################
###
### modules
###
########################################################################

# modules:
import logging

# tools:
import utopya


########################################################################
###
### classes
###
########################################################################

class CSO_Inquire_Plot( utopya.UtopyaRc ) :

    """
    Create plot of processing version versus time to indicate the
    available orbits in the SciHub archive.

    The information on orbits is taken from a csv table created by the
    :py:class:`CSO_SciHub_Inquire` class.
    Specify the name of the table file in the settings::

        ! listing file:
        cso.tutorial.inquire-s5phub-plot.file  :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv

    The date templates are by default filled for the current day.
    Alternatively, specify an explicit date::

        !~ specify dates ("yyyy-mm-dd") to use historic table:
        cso.tutorial.inquire-s5phub-plot.filedate  :  2022-01-28

    The plot could also be created by combining multiple tables;
    use a semi-colon to separate the file names (and eventually the dates)::

        ! listing files:
        cso.tutorial.inquire-s5phub-plot.file  :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv ; \\
                                                  ${my.work}/Copernicus/Copernicus_S5p_NO2_pal_%Y-%m-%d.csv
        !~ specify dates ("yyyy-mm-dd") to use historic tables:
        !cso.tutorial.inquire-s5phub-plot.filedate  :  2022-01-28 ; 2022-01-28

    The created plot shows a time line and on the vertical ax the processor
    versions; a bar indicates when a certain version was used to process orbits:

    .. figure:: figs/NO2/Copernicus_S5p_NO2.png
       :scale: 50 %
       :align: center
       :alt: Overview of available NO2 processings.

    Specify the name of the target plot::

        ! output table, date of today:
        <rcbase>.output.file      :  ${my.work}/Copernicus_S5P_NO2_%Y-%m-%d.png

    The following flag is used to ensure that the plot is renewed::

        ! renew existing plots?
        cso.tutorial.inquire-s5phub-plot.renew   :  True

    """

    def __init__( self, rcfile, rcbase='', env=None, indent='' ) :

        """
        Read the inquire table(s), create the version-versus-time plot,
        and save it to the configured figure file.

        Arguments:

        * ``rcfile``  : settings file
        * ``rcbase``  : prefix of settings keys
        * ``env``     : optional dictionary with environment variables
          used for expansion of the settings (default: empty)
        * ``indent``  : prefix for log messages
        """

        # modules:
        import os
        import numpy
        import datetime
        import pandas
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** create inquire plot' )
        logging.info( indent+'' )

        # init base object; avoid a shared mutable default for "env":
        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase,
                                    env=({} if env is None else env) )

        # renew output?
        renew = self.GetSetting( 'renew', totype='bool' )

        # table file(s) to be used:
        filename_templates = self.GetSetting( 'file' ).split(';')
        # count:
        nfile = len(filename_templates)

        # time stamp in file? default is one empty entry per file:
        filedates = self.GetSetting( 'filedate', default=(';'*(nfile-1)) ).split(';')
        # pad in case fewer dates than files were specified;
        # an empty entry means "use today":
        filedates += [''] * ( nfile - len(filedates) )

        # list with time templates expanded:
        filenames = []
        for ifile in range(nfile) :
            # current:
            filename_template = filename_templates[ifile]
            # explicit date specified? otherwise fill for today:
            filedate = filedates[ifile].strip()
            if len(filedate) > 0 :
                t0 = datetime.datetime.strptime( filedate, '%Y-%m-%d' )
            else :
                t0 = datetime.datetime.now()
            #endif
            # expand time templates:
            filenames.append( t0.strftime( filename_template.strip() ) )
        #endfor

        # target file template:
        figfile_template = self.GetSetting( 'output.file' )
        # target file, time templates filled for today:
        t = datetime.datetime.now()
        fig_file = t.strftime( figfile_template )

        # create?
        if (not os.path.isfile(fig_file)) or renew :

            # info ..
            logging.info( indent+'create %s ...' % fig_file )

            # combine tables:
            df = None
            # loop:
            for filename in filenames :
                # check ..
                if not os.path.isfile(filename) :
                    msg = 'inquire table not found: %s' % filename
                    logging.error( msg )
                    raise FileNotFoundError( msg )
                #endif
                # info ..
                logging.info( indent+'read inquire table: %s' % filename )
                # read; the time columns are parsed as dates,
                # all other columns are kept as str:
                xdf = pandas.read_csv( filename, sep=';', skip_blank_lines=True,
                                          parse_dates=['start_time','end_time'],
                                          dtype='str' )
                # combine; note that "DataFrame.append" was removed
                # in pandas 2, use "concat" instead:
                if df is None :
                    df = xdf
                else :
                    df = pandas.concat( [df,xdf], ignore_index=True )
                #endif
            #endfor

            # first file is used for annotation ...
            filename = filenames[0]
            # title derived from file name:
            title = os.path.basename( filename ).replace('.csv','').replace('_',' ')

            # frequency used to group orbit counts:
            #freq = 'MS'   # month
            freq = 'W'     # week

            # time range covered by the orbits:
            t1 = df['start_time'].min()
            t2 = df['start_time'].max()
            # full years, extra space at the end for text:
            t1 = pandas.Timestamp( year=t1.year, month=1, day=1 )
            t2 = max( t2 + pandas.Timedelta(180,'days'),
                      pandas.Timestamp( year=t2.year+1, month=1, day=1 ) )
            # location of version annotation:
            t2x = t2 - pandas.Timedelta(20,'days')

            # streams in fixed plot order:
            streams = ['NRTI','OFFL','RPRO','PAL_']
            # color per stream:
            colors = { 'NRTI' : 'blue'      ,
                       'OFFL' : 'orange'    ,
                       'RPRO' : 'green'     ,
                       'PAL_' : 'lightgreen' }

            # processors: '010101', ...
            procs = df['processor_version'].unique()
            procs.sort()

            # convert processor labels '010203' to version 'v1.2.3':
            proclabs = {}
            for proc in procs :
                proclabs[proc] = 'v%i.%i.%i' % (int(proc[0:2]),int(proc[2:4]),int(proc[4:6]))
            #endfor

            # storage for handles used for legend:
            streamh = {}
            # previous processor version:
            proc_prev = ''
            # row index in plot:
            irow = 0

            # new figure:
            fig = plt.figure( figsize=(12,4) )
            ax = fig.add_axes( [0.03,0.07,0.94,0.85] )

            # loop over processor versions;
            # collections are ignored, these are mainly a change in L1B data:
            for proc in procs :
                # loop over streams:
                for stream in streams :
                    # select rows for this stream/processor combination:
                    df2 = df[ (df['processing'       ] == stream) & \
                              (df['processor_version'] == proc  ) ]
                    # any?
                    if len(df2) > 0 :
                        # next row:
                        irow += 1
                        # separation line?
                        if (irow > 1) and (proc != proc_prev) :
                            # style depends on which part of the version changed;
                            # major, minor, or (the only remaining option) patch:
                            if proc[0:2] != proc_prev[0:2] :
                                linestyle = '-'
                            elif proc[2:4] != proc_prev[2:4] :
                                linestyle = '--'
                            else :
                                linestyle = ':'
                            #endif
                            # horizontal line:
                            ax.plot( [t1,t2], [irow-0.5,irow-0.5], color='0.5', linestyle=linestyle )
                        #endif
                        # label?
                        if proc != proc_prev :
                            # add version text:
                            ax.text( t2x, irow, proclabs[proc],
                                       horizontalalignment='right', verticalalignment='center' )
                        #endif
                        # store for next line:
                        proc_prev = proc
                        # group by period, count number of orbits:
                        nn = df2.set_index('start_time').groupby(pandas.Grouper(freq=freq))['orbit'].count()
                        # periods with orbits:
                        xvalues = nn[nn>0].index
                        yvalues = numpy.zeros(len(xvalues)) + irow
                        # style:
                        color = colors[stream]
                        style = dict( linestyle='None', marker='s', color=color, markerfacecolor=color )
                        # markers:
                        p = ax.plot( xvalues, yvalues, **style )
                        # store handle for legend:
                        if stream not in streamh.keys() : streamh[stream] = p[0]
                    #endif # orbits found
                #endfor # streams
            #endfor # processors

            # time axis:
            ax.set_xlim((t1,t2))
            ax.xaxis.set_minor_locator( mdates.MonthLocator() )
            ax.xaxis.set_major_locator( mdates.YearLocator() )
            ax.xaxis.set_major_formatter( mdates.DateFormatter('%Y') )
            ax.grid(axis='x')
            # y-axis:
            ax.set_yticks([])
            ax.set_ylim([0.5,irow+0.5])
            # annotate:
            ax.set_title( title )

            # legend for streams that actually occur:
            labs = []
            hh = []
            for stream in streams :
                if stream in streamh.keys() :
                    labs.append( stream )
                    hh.append( streamh[stream] )
                #endif
            #endfor
            ax.legend( hh, labs, loc='upper left' )

            # save:
            fig.savefig( fig_file )

        else :

            # info ..
            logging.info( indent+'keep %s ...' % fig_file )

        #endif # renew

        # info ...
        logging.info( indent+'' )
        logging.info( indent+'** end inquire plot' )
        logging.info( indent+'' )

    #enddef __init__

#endclass CSO_Inquire_Plot


########################################################################
###
### end
###
########################################################################