Introduced tools to inquire S5p archives. (2c9a4eac) · Commits · CAMS / CSO

py/cso.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -23,8 +23,9 @@ Actual implementations can be found in submodules:
		.. toctree::
		:maxdepth: 1

		pymod-cso_inquire
		pymod-cso_scihub

		pymod-cso_pal

		.. Label, use :ref:`text <label>` for reference
		.. _cso-classes:

py/cso_inquire.py

0 → 100644

+365 −0

Original line number	Diff line number	Diff line

		########################################################################
		###
		### help
		###
		########################################################################

		"""
		.. _cso-inquire:

		**********************
		``cso_inquire`` module
		**********************

		The :py:mod:`cso_inquire` module provides some general classes to support
		inquiries of data portals.


		Class hierarchy
		===============

		The classes are defined according to the following hierarchy:

		* :py:class:`.UtopyaRc`

		* :py:class:`.CSO_Inquire_Plot`



		Classes
		=======


		"""


		########################################################################
		###
		### modules
		###
		########################################################################

		# modules:
		import logging

		# tools:
		import utopya


		########################################################################
		###
		### classes
		###
		########################################################################


		class CSO_Inquire_Plot( utopya.UtopyaRc ) :

		    """
		    Create plot of processor version versus time to indicate the available orbits in the SciHub archive.

		    The information on orbits is taken from a csv table created by :py:class:`CSO_SciHub_Inquire` class.
		    Specify the name of the table file in the settings::

		        ! listing file:
		        cso.tutorial.inquire-s5phub-plot.file       :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv

		    The date templates are by default filled for the current day.
		    Alternatively, specify an explicit date::

		        !~ specify dates ("yyyy-mm-dd") to use historic table:
		        cso.tutorial.inquire-s5phub-plot.filedate   :  2022-01-28

		    The plot could also be created by combining multiple tables;
		    use a semi-colon to separate the file names (and eventually the dates)::

		        ! listing files:
		        cso.tutorial.inquire-s5phub-plot.file       :  ${my.work}/Copernicus/Copernicus_S5p_NO2_s5phub_%Y-%m-%d.csv ; \\
		                                                       ${my.work}/Copernicus/Copernicus_S5p_NO2_pal_%Y-%m-%d.csv
		        !~ specify dates ("yyyy-mm-dd") to use historic tables:
		        !cso.tutorial.inquire-s5phub-plot.filedate   :  2022-01-28 ; 2022-01-28

		    If dates are specified, provide one entry per file;
		    an empty entry selects the current day for that file.

		    The created plot shows a time line and on the vertical axis the processor versions;
		    a marker indicates the weeks for which a certain version was used to process orbits:

		    .. figure:: figs/NO2/Copernicus_S5p_NO2.png
		       :scale: 50 %
		       :align: center
		       :alt: Overview of available NO2 processings.

		    Specify the name of the target plot::

		        ! output table, date of today:
		        <rcbase>.output.file       :  ${my.work}/Copernicus_S5P_NO2_%Y-%m-%d.png

		    The following flag is used to ensure that the plot is renewed::

		        ! renew existing plots?
		        cso.tutorial.inquire-s5phub-plot.renew      :  True

		    """

		    def __init__( self, rcfile, rcbase='', env=None, indent='' ) :

		        """
		        Read the settings and create the overview plot.

		        Arguments:

		        * ``rcfile`` : settings file, passed to :py:class:`.UtopyaRc`
		        * ``rcbase`` : prefix of the setting keys, passed to :py:class:`.UtopyaRc`
		        * ``env``    : optional dictionary passed to :py:class:`.UtopyaRc`;
		          an empty dictionary is used if not provided
		        * ``indent`` : text prepended to the log messages

		        The plot is only created if the target file is not present yet
		        or if the ``renew`` flag is enabled.
		        An ``Exception`` is raised if an inquire table is missing,
		        or if fewer dates than table files are specified.
		        """

		        # modules:
		        import os
		        import datetime

		        # info ...
		        logging.info( indent+'' )
		        logging.info( indent+'** create inquire plot' )
		        logging.info( indent+'' )

		        # use a fresh dictionary per call rather than a shared mutable default:
		        if env is None :
		            env = {}
		        #endif

		        # init base object:
		        utopya.UtopyaRc.__init__( self, rcfile=rcfile, rcbase=rcbase, env=env )

		        # renew output?
		        renew = self.GetSetting( 'renew', totype='bool' )

		        # table file(s) to be used:
		        filename_templates = self.GetSetting( 'file' ).split(';')
		        # count:
		        nfile = len(filename_templates)
		        # time stamps to be filled in the templates;
		        # by default an empty value (current day) for each of the files:
		        filedates = self.GetSetting( 'filedate', default=(';'*(nfile-1)) ).split(';')
		        # list with time expanded:
		        filenames = self._ExpandTemplates( filename_templates, filedates )

		        # target file template:
		        figfile_template = self.GetSetting( 'output.file' )
		        # target file, templates are filled for the current day:
		        fig_file = datetime.datetime.now().strftime( figfile_template )

		        # create?
		        if (not os.path.isfile(fig_file)) or renew :

		            # info ..
		            logging.info( indent+'create %s ...' % fig_file )

		            # combine tables:
		            df = self._ReadTables( filenames, indent=indent )

		            # title is based on the name of the first table:
		            title = os.path.basename( filenames[0] ).replace('.csv','').replace('_',' ')

		            # create and save figure:
		            self._CreatePlot( df, title, fig_file )

		        else :

		            # info ..
		            logging.info( indent+'keep %s ...' % fig_file )

		        #endif # renew

		        # info ...
		        logging.info( indent+'' )
		        logging.info( indent+'** end inquire plot' )
		        logging.info( indent+'' )

		    #enddef __init__

		    # *

		    @staticmethod
		    def _ExpandTemplates( filename_templates, filedates ) :

		        """
		        Return list with file names obtained by filling the time templates
		        in each of the ``filename_templates`` for the corresponding entry
		        of ``filedates`` ('yyyy-mm-dd'); an empty date selects the current day.
		        """

		        # modules:
		        import datetime

		        # check, a date (eventually empty) is needed for each of the files:
		        if len(filedates) < len(filename_templates) :
		            msg = 'found %i file date(s) for %i inquire table(s)' % (len(filedates),len(filename_templates))
		            logging.error( msg )
		            raise Exception( msg )
		        #endif

		        # storage:
		        filenames = []
		        # loop over files, extra dates are ignored:
		        for filename_template,filedate in zip(filename_templates,filedates) :
		            # remove surrounding white space first,
		            # an entry consisting of spaces only is regarded as undefined:
		            filedate = filedate.strip()
		            # time?
		            if len(filedate) > 0 :
		                t0 = datetime.datetime.strptime( filedate, '%Y-%m-%d' )
		            else :
		                t0 = datetime.datetime.now()
		            #endif
		            # expand time templates:
		            filenames.append( t0.strftime( filename_template.strip() ) )
		        #endfor

		        # ok
		        return filenames

		    #enddef _ExpandTemplates

		    # *

		    @staticmethod
		    def _ReadTables( filenames, indent='' ) :

		        """
		        Read the inquire tables and return them combined in a single data frame.
		        Raises an ``Exception`` if one of the files is not present.
		        """

		        # modules:
		        import os
		        import pandas

		        # storage:
		        tables = []
		        # loop:
		        for filename in filenames :
		            # check ..
		            if not os.path.isfile(filename) :
		                msg = 'inquire table not found: %s' % filename
		                logging.error( msg )
		                raise Exception( msg )
		            #endif
		            # info ..
		            logging.info( indent+'read inquire table: %s' % filename )
		            # read, all columns as text except for the time stamps:
		            xdf = pandas.read_csv( filename, sep=';', skip_blank_lines=True,
		                                     parse_dates=['start_time','end_time'],
		                                     dtype='str' )
		            # store:
		            tables.append( xdf )
		        #endfor

		        # combine; 'DataFrame.append' is not available anymore in recent pandas versions:
		        return pandas.concat( tables )

		    #enddef _ReadTables

		    # *

		    @staticmethod
		    def _CreatePlot( df, title, fig_file ) :

		        """
		        Create figure with for each combination of processor version and stream
		        a row of markers at the weeks for which orbits are listed in the table ``df``,
		        and save it to ``fig_file``.
		        """

		        # modules:
		        import numpy
		        import pandas
		        import matplotlib.pyplot as plt
		        import matplotlib.dates as mdates

		        # frequency of the time bins, a marker is drawn for every week with orbits;
		        # alternatives are for example 'MS' (month) or 'D' (day):
		        freq = 'W'

		        # time range:
		        t1 = df['start_time'].min()
		        t2 = df['start_time'].max()
		        # full years, extra space for text:
		        t1 = pandas.Timestamp( year=t1.year  , month=1, day=1 )
		        t2 = max( t2+pandas.Timedelta(180,'days'), pandas.Timestamp( year=t2.year+1, month=1, day=1 ) )
		        # location of annotation:
		        t2x = t2-pandas.Timedelta(20,'days')

		        # streams in order of appearance in the plot and the legend;
		        # a fixed list is used rather than the values found in the table:
		        streams = ['NRTI','OFFL','RPRO','PAL_']

		        # color list for streams:
		        colors = {}
		        colors['NRTI'] = 'blue'
		        colors['OFFL'] = 'orange'
		        colors['RPRO'] = 'green'
		        colors['PAL_'] = 'lightgreen'

		        # processors: '010101', ...
		        procs = sorted( df['processor_version'].dropna().unique() )

		        # convert processor labels '010203' to version 'v1.2.3':
		        proclabs = {}
		        for proc in procs :
		            proclabs[proc] = 'v%i.%i.%i' % (int(proc[0:2]),int(proc[2:4]),int(proc[4:6]))
		        #endfor

		        # storage for handles used for legend:
		        streamh = {}

		        # previous row:
		        proc_prev = ''
		        # row index in plot:
		        irow = 0

		        # new:
		        fig = plt.figure( figsize=(12,4) )
		        # ensure that the figure is released, also in case of errors:
		        try :

		            # single axes:
		            ax = fig.add_axes( [0.03,0.07,0.94,0.85] )

		            # the collection is ignored, this is mainly a change in L1B data;
		            # loop over processor versions:
		            for proc in procs :

		                # loop:
		                for stream in streams :

		                    # select:
		                    df2 = df[ (df['processing'       ] == stream) & \
		                              (df['processor_version'] == proc  )   ]
		                    # none? then no row for this combination:
		                    if len(df2) == 0 : continue

		                    # next row:
		                    irow += 1

		                    # first row of new processor version?
		                    if proc != proc_prev :
		                        # separation line?
		                        if irow > 1 :
		                            # style depends on what is changed:
		                            if proc[0:2] != proc_prev[0:2] :
		                                linestyle = '-'
		                            elif proc[2:4] != proc_prev[2:4] :
		                                linestyle = '--'
		                            else :
		                                linestyle = ':'
		                            #endif
		                            # horizontal line:
		                            ax.plot( [t1,t2], [irow-0.5,irow-0.5], color='0.5', linestyle=linestyle )
		                        #endif
		                        # add version label:
		                        ax.text( t2x, irow, proclabs[proc], horizontalalignment='right', verticalalignment='center' )
		                        # store for next row:
		                        proc_prev = proc
		                    #endif

		                    # group by time bin, count number of orbits:
		                    nn = df2.set_index('start_time').groupby(pandas.Grouper(freq=freq))['orbit'].count()
		                    # bins with orbits:
		                    xvalues = nn[nn>0].index
		                    yvalues = numpy.full( len(xvalues), irow, dtype=float )
		                    # style:
		                    color = colors[stream]
		                    style = dict( linestyle='None', marker='s', color=color, markerfacecolor=color )
		                    # markers:
		                    p = ax.plot( xvalues, yvalues, **style )

		                    # store first handle of each stream for legend:
		                    if stream not in streamh : streamh[stream] = p[0]

		                #endfor # streams

		            #endfor # processors

		            # time axis:
		            ax.set_xlim((t1,t2))
		            ax.xaxis.set_minor_locator( mdates.MonthLocator() )
		            ax.xaxis.set_major_locator( mdates.YearLocator() )
		            ax.xaxis.set_major_formatter( mdates.DateFormatter('%Y'))
		            ax.grid(axis='x')
		            # y-axis, at least one row to avoid an empty range:
		            ax.set_yticks([])
		            ax.set_ylim([0.5,max(irow,1)+0.5])
		            # annote:
		            ax.set_title( title )
		            # legend for the streams that are actually present:
		            labs = [ stream for stream in streams if stream in streamh ]
		            hh = [ streamh[stream] for stream in labs ]
		            ax.legend( hh, labs, loc='upper left' )

		            # save:
		            fig.savefig( fig_file )

		        finally :

		            # release memory:
		            plt.close( fig )

		        #endtry

		    #enddef _CreatePlot

		#endclass CSO_Inquire_Plot



		########################################################################
		###
		### end
		###
		########################################################################

py/cso_scihub.py

+876 −75

File changed.

Preview size limit exceeded, changes collapsed.