Updated computation and plotting of gridded means. (2a495433) · Commits · CAMS / CSO

src/cso/cso_catalogue.py

+124 −83

Original line number	Diff line number	Diff line
		@@ -30,6 +30,10 @@
		# 2025-04, Arjo Segers
		# Changed imports for python packaging.
		#
		# 2025-09, Arjo Segers
		# Updated plotting of gridded fields.
		# Extended unit conversions.
		#


		########################################################################
		@@ -132,7 +136,7 @@ class CSO_CatalogueBase(utopya.UtopyaRc):
		# which conversion?
		conversion = "%s -> %s" % (attrs["units"], vunits)
		# switch:
		if conversion in ["1e-9 -> ppb"]:
		if conversion in ["1e-9 -> ppb","mol m-2 -> mol/m2"]:
		# just other notation ...
		pass

		@@ -439,20 +443,20 @@ class CSO_Catalogue(CSO_CatalogueBase):
		# settings for this variable:
		vkey = "var.%s" % varname
		# plot type:
		ptype = self.GetSetting(vkey + ".type", default="map")
		ptype = self.GetSetting("var.{varname}.type", default="map")
		# input variable:
		vname = self.GetSetting(vkey + ".source", default=varname)
		vname = self.GetSetting("var.{varname}.source", default=varname)
		# target units:
		vunits = self.GetSetting(vkey + ".units", default="None")
		vunits = self.GetSetting("var.{varname}.units", default="None")
		# long name used in labels:
		long_name = self.GetSetting(vkey + ".long_name", default=varname)
		long_name = self.GetSetting("var.{varname}.long_name", default=varname)

		# switch:
		if ptype == "map":
		# style:
		vmin = eval(self.GetSetting(vkey + ".vmin", default="None"))
		vmax = eval(self.GetSetting(vkey + ".vmax", default="None"))
		colors = eval(self.GetSetting(vkey + ".colors", default="None"))
		vmin = eval(self.GetSetting("var.{varname}.vmin", default="None"))
		vmax = eval(self.GetSetting("var.{varname}.vmax", default="None"))
		colors = eval(self.GetSetting("var.{varname}.colors", default="None"))

		# extract corner grids and values:
		xx, yy, values, attrs = orb.GetTrack(vname)
		@@ -784,20 +788,20 @@ class CSO_SimCatalogue(CSO_CatalogueBase):
		# settings for this variable:
		vkey = "var.%s" % varname
		# originating variable:
		vsource = self.GetSetting(vkey + ".source", default="data:" + varname)
		vsource = self.GetSetting("var.{varname}.source", default="data:" + varname)
		# target units:
		vunits = self.GetSetting(vkey + ".units", default="None")
		vunits = self.GetSetting("var.{varname}.units", default="None")
		# plot type:
		ptype = self.GetSetting(vkey + ".type", default="map")
		ptype = self.GetSetting("var.{varname}.type", default="map")
		# long name used in labels:
		long_name = self.GetSetting(vkey + ".long_name", default=varname)
		long_name = self.GetSetting("var.{varname}.long_name", default=varname)

		# switch:
		if ptype == "map":
		# style:
		vmin = eval(self.GetSetting(vkey + ".vmin", default="None"))
		vmax = eval(self.GetSetting(vkey + ".vmax", default="None"))
		colors = eval(self.GetSetting(vkey + ".colors", default="None"))
		vmin = eval(self.GetSetting("var.{varname}.vmin", default="None"))
		vmax = eval(self.GetSetting("var.{varname}.vmax", default="None"))
		colors = eval(self.GetSetting("var.{varname}.colors", default="None"))

		# variable source:
		# [data:]vname
		@@ -940,36 +944,35 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		! step is one of: hour \| day \| month
		<rcbase>.timerange.step : hour

		The time range is used to scan for output files from the :py:class:`CSO_GriddedAverage` class::

		<rcbase>.input.file : /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc

		Specify a list of variables to be plotted, for example the retrieved and simulated column::
		Specify a list of variables to be plotted, for example the retrieved and simulated column;
		also the difference between two variables is supported::

		<rcbase>.vars : yr ys
		<rcbase>.vars : yr ys yr-ys

		For each variable, define the name of the source variable::
		For each variable (individual, or the variables to be subtraced from eachother),
		define the name of the input file(s) and the source variable::

		<rcbase>.var.yr.source : yr
		<rcbase>.var.yr.source : ys
		<rcbase>.var.yr.input.file : /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc
		<rcbase>.var.yr.input.var : yr

		Optionally specify a source layer for profiles, default is first layer::

		<rcbase>.var.yr.source_layer : 0

		Optionally specify target units that are different from the input::
		Optionally specify target units, default is the units of the input variable::

		! convert units:
		<rcbase>.var.yr.units : 1e15 mlc/cm2
		<rcbase>.var.yr.units : umol/m2

		The value range of the colorbar could be tunes using (default limits are based on data values)::

		<rcbase>.var.vcd.vmin : 0.0
		<rcbase>.var.vcd.vmax : 10.0
		<rcbase>.var.yr.vmin : 0.0
		<rcbase>.var.yr.vmax : 10.0

		The colors in the colorbar could be changed using::
		The colors in the colorbar could be changed using a line that is evaluated into a ``dict()``;
		see :py:meth:`cso_plot.Set_ColorMap` method for supported elements::

		<rcbase>.var.qa_flag.colors : ['red','yellow','green']
		<rcbase>.var.qa_flag.cmap : color=['red','yellow','green'], ncolor=9, color_under='maroon'

		The label below the colorbar will by default show the variable name,
		unless a ``long_name`` is defined::
		@@ -978,7 +981,8 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):

		The name of the created image files is read from::

		! target files, time tempates are replaced:
		! target files, time tempates are replaced,
		! use '%{var}' for the variable name:
		<rcbase>.output.file : %Y/%m/%d/S5p_RPRO_NO2_%Y%m%d_%H%M_gridded_%{var}.png

		which will give::
		@@ -997,6 +1001,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		! figure size (inches), default is (8,6):
		<rcbase>.figsize : (6,6)

		Optional settings to setup the map plot, these are used to fill the ``bmp`` dict
		that is passed to :py:meth:`cso_plot.QuickPat`::

		! map properties:
		domain=[-30,90,30,75]

		"""

		def __init__(self, rcfile, rcbase="", env={}, indent=""):
		@@ -1083,18 +1093,6 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		raise Exception
		# endif

		# input file:
		infile = self.GetSetting("input.file")
		infile = t.strftime(infile)

		# input file present?
		if os.path.isfile(infile):
		# info ...
		logging.info(indent + " found input file: %s" % infile)

		# no data read yet:
		indata = None

		# loop over variables ...
		for varname in varnames:
		# target file:
		@@ -1106,41 +1104,79 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		# info ..
		logging.info(indent + " create %s ..." % figfile)

		# read?
		if indata is None:
		# input file(s):
		if "-" in varname:
		# split:
		varname1,varname2 = varname.split("-")
		# first input file:
		infile1 = self.GetSetting(f"var.{varname1}.input.file")
		infile1 = t.strftime(infile1)
		# second input file:
		infile2 = self.GetSetting(f"var.{varname2}.input.file")
		infile2 = t.strftime(infile2)
		else:
		# copy:
		varname1 = varname
		# input file:
		infile1 = self.GetSetting(f"var.{varname}.input.file")
		infile1 = t.strftime(infile1)
		# no second file
		infile2 = None
		#endif
		# input file present?
		if os.path.isfile(infile1):
		# info ...
		logging.info(indent + " read ...")
		logging.info(f"{indent} found input file: {infile1}")

		# info ...
		logging.info(f"{indent} read ...")
		# init storage and read:
		indata = cso_file.CSO_File(filename=infile)
		indata1 = cso_file.CSO_File(filename=infile1)
		# second file?
		if infile2 is not None:
		indata2 = cso_file.CSO_File(filename=infile2)
		#endif

		# annote:
		title = long_timestamp

		# settings for this variable:
		vkey = "var.%s" % varname
		# originating variable:
		vsource = self.GetSetting(vkey + ".source")
		vsource1 = self.GetSetting(f"var.{varname1}.input.var")
		if infile2 is not None:
		vsource2 = self.GetSetting(f"var.{varname2}.input.var")
		# optional layer selection:
		vsource_layer = self.GetSetting(
		vkey + ".source_layer", totype="int", default=0
		"var.{varname1}.source_layer", totype="int", default=0
		)
		# target units:
		vunits = self.GetSetting(vkey + ".units", default="None")
		vunits = self.GetSetting(f"var.{varname}.units", default="None")
		# plot type:
		ptype = self.GetSetting(vkey + ".type", default="map")
		ptype = self.GetSetting(f"var.{varname}.type", default="map")
		# long name used in labels:
		long_name = self.GetSetting(vkey + ".long_name", default=varname)
		long_name = self.GetSetting(f"var.{varname}.long_name" )#, default=varname)

		# switch:
		if ptype == "map":
		# style:
		vmin = eval(self.GetSetting(vkey + ".vmin", default="None"))
		vmax = eval(self.GetSetting(vkey + ".vmax", default="None"))
		colors = eval(self.GetSetting(vkey + ".colors", default="None"))
		vmin = eval(self.GetSetting(f"var.{varname}.vmin", default="None"))
		vmax = eval(self.GetSetting(f"var.{varname}.vmax", default="None"))

		# colormap:
		#~ older setting:
		colors = self.GetSetting(f"var.{varname}.colors", default="None")
		if colors != "None":
		cmap = eval( f"dict( colors={colors}" )
		else:
		cmap = self.GetSetting(f"var.{varname}.cmap", totype="dict", default="{}")
		#endif

		# extract values:
		vda = indata.ds[vsource]
		if infile2 is not None:
		vda = indata1.ds[vsource1] - indata2.ds[vsource2]
		vda.attrs = indata1.ds[vsource1].attrs
		else:
		vda = indata1.ds[vsource1]
		#endif

		# grid:
		if "longitude" in vda.coords.keys():
		@@ -1183,22 +1219,27 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		values, units = self._ConvertUnits(values, vda.attrs, vunits)

		# annote:
		label = "%s [%s]" % (long_name, units)
		label = f"{long_name} [{units}]"

		# map properties:
		bmp = dict(countries=True)
		bmp.update(bmp_kwargs)

		# colorbar properties:
		cbar = dict(label=label)
		if "ncolor" in cmap :
		cbar["ntick"] = cmap["ncolor"]

		# create map figure:
		fig = cso_plot.QuickPat(
		values,
		x=xm,
		y=ym,
		xm=xm,
		ym=ym,
		vmin=vmin,
		vmax=vmax,
		cmap=dict(colors=colors),
		cmap=cmap,
		bmp=bmp,
		cbar=dict(label=label),
		cbar=cbar,
		title=title,
		figsize=figsize,
		)
		@@ -1209,7 +1250,7 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		fig.Close()

		else:
		logging.error('unsupported plot type "%s"' % (ptype))
		logging.error(f"unsupported plot type '{ptype}'")
		raise Exception
		# endif # ptype

		@@ -1218,12 +1259,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
		## testing ...
		# break

		# endfor # variables

		else:
		logging.info(indent + " no input file: %s" % infile)
		logging.info(f"{indent} no input file: {infile}")
		# endif # state file present

		# endfor # variables

		# next:
		t = t + dt

src/cso/cso_gridded.py

+199 −163

Original line number	Diff line number	Diff line
		@@ -37,6 +37,9 @@
		# 2025-04, Arjo Segers
		# Changed imports for python packaging.
		#
		# 2025-09, Arjo Segers
		# Updated calculation of temporal means for files with time records.
		#


		########################################################################
		@@ -1155,8 +1158,15 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		! time resolutions: daily, monthly, yearly
		<rcbase>.resolutions : daily monthly yearly

		For each of these resolutions, specify a filename pattern for the input files,
		and a template for the output file.
		A number of files could be proccessed in one call
		Define keywords for these files, for example the file with observations
		and the one with simulations::

		! output files to be processed:
		<rcbase>.files : obs sim

		For each of the resolutions and for each of the file keywords,
		specify a filename pattern for the input files, and a template for the output file.
		Both should have templates to evaluate the target time a temporal mean,
		for example ``%%Y%%m%%d`` for a specific day.
		The input pattern should use ``*`` or ``?`` patterns to denote the higher time resolutions.
		@@ -1164,21 +1174,21 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		and an output pattern for a daily file::

		! daily means from hourly files:
		<rcbase>.resolution.daily.input.file : /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc
		<rcbase>.resolution.daily.output.file : /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc
		<rcbase>.resolution.daily.obs.input.file : /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc
		<rcbase>.resolution.daily.obs.output.file : /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc

		Similar specification should be provided for ``monthly`` and ``yearly`` resolutions.
		To speedup calculations, the ``monthly`` means could use the ``daily`` means as input::

		! monthly means from daily files:
		<rcbase>.resolution.monthly.input.file : /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc
		<rcbase>.resolution.monthly.output.file : /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc
		<rcbase>.resolution.monthly.obs.input.file : /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc
		<rcbase>.resolution.monthly.obs.output.file : /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc

		and similar the ``yearly`` means could use the ``monthly`` means as input::

		! yearly means from monthly files
		<rcbase>.resolution.yearly.input.file : /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc
		<rcbase>.resolution.yearly.output.file : /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc
		<rcbase>.resolution.yearly.obs.input.file : /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc
		<rcbase>.resolution.yearly.obs.output.file : /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc

		To reduce file size, by default all variables are written to file as short-integers (2 bytes)
		accompanied by ``add_offset`` and ``scale_factor`` attributes.
		@@ -1241,49 +1251,65 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		# renew?
		renew = self.GetSetting("renew", totype="bool")

		# keys to process multiple files:
		filekeys = self.GetSetting("files").split()

		# resolutions to be created:
		resolutions = self.GetSetting("resolutions").split()
		# loop:
		for resolution in resolutions:
		# info ...
		logging.info(indent + 'time resoloution "%s" ...' % resolution)
		logging.info(f"{indent}time resoloution '{resolution}' ...")

		# time values:
		if resolution == "daily":
		tt = pandas.date_range(t1, t2, freq="D")
		xtt1 = pandas.date_range(t1, t2, freq="D")
		dt = pandas.Timedelta(days=1)
		xtt2 = pandas.date_range(t1 + dt, t2 + dt, freq="D")
		tfmt = "%Y-%m-%d"
		elif resolution == "monthly":
		tt = pandas.date_range(t1, t2, freq="M")
		xtt1 = pandas.date_range(t1, t2, freq="MS")
		xtt2 = pandas.date_range(t1, t2, freq="M") + pandas.Timedelta(days=1)
		tfmt = "%Y-%m"
		elif resolution == "yearly":
		tt = pandas.date_range(t1, t2, freq="Y")
		xtt1 = pandas.date_range(t1, t2, freq="YS")
		xtt2 = pandas.date_range(t1, t2, freq="Y")
		tfmt = "%Y"
		else:
		logging.error('unsupported time resolution "%s"' % resolution)
		raise Exception
		# endif

		# loop over files:
		for filekey in filekeys:
		# info ...
		logging.info(f"{indent} filekey '{filekey}' ...")

		# pattern for input files:
		input_files = self.GetSetting("resolution.%s.input.file" % resolution)
		input_files = self.GetSetting(f"resolution.{resolution}.{filekey}.input.file")
		# output file:
		output_file = self.GetSetting("resolution.%s.output.file" % resolution)
		output_file = self.GetSetting(f"resolution.{resolution}.{filekey}.output.file")

		# info ..
		logging.info(f"{indent} time loop ...")
		# loop:
		for t in tt:
		for irec in range(len(xtt1)):
		# current interval:
		xt1 = xtt1[irec]
		xt2 = xtt2[irec]
		# info ..
		logging.info(f"{indent} %s .." % t.strftime(tfmt))
		logging.info(f"{indent} {xt1.strftime(tfmt)} .." )
		logging.info(f"{indent} collect samples in [{xt1},{xt2})")

		# target file:
		outfile = t.strftime(output_file)
		outfile = xt1.strftime(output_file)
		# renew?
		if (not os.path.isfile(outfile)) or renew:
		# info ...
		logging.info(f"{indent} create %s ..." % outfile)

		# search pattern for input files:
		fpattern = t.strftime(input_files)
		fpattern = xt1.strftime(input_files)
		# list:
		fnames = glob.glob(fpattern)
		# check ...
		@@ -1296,18 +1322,23 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		ds = xarray.open_mfdataset(fnames)
		# info ...
		logging.info(
		indent
		+ " read %i records with time range [%s,%s]"
		% (
		len(ds["time"]),
		ds["time"].values.min(),
		ds["time"].values.max(),
		)
		f"{indent} read {len(ds['time'])} records with" +
		f" time range [{ds['time'].values.min()},{ds['time'].values.max()}]"
		)
		# shape:
		nlon = ds.dims["longitude"]
		nlat = ds.dims["latitude"]

		# select time records:
		iit, = numpy.where( (ds["time"] >= xt1) & (ds["time"] < xt2) )
		# check ..
		if len(iit) == 0:
		logging.error(f"no input records found withing sampling time range")
		raise Exception
		#endif
		# info ...
		logging.info(f"{indent} selected {len(iit)} records ...")

		# update history list:
		history = []
		history.append(
		@@ -1325,7 +1356,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		# endif
		# new time coordinate:
		coords["time"] = xarray.DataArray(
		[t], dims=("time"), attrs={"standard_name": "time"}
		[xt1], dims=("time"), attrs={"standard_name": "time"}
		)
		coords["time"].encoding["units"] = tunits

		@@ -1372,7 +1403,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		if vname in ["cell_area", "pixel_area"]:
		continue
		# info ...
		logging.info(indent + ' variable "%s" ..' % vname)
		logging.info(f" variable '{vname}' ...")

		# convert to masked array to avoid warnings about all-nan series:
		values_in = numpy.ma.array(
		@@ -1386,7 +1417,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		if len(shp) == 3:
		# weighted average, convert output to array with nans:
		values[0, :, :] = numpy.ma.average(
		values_in, axis=0, weights=pixel_area
		values_in[iit,:,:], axis=0, weights=pixel_area[iit,:,:]
		).filled(fill_value=numpy.nan)
		#
		elif len(shp) == 4:
		@@ -1396,7 +1427,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		for iv in range(shp[3]):
		# weighted average, convert output to array with nans:
		values[0, :, :, iv] = numpy.ma.average(
		values_in[:, :, :, iv], axis=0, weights=pixel_area
		values_in[iit, :, :, iv], axis=0, weights=pixel_area[iit,:,:]
		).filled(fill_value=numpy.nan)
		# endif
		#
		@@ -1430,14 +1461,19 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
		# logging.warning( 'break after first created file' )
		# break

		# endif # create
		else:
		# info ...
		logging.info(f"{indent} keep {outfile} ...")
		# endif # create target file?

		# endfor # time
		# endfor # target time record

		## testing ..
		# logging.warning( 'break after first resolution' )
		# break

		#endfor # timekeys

		# endfor # resolution

		# info ...

src/cso/cso_plot.py

+7 −1

File changed.

Preview size limit exceeded, changes collapsed.