Loading src/cso/cso_catalogue.py +124 −83 Original line number Diff line number Diff line Loading @@ -30,6 +30,10 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # # 2025-09, Arjo Segers # Updated plotting of gridded fields. # Extended unit conversions. # ######################################################################## Loading Loading @@ -132,7 +136,7 @@ class CSO_CatalogueBase(utopya.UtopyaRc): # which conversion? conversion = "%s -> %s" % (attrs["units"], vunits) # switch: if conversion in ["1e-9 -> ppb"]: if conversion in ["1e-9 -> ppb","mol m-2 -> mol/m2"]: # just other notation ... pass Loading Loading @@ -439,20 +443,20 @@ class CSO_Catalogue(CSO_CatalogueBase): # settings for this variable: vkey = "var.%s" % varname # plot type: ptype = self.GetSetting(vkey + ".type", default="map") ptype = self.GetSetting("var.{varname}.type", default="map") # input variable: vname = self.GetSetting(vkey + ".source", default=varname) vname = self.GetSetting("var.{varname}.source", default=varname) # target units: vunits = self.GetSetting(vkey + ".units", default="None") vunits = self.GetSetting("var.{varname}.units", default="None") # long name used in labels: long_name = self.GetSetting(vkey + ".long_name", default=varname) long_name = self.GetSetting("var.{varname}.long_name", default=varname) # switch: if ptype == "map": # style: vmin = eval(self.GetSetting(vkey + ".vmin", default="None")) vmax = eval(self.GetSetting(vkey + ".vmax", default="None")) colors = eval(self.GetSetting(vkey + ".colors", default="None")) vmin = eval(self.GetSetting("var.{varname}.vmin", default="None")) vmax = eval(self.GetSetting("var.{varname}.vmax", default="None")) colors = eval(self.GetSetting("var.{varname}.colors", default="None")) # extract corner grids and values: xx, yy, values, attrs = orb.GetTrack(vname) Loading Loading @@ -784,20 +788,20 @@ class CSO_SimCatalogue(CSO_CatalogueBase): # settings for this variable: vkey = "var.%s" % varname # originating variable: vsource = self.GetSetting(vkey + ".source", default="data:" + varname) vsource = self.GetSetting("var.{varname}.source", default="data:" + varname) # target units: vunits = self.GetSetting(vkey + ".units", default="None") vunits = self.GetSetting("var.{varname}.units", default="None") # plot type: ptype = self.GetSetting(vkey + ".type", default="map") ptype = self.GetSetting("var.{varname}.type", default="map") # long name used in labels: long_name = self.GetSetting(vkey + ".long_name", default=varname) long_name = self.GetSetting("var.{varname}.long_name", default=varname) # switch: if ptype == "map": # style: vmin = eval(self.GetSetting(vkey + ".vmin", default="None")) vmax = eval(self.GetSetting(vkey + ".vmax", default="None")) colors = eval(self.GetSetting(vkey + ".colors", default="None")) vmin = eval(self.GetSetting("var.{varname}.vmin", default="None")) vmax = eval(self.GetSetting("var.{varname}.vmax", default="None")) colors = eval(self.GetSetting("var.{varname}.colors", default="None")) # variable source: # [data:]vname Loading Loading @@ -940,36 +944,35 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): ! step is one of: hour | day | month <rcbase>.timerange.step : hour The time range is used to scan for output files from the :py:class:`CSO_GriddedAverage` class:: <rcbase>.input.file : /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc Specify a list of variables to be plotted, for example the retrieved and simulated column:: Specify a list of variables to be plotted, for example the retrieved and simulated column; also the difference between two variables is supported:: <rcbase>.vars : yr ys <rcbase>.vars : yr ys yr-ys For each variable, define the name of the source variable:: For each variable (individual, or the variables to be subtraced from eachother), define the name of the input file(s) and the source variable:: <rcbase>.var.yr.source : yr <rcbase>.var.yr.source : ys <rcbase>.var.yr.input.file : /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc <rcbase>.var.yr.input.var : yr Optionally specify a source layer for profiles, default is first layer:: <rcbase>.var.yr.source_layer : 0 Optionally specify target units that are different from the input:: Optionally specify target units, default is the units of the input variable:: ! convert units: <rcbase>.var.yr.units : 1e15 mlc/cm2 <rcbase>.var.yr.units : umol/m2 The value range of the colorbar could be tunes using (default limits are based on data values):: <rcbase>.var.vcd.vmin : 0.0 <rcbase>.var.vcd.vmax : 10.0 <rcbase>.var.yr.vmin : 0.0 <rcbase>.var.yr.vmax : 10.0 The colors in the colorbar could be changed using:: The colors in the colorbar could be changed using a line that is evaluated into a ``dict()``; see :py:meth:`cso_plot.Set_ColorMap` method for supported elements:: <rcbase>.var.qa_flag.colors : ['red','yellow','green'] <rcbase>.var.qa_flag.cmap : color=['red','yellow','green'], ncolor=9, color_under='maroon' The label below the colorbar will by default show the variable name, unless a ``long_name`` is defined:: Loading @@ -978,7 +981,8 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): The name of the created image files is read from:: ! target files, time tempates are replaced: ! target files, time tempates are replaced, ! use '%{var}' for the variable name: <rcbase>.output.file : %Y/%m/%d/S5p_RPRO_NO2_%Y%m%d_%H%M_gridded_%{var}.png which will give:: Loading @@ -997,6 +1001,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): ! figure size (inches), default is (8,6): <rcbase>.figsize : (6,6) Optional settings to setup the map plot, these are used to fill the ``bmp`` dict that is passed to :py:meth:`cso_plot.QuickPat`:: ! map properties: domain=[-30,90,30,75] """ def __init__(self, rcfile, rcbase="", env={}, indent=""): Loading Loading @@ -1083,18 +1093,6 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): raise Exception # endif # input file: infile = self.GetSetting("input.file") infile = t.strftime(infile) # input file present? if os.path.isfile(infile): # info ... logging.info(indent + " found input file: %s" % infile) # no data read yet: indata = None # loop over variables ... for varname in varnames: # target file: Loading @@ -1106,41 +1104,79 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): # info .. logging.info(indent + " create %s ..." % figfile) # read? if indata is None: # input file(s): if "-" in varname: # split: varname1,varname2 = varname.split("-") # first input file: infile1 = self.GetSetting(f"var.{varname1}.input.file") infile1 = t.strftime(infile1) # second input file: infile2 = self.GetSetting(f"var.{varname2}.input.file") infile2 = t.strftime(infile2) else: # copy: varname1 = varname # input file: infile1 = self.GetSetting(f"var.{varname}.input.file") infile1 = t.strftime(infile1) # no second file infile2 = None #endif # input file present? if os.path.isfile(infile1): # info ... logging.info(indent + " read ...") logging.info(f"{indent} found input file: {infile1}") # info ... logging.info(f"{indent} read ...") # init storage and read: indata = cso_file.CSO_File(filename=infile) indata1 = cso_file.CSO_File(filename=infile1) # second file? if infile2 is not None: indata2 = cso_file.CSO_File(filename=infile2) #endif # annote: title = long_timestamp # settings for this variable: vkey = "var.%s" % varname # originating variable: vsource = self.GetSetting(vkey + ".source") vsource1 = self.GetSetting(f"var.{varname1}.input.var") if infile2 is not None: vsource2 = self.GetSetting(f"var.{varname2}.input.var") # optional layer selection: vsource_layer = self.GetSetting( vkey + ".source_layer", totype="int", default=0 "var.{varname1}.source_layer", totype="int", default=0 ) # target units: vunits = self.GetSetting(vkey + ".units", default="None") vunits = self.GetSetting(f"var.{varname}.units", default="None") # plot type: ptype = self.GetSetting(vkey + ".type", default="map") ptype = self.GetSetting(f"var.{varname}.type", default="map") # long name used in labels: long_name = self.GetSetting(vkey + ".long_name", default=varname) long_name = self.GetSetting(f"var.{varname}.long_name" )#, default=varname) # switch: if ptype == "map": # style: vmin = eval(self.GetSetting(vkey + ".vmin", default="None")) vmax = eval(self.GetSetting(vkey + ".vmax", default="None")) colors = eval(self.GetSetting(vkey + ".colors", default="None")) vmin = eval(self.GetSetting(f"var.{varname}.vmin", default="None")) vmax = eval(self.GetSetting(f"var.{varname}.vmax", default="None")) # colormap: #~ older setting: colors = self.GetSetting(f"var.{varname}.colors", default="None") if colors != "None": cmap = eval( f"dict( colors={colors}" ) else: cmap = self.GetSetting(f"var.{varname}.cmap", totype="dict", default="{}") #endif # extract values: vda = indata.ds[vsource] if infile2 is not None: vda = indata1.ds[vsource1] - indata2.ds[vsource2] vda.attrs = indata1.ds[vsource1].attrs else: vda = indata1.ds[vsource1] #endif # grid: if "longitude" in vda.coords.keys(): Loading Loading @@ -1183,22 +1219,27 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): values, units = self._ConvertUnits(values, vda.attrs, vunits) # annote: label = "%s [%s]" % (long_name, units) label = f"{long_name} [{units}]" # map properties: bmp = dict(countries=True) bmp.update(bmp_kwargs) # colorbar properties: cbar = dict(label=label) if "ncolor" in cmap : cbar["ntick"] = cmap["ncolor"] # create map figure: fig = cso_plot.QuickPat( values, x=xm, y=ym, xm=xm, ym=ym, vmin=vmin, vmax=vmax, cmap=dict(colors=colors), cmap=cmap, bmp=bmp, cbar=dict(label=label), cbar=cbar, title=title, figsize=figsize, ) Loading @@ -1209,7 +1250,7 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): fig.Close() else: logging.error('unsupported plot type "%s"' % (ptype)) logging.error(f"unsupported plot type '{ptype}'") raise Exception # endif # ptype Loading @@ -1218,12 +1259,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): ## testing ... # break # endfor # variables else: logging.info(indent + " no input file: %s" % infile) logging.info(f"{indent} no input file: {infile}") # endif # state file present # endfor # variables # next: t = t + dt Loading src/cso/cso_gridded.py +199 −163 Original line number Diff line number Diff line Loading @@ -37,6 +37,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # # 2025-09, Arjo Segers # Updated calculation of temporal means for files with time records. # ######################################################################## Loading Loading @@ -1155,8 +1158,15 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): ! time resolutions: daily, monthly, yearly <rcbase>.resolutions : daily monthly yearly For each of these resolutions, specify a filename pattern for the input files, and a template for the output file. A number of files could be proccessed in one call Define keywords for these files, for example the file with observations and the one with simulations:: ! output files to be processed: <rcbase>.files : obs sim For each of the resolutions and for each of the file keywords, specify a filename pattern for the input files, and a template for the output file. Both should have templates to evaluate the target time a temporal mean, for example ``%%Y%%m%%d`` for a specific day. The input pattern should use ``*`` or ``?`` patterns to denote the higher time resolutions. Loading @@ -1164,21 +1174,21 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): and an output pattern for a daily file:: ! daily means from hourly files: <rcbase>.resolution.daily.input.file : /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc <rcbase>.resolution.daily.output.file : /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc <rcbase>.resolution.daily.obs.input.file : /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc <rcbase>.resolution.daily.obs.output.file : /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc Similar specification should be provided for ``monthly`` and ``yearly`` resolutions. To speedup calculations, the ``monthly`` means could use the ``daily`` means as input:: ! monthly means from daily files: <rcbase>.resolution.monthly.input.file : /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc <rcbase>.resolution.monthly.output.file : /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc <rcbase>.resolution.monthly.obs.input.file : /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc <rcbase>.resolution.monthly.obs.output.file : /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc and similar the ``yearly`` means could use the ``monthly`` means as input:: ! yearly means from monthly files <rcbase>.resolution.yearly.input.file : /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc <rcbase>.resolution.yearly.output.file : /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc <rcbase>.resolution.yearly.obs.input.file : /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc <rcbase>.resolution.yearly.obs.output.file : /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc To reduce file size, by default all variables are written to file as short-integers (2 bytes) accompanied by ``add_offset`` and ``scale_factor`` attributes. Loading Loading @@ -1241,49 +1251,65 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): # renew? renew = self.GetSetting("renew", totype="bool") # keys to process multiple files: filekeys = self.GetSetting("files").split() # resolutions to be created: resolutions = self.GetSetting("resolutions").split() # loop: for resolution in resolutions: # info ... logging.info(indent + 'time resoloution "%s" ...' % resolution) logging.info(f"{indent}time resoloution '{resolution}' ...") # time values: if resolution == "daily": tt = pandas.date_range(t1, t2, freq="D") xtt1 = pandas.date_range(t1, t2, freq="D") dt = pandas.Timedelta(days=1) xtt2 = pandas.date_range(t1 + dt, t2 + dt, freq="D") tfmt = "%Y-%m-%d" elif resolution == "monthly": tt = pandas.date_range(t1, t2, freq="M") xtt1 = pandas.date_range(t1, t2, freq="MS") xtt2 = pandas.date_range(t1, t2, freq="M") + pandas.Timedelta(days=1) tfmt = "%Y-%m" elif resolution == "yearly": tt = pandas.date_range(t1, t2, freq="Y") xtt1 = pandas.date_range(t1, t2, freq="YS") xtt2 = pandas.date_range(t1, t2, freq="Y") tfmt = "%Y" else: logging.error('unsupported time resolution "%s"' % resolution) raise Exception # endif # loop over files: for filekey in filekeys: # info ... logging.info(f"{indent} filekey '{filekey}' ...") # pattern for input files: input_files = self.GetSetting("resolution.%s.input.file" % resolution) input_files = self.GetSetting(f"resolution.{resolution}.{filekey}.input.file") # output file: output_file = self.GetSetting("resolution.%s.output.file" % resolution) output_file = self.GetSetting(f"resolution.{resolution}.{filekey}.output.file") # info .. logging.info(f"{indent} time loop ...") # loop: for t in tt: for irec in range(len(xtt1)): # current interval: xt1 = xtt1[irec] xt2 = xtt2[irec] # info .. logging.info(f"{indent} %s .." % t.strftime(tfmt)) logging.info(f"{indent} {xt1.strftime(tfmt)} .." ) logging.info(f"{indent} collect samples in [{xt1},{xt2})") # target file: outfile = t.strftime(output_file) outfile = xt1.strftime(output_file) # renew? if (not os.path.isfile(outfile)) or renew: # info ... logging.info(f"{indent} create %s ..." % outfile) # search pattern for input files: fpattern = t.strftime(input_files) fpattern = xt1.strftime(input_files) # list: fnames = glob.glob(fpattern) # check ... Loading @@ -1296,18 +1322,23 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): ds = xarray.open_mfdataset(fnames) # info ... logging.info( indent + " read %i records with time range [%s,%s]" % ( len(ds["time"]), ds["time"].values.min(), ds["time"].values.max(), ) f"{indent} read {len(ds['time'])} records with" + f" time range [{ds['time'].values.min()},{ds['time'].values.max()}]" ) # shape: nlon = ds.dims["longitude"] nlat = ds.dims["latitude"] # select time records: iit, = numpy.where( (ds["time"] >= xt1) & (ds["time"] < xt2) ) # check .. if len(iit) == 0: logging.error(f"no input records found withing sampling time range") raise Exception #endif # info ... logging.info(f"{indent} selected {len(iit)} records ...") # update history list: history = [] history.append( Loading @@ -1325,7 +1356,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): # endif # new time coordinate: coords["time"] = xarray.DataArray( [t], dims=("time"), attrs={"standard_name": "time"} [xt1], dims=("time"), attrs={"standard_name": "time"} ) coords["time"].encoding["units"] = tunits Loading Loading @@ -1372,7 +1403,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): if vname in ["cell_area", "pixel_area"]: continue # info ... logging.info(indent + ' variable "%s" ..' % vname) logging.info(f" variable '{vname}' ...") # convert to masked array to avoid warnings about all-nan series: values_in = numpy.ma.array( Loading @@ -1386,7 +1417,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): if len(shp) == 3: # weighted average, convert output to array with nans: values[0, :, :] = numpy.ma.average( values_in, axis=0, weights=pixel_area values_in[iit,:,:], axis=0, weights=pixel_area[iit,:,:] ).filled(fill_value=numpy.nan) # elif len(shp) == 4: Loading @@ -1396,7 +1427,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): for iv in range(shp[3]): # weighted average, convert output to array with nans: values[0, :, :, iv] = numpy.ma.average( values_in[:, :, :, iv], axis=0, weights=pixel_area values_in[iit, :, :, iv], axis=0, weights=pixel_area[iit,:,:] ).filled(fill_value=numpy.nan) # endif # Loading Loading @@ -1430,14 +1461,19 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): # logging.warning( 'break after first created file' ) # break # endif # create else: # info ... logging.info(f"{indent} keep {outfile} ...") # endif # create target file? # endfor # time # endfor # target time record ## testing .. # logging.warning( 'break after first resolution' ) # break #endfor # timekeys # endfor # resolution # info ... Loading src/cso/cso_plot.py +7 −1 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/cso/cso_catalogue.py +124 −83 Original line number Diff line number Diff line Loading @@ -30,6 +30,10 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # # 2025-09, Arjo Segers # Updated plotting of gridded fields. # Extended unit conversions. # ######################################################################## Loading Loading @@ -132,7 +136,7 @@ class CSO_CatalogueBase(utopya.UtopyaRc): # which conversion? conversion = "%s -> %s" % (attrs["units"], vunits) # switch: if conversion in ["1e-9 -> ppb"]: if conversion in ["1e-9 -> ppb","mol m-2 -> mol/m2"]: # just other notation ... pass Loading Loading @@ -439,20 +443,20 @@ class CSO_Catalogue(CSO_CatalogueBase): # settings for this variable: vkey = "var.%s" % varname # plot type: ptype = self.GetSetting(vkey + ".type", default="map") ptype = self.GetSetting("var.{varname}.type", default="map") # input variable: vname = self.GetSetting(vkey + ".source", default=varname) vname = self.GetSetting("var.{varname}.source", default=varname) # target units: vunits = self.GetSetting(vkey + ".units", default="None") vunits = self.GetSetting("var.{varname}.units", default="None") # long name used in labels: long_name = self.GetSetting(vkey + ".long_name", default=varname) long_name = self.GetSetting("var.{varname}.long_name", default=varname) # switch: if ptype == "map": # style: vmin = eval(self.GetSetting(vkey + ".vmin", default="None")) vmax = eval(self.GetSetting(vkey + ".vmax", default="None")) colors = eval(self.GetSetting(vkey + ".colors", default="None")) vmin = eval(self.GetSetting("var.{varname}.vmin", default="None")) vmax = eval(self.GetSetting("var.{varname}.vmax", default="None")) colors = eval(self.GetSetting("var.{varname}.colors", default="None")) # extract corner grids and values: xx, yy, values, attrs = orb.GetTrack(vname) Loading Loading @@ -784,20 +788,20 @@ class CSO_SimCatalogue(CSO_CatalogueBase): # settings for this variable: vkey = "var.%s" % varname # originating variable: vsource = self.GetSetting(vkey + ".source", default="data:" + varname) vsource = self.GetSetting("var.{varname}.source", default="data:" + varname) # target units: vunits = self.GetSetting(vkey + ".units", default="None") vunits = self.GetSetting("var.{varname}.units", default="None") # plot type: ptype = self.GetSetting(vkey + ".type", default="map") ptype = self.GetSetting("var.{varname}.type", default="map") # long name used in labels: long_name = self.GetSetting(vkey + ".long_name", default=varname) long_name = self.GetSetting("var.{varname}.long_name", default=varname) # switch: if ptype == "map": # style: vmin = eval(self.GetSetting(vkey + ".vmin", default="None")) vmax = eval(self.GetSetting(vkey + ".vmax", default="None")) colors = eval(self.GetSetting(vkey + ".colors", default="None")) vmin = eval(self.GetSetting("var.{varname}.vmin", default="None")) vmax = eval(self.GetSetting("var.{varname}.vmax", default="None")) colors = eval(self.GetSetting("var.{varname}.colors", default="None")) # variable source: # [data:]vname Loading Loading @@ -940,36 +944,35 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): ! step is one of: hour | day | month <rcbase>.timerange.step : hour The time range is used to scan for output files from the :py:class:`CSO_GriddedAverage` class:: <rcbase>.input.file : /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc Specify a list of variables to be plotted, for example the retrieved and simulated column:: Specify a list of variables to be plotted, for example the retrieved and simulated column; also the difference between two variables is supported:: <rcbase>.vars : yr ys <rcbase>.vars : yr ys yr-ys For each variable, define the name of the source variable:: For each variable (individual, or the variables to be subtraced from eachother), define the name of the input file(s) and the source variable:: <rcbase>.var.yr.source : yr <rcbase>.var.yr.source : ys <rcbase>.var.yr.input.file : /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc <rcbase>.var.yr.input.var : yr Optionally specify a source layer for profiles, default is first layer:: <rcbase>.var.yr.source_layer : 0 Optionally specify target units that are different from the input:: Optionally specify target units, default is the units of the input variable:: ! convert units: <rcbase>.var.yr.units : 1e15 mlc/cm2 <rcbase>.var.yr.units : umol/m2 The value range of the colorbar could be tunes using (default limits are based on data values):: <rcbase>.var.vcd.vmin : 0.0 <rcbase>.var.vcd.vmax : 10.0 <rcbase>.var.yr.vmin : 0.0 <rcbase>.var.yr.vmax : 10.0 The colors in the colorbar could be changed using:: The colors in the colorbar could be changed using a line that is evaluated into a ``dict()``; see :py:meth:`cso_plot.Set_ColorMap` method for supported elements:: <rcbase>.var.qa_flag.colors : ['red','yellow','green'] <rcbase>.var.qa_flag.cmap : color=['red','yellow','green'], ncolor=9, color_under='maroon' The label below the colorbar will by default show the variable name, unless a ``long_name`` is defined:: Loading @@ -978,7 +981,8 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): The name of the created image files is read from:: ! target files, time tempates are replaced: ! target files, time tempates are replaced, ! use '%{var}' for the variable name: <rcbase>.output.file : %Y/%m/%d/S5p_RPRO_NO2_%Y%m%d_%H%M_gridded_%{var}.png which will give:: Loading @@ -997,6 +1001,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): ! figure size (inches), default is (8,6): <rcbase>.figsize : (6,6) Optional settings to setup the map plot, these are used to fill the ``bmp`` dict that is passed to :py:meth:`cso_plot.QuickPat`:: ! map properties: domain=[-30,90,30,75] """ def __init__(self, rcfile, rcbase="", env={}, indent=""): Loading Loading @@ -1083,18 +1093,6 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): raise Exception # endif # input file: infile = self.GetSetting("input.file") infile = t.strftime(infile) # input file present? if os.path.isfile(infile): # info ... logging.info(indent + " found input file: %s" % infile) # no data read yet: indata = None # loop over variables ... for varname in varnames: # target file: Loading @@ -1106,41 +1104,79 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): # info .. logging.info(indent + " create %s ..." % figfile) # read? if indata is None: # input file(s): if "-" in varname: # split: varname1,varname2 = varname.split("-") # first input file: infile1 = self.GetSetting(f"var.{varname1}.input.file") infile1 = t.strftime(infile1) # second input file: infile2 = self.GetSetting(f"var.{varname2}.input.file") infile2 = t.strftime(infile2) else: # copy: varname1 = varname # input file: infile1 = self.GetSetting(f"var.{varname}.input.file") infile1 = t.strftime(infile1) # no second file infile2 = None #endif # input file present? if os.path.isfile(infile1): # info ... logging.info(indent + " read ...") logging.info(f"{indent} found input file: {infile1}") # info ... logging.info(f"{indent} read ...") # init storage and read: indata = cso_file.CSO_File(filename=infile) indata1 = cso_file.CSO_File(filename=infile1) # second file? if infile2 is not None: indata2 = cso_file.CSO_File(filename=infile2) #endif # annote: title = long_timestamp # settings for this variable: vkey = "var.%s" % varname # originating variable: vsource = self.GetSetting(vkey + ".source") vsource1 = self.GetSetting(f"var.{varname1}.input.var") if infile2 is not None: vsource2 = self.GetSetting(f"var.{varname2}.input.var") # optional layer selection: vsource_layer = self.GetSetting( vkey + ".source_layer", totype="int", default=0 "var.{varname1}.source_layer", totype="int", default=0 ) # target units: vunits = self.GetSetting(vkey + ".units", default="None") vunits = self.GetSetting(f"var.{varname}.units", default="None") # plot type: ptype = self.GetSetting(vkey + ".type", default="map") ptype = self.GetSetting(f"var.{varname}.type", default="map") # long name used in labels: long_name = self.GetSetting(vkey + ".long_name", default=varname) long_name = self.GetSetting(f"var.{varname}.long_name" )#, default=varname) # switch: if ptype == "map": # style: vmin = eval(self.GetSetting(vkey + ".vmin", default="None")) vmax = eval(self.GetSetting(vkey + ".vmax", default="None")) colors = eval(self.GetSetting(vkey + ".colors", default="None")) vmin = eval(self.GetSetting(f"var.{varname}.vmin", default="None")) vmax = eval(self.GetSetting(f"var.{varname}.vmax", default="None")) # colormap: #~ older setting: colors = self.GetSetting(f"var.{varname}.colors", default="None") if colors != "None": cmap = eval( f"dict( colors={colors}" ) else: cmap = self.GetSetting(f"var.{varname}.cmap", totype="dict", default="{}") #endif # extract values: vda = indata.ds[vsource] if infile2 is not None: vda = indata1.ds[vsource1] - indata2.ds[vsource2] vda.attrs = indata1.ds[vsource1].attrs else: vda = indata1.ds[vsource1] #endif # grid: if "longitude" in vda.coords.keys(): Loading Loading @@ -1183,22 +1219,27 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): values, units = self._ConvertUnits(values, vda.attrs, vunits) # annote: label = "%s [%s]" % (long_name, units) label = f"{long_name} [{units}]" # map properties: bmp = dict(countries=True) bmp.update(bmp_kwargs) # colorbar properties: cbar = dict(label=label) if "ncolor" in cmap : cbar["ntick"] = cmap["ncolor"] # create map figure: fig = cso_plot.QuickPat( values, x=xm, y=ym, xm=xm, ym=ym, vmin=vmin, vmax=vmax, cmap=dict(colors=colors), cmap=cmap, bmp=bmp, cbar=dict(label=label), cbar=cbar, title=title, figsize=figsize, ) Loading @@ -1209,7 +1250,7 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): fig.Close() else: logging.error('unsupported plot type "%s"' % (ptype)) logging.error(f"unsupported plot type '{ptype}'") raise Exception # endif # ptype Loading @@ -1218,12 +1259,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase): ## testing ... # break # endfor # variables else: logging.info(indent + " no input file: %s" % infile) logging.info(f"{indent} no input file: {infile}") # endif # state file present # endfor # variables # next: t = t + dt Loading
src/cso/cso_gridded.py +199 −163 Original line number Diff line number Diff line Loading @@ -37,6 +37,9 @@ # 2025-04, Arjo Segers # Changed imports for python packaging. # # 2025-09, Arjo Segers # Updated calculation of temporal means for files with time records. # ######################################################################## Loading Loading @@ -1155,8 +1158,15 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): ! time resolutions: daily, monthly, yearly <rcbase>.resolutions : daily monthly yearly For each of these resolutions, specify a filename pattern for the input files, and a template for the output file. A number of files could be proccessed in one call Define keywords for these files, for example the file with observations and the one with simulations:: ! output files to be processed: <rcbase>.files : obs sim For each of the resolutions and for each of the file keywords, specify a filename pattern for the input files, and a template for the output file. Both should have templates to evaluate the target time a temporal mean, for example ``%%Y%%m%%d`` for a specific day. The input pattern should use ``*`` or ``?`` patterns to denote the higher time resolutions. Loading @@ -1164,21 +1174,21 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): and an output pattern for a daily file:: ! daily means from hourly files: <rcbase>.resolution.daily.input.file : /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc <rcbase>.resolution.daily.output.file : /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc <rcbase>.resolution.daily.obs.input.file : /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc <rcbase>.resolution.daily.obs.output.file : /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc Similar specification should be provided for ``monthly`` and ``yearly`` resolutions. To speedup calculations, the ``monthly`` means could use the ``daily`` means as input:: ! monthly means from daily files: <rcbase>.resolution.monthly.input.file : /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc <rcbase>.resolution.monthly.output.file : /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc <rcbase>.resolution.monthly.obs.input.file : /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc <rcbase>.resolution.monthly.obs.output.file : /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc and similar the ``yearly`` means could use the ``monthly`` means as input:: ! yearly means from monthly files <rcbase>.resolution.yearly.input.file : /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc <rcbase>.resolution.yearly.output.file : /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc <rcbase>.resolution.yearly.obs.input.file : /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc <rcbase>.resolution.yearly.obs.output.file : /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc To reduce file size, by default all variables are written to file as short-integers (2 bytes) accompanied by ``add_offset`` and ``scale_factor`` attributes. Loading Loading @@ -1241,49 +1251,65 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): # renew? renew = self.GetSetting("renew", totype="bool") # keys to process multiple files: filekeys = self.GetSetting("files").split() # resolutions to be created: resolutions = self.GetSetting("resolutions").split() # loop: for resolution in resolutions: # info ... logging.info(indent + 'time resoloution "%s" ...' % resolution) logging.info(f"{indent}time resoloution '{resolution}' ...") # time values: if resolution == "daily": tt = pandas.date_range(t1, t2, freq="D") xtt1 = pandas.date_range(t1, t2, freq="D") dt = pandas.Timedelta(days=1) xtt2 = pandas.date_range(t1 + dt, t2 + dt, freq="D") tfmt = "%Y-%m-%d" elif resolution == "monthly": tt = pandas.date_range(t1, t2, freq="M") xtt1 = pandas.date_range(t1, t2, freq="MS") xtt2 = pandas.date_range(t1, t2, freq="M") + pandas.Timedelta(days=1) tfmt = "%Y-%m" elif resolution == "yearly": tt = pandas.date_range(t1, t2, freq="Y") xtt1 = pandas.date_range(t1, t2, freq="YS") xtt2 = pandas.date_range(t1, t2, freq="Y") tfmt = "%Y" else: logging.error('unsupported time resolution "%s"' % resolution) raise Exception # endif # loop over files: for filekey in filekeys: # info ... logging.info(f"{indent} filekey '{filekey}' ...") # pattern for input files: input_files = self.GetSetting("resolution.%s.input.file" % resolution) input_files = self.GetSetting(f"resolution.{resolution}.{filekey}.input.file") # output file: output_file = self.GetSetting("resolution.%s.output.file" % resolution) output_file = self.GetSetting(f"resolution.{resolution}.{filekey}.output.file") # info .. logging.info(f"{indent} time loop ...") # loop: for t in tt: for irec in range(len(xtt1)): # current interval: xt1 = xtt1[irec] xt2 = xtt2[irec] # info .. logging.info(f"{indent} %s .." % t.strftime(tfmt)) logging.info(f"{indent} {xt1.strftime(tfmt)} .." ) logging.info(f"{indent} collect samples in [{xt1},{xt2})") # target file: outfile = t.strftime(output_file) outfile = xt1.strftime(output_file) # renew? if (not os.path.isfile(outfile)) or renew: # info ... logging.info(f"{indent} create %s ..." % outfile) # search pattern for input files: fpattern = t.strftime(input_files) fpattern = xt1.strftime(input_files) # list: fnames = glob.glob(fpattern) # check ... Loading @@ -1296,18 +1322,23 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): ds = xarray.open_mfdataset(fnames) # info ... logging.info( indent + " read %i records with time range [%s,%s]" % ( len(ds["time"]), ds["time"].values.min(), ds["time"].values.max(), ) f"{indent} read {len(ds['time'])} records with" + f" time range [{ds['time'].values.min()},{ds['time'].values.max()}]" ) # shape: nlon = ds.dims["longitude"] nlat = ds.dims["latitude"] # select time records: iit, = numpy.where( (ds["time"] >= xt1) & (ds["time"] < xt2) ) # check .. if len(iit) == 0: logging.error(f"no input records found withing sampling time range") raise Exception #endif # info ... logging.info(f"{indent} selected {len(iit)} records ...") # update history list: history = [] history.append( Loading @@ -1325,7 +1356,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): # endif # new time coordinate: coords["time"] = xarray.DataArray( [t], dims=("time"), attrs={"standard_name": "time"} [xt1], dims=("time"), attrs={"standard_name": "time"} ) coords["time"].encoding["units"] = tunits Loading Loading @@ -1372,7 +1403,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): if vname in ["cell_area", "pixel_area"]: continue # info ... logging.info(indent + ' variable "%s" ..' % vname) logging.info(f" variable '{vname}' ...") # convert to masked array to avoid warnings about all-nan series: values_in = numpy.ma.array( Loading @@ -1386,7 +1417,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): if len(shp) == 3: # weighted average, convert output to array with nans: values[0, :, :] = numpy.ma.average( values_in, axis=0, weights=pixel_area values_in[iit,:,:], axis=0, weights=pixel_area[iit,:,:] ).filled(fill_value=numpy.nan) # elif len(shp) == 4: Loading @@ -1396,7 +1427,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): for iv in range(shp[3]): # weighted average, convert output to array with nans: values[0, :, :, iv] = numpy.ma.average( values_in[:, :, :, iv], axis=0, weights=pixel_area values_in[iit, :, :, iv], axis=0, weights=pixel_area[iit,:,:] ).filled(fill_value=numpy.nan) # endif # Loading Loading @@ -1430,14 +1461,19 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc): # logging.warning( 'break after first created file' ) # break # endif # create else: # info ... logging.info(f"{indent} keep {outfile} ...") # endif # create target file? # endfor # time # endfor # target time record ## testing .. # logging.warning( 'break after first resolution' ) # break #endfor # timekeys # endfor # resolution # info ... Loading