TNO Intern

Commit 2a495433 authored by Arjo Segers's avatar Arjo Segers
Browse files

Updated computation and plotting of gridded means.

parent 9c0cb981
Loading
Loading
Loading
Loading
+124 −83
Original line number Diff line number Diff line
@@ -30,6 +30,10 @@
# 2025-04, Arjo Segers
#   Changed imports for python packaging.
#
# 2025-09, Arjo Segers
#   Updated plotting of gridded fields.
#   Extended unit conversions.
#


########################################################################
@@ -132,7 +136,7 @@ class CSO_CatalogueBase(utopya.UtopyaRc):
            # which conversion?
            conversion = "%s -> %s" % (attrs["units"], vunits)
            # switch:
            if conversion in ["1e-9 -> ppb"]:
            if conversion in ["1e-9 -> ppb","mol m-2 -> mol/m2"]:
                # just other notation ...
                pass

@@ -439,20 +443,20 @@ class CSO_Catalogue(CSO_CatalogueBase):
                        # settings for this variable:
                        vkey = "var.%s" % varname
                        # plot type:
                        ptype = self.GetSetting(vkey + ".type", default="map")
                        ptype = self.GetSetting("var.{varname}.type", default="map")
                        # input variable:
                        vname = self.GetSetting(vkey + ".source", default=varname)
                        vname = self.GetSetting("var.{varname}.source", default=varname)
                        # target units:
                        vunits = self.GetSetting(vkey + ".units", default="None")
                        vunits = self.GetSetting("var.{varname}.units", default="None")
                        # long name used in labels:
                        long_name = self.GetSetting(vkey + ".long_name", default=varname)
                        long_name = self.GetSetting("var.{varname}.long_name", default=varname)

                        # switch:
                        if ptype == "map":
                            # style:
                            vmin = eval(self.GetSetting(vkey + ".vmin", default="None"))
                            vmax = eval(self.GetSetting(vkey + ".vmax", default="None"))
                            colors = eval(self.GetSetting(vkey + ".colors", default="None"))
                            vmin = eval(self.GetSetting("var.{varname}.vmin", default="None"))
                            vmax = eval(self.GetSetting("var.{varname}.vmax", default="None"))
                            colors = eval(self.GetSetting("var.{varname}.colors", default="None"))

                            # extract corner grids and values:
                            xx, yy, values, attrs = orb.GetTrack(vname)
@@ -784,20 +788,20 @@ class CSO_SimCatalogue(CSO_CatalogueBase):
                        # settings for this variable:
                        vkey = "var.%s" % varname
                        # originating variable:
                        vsource = self.GetSetting(vkey + ".source", default="data:" + varname)
                        vsource = self.GetSetting("var.{varname}.source", default="data:" + varname)
                        # target units:
                        vunits = self.GetSetting(vkey + ".units", default="None")
                        vunits = self.GetSetting("var.{varname}.units", default="None")
                        # plot type:
                        ptype = self.GetSetting(vkey + ".type", default="map")
                        ptype = self.GetSetting("var.{varname}.type", default="map")
                        # long name used in labels:
                        long_name = self.GetSetting(vkey + ".long_name", default=varname)
                        long_name = self.GetSetting("var.{varname}.long_name", default=varname)

                        # switch:
                        if ptype == "map":
                            # style:
                            vmin = eval(self.GetSetting(vkey + ".vmin", default="None"))
                            vmax = eval(self.GetSetting(vkey + ".vmax", default="None"))
                            colors = eval(self.GetSetting(vkey + ".colors", default="None"))
                            vmin = eval(self.GetSetting("var.{varname}.vmin", default="None"))
                            vmax = eval(self.GetSetting("var.{varname}.vmax", default="None"))
                            colors = eval(self.GetSetting("var.{varname}.colors", default="None"))

                            # variable source:
                            #    [data:]vname
@@ -940,36 +944,35 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
      ! step is one of: hour | day | month
      <rcbase>.timerange.step   :  hour

    The time range is used to scan for output files from the :py:class:`CSO_GriddedAverage` class::

        <rcbase>.input.file        :  /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc

    Specify a list of variables to be plotted, for example the retrieved and simulated column::
    Specify a list of variables to be plotted, for example the retrieved and simulated column;
    also the difference between two variables is supported::

      <rcbase>.vars                   :  yr ys
      <rcbase>.vars                   :  yr ys yr-ys
      
    For each variable, define the name of the source variable::
    For each variable (individual, or the variables to be subtraced from eachother), 
    define the name of the input file(s) and the source variable::

      <rcbase>.var.yr.source               :  yr
      <rcbase>.var.yr.source               :  ys
      <rcbase>.var.yr.input.file        :  /Scratch/model/output/CSO_output_%Y%m%d_%H%M_gridded.nc
      <rcbase>.var.yr.input.var         :  yr

    Optionally specify a source layer for profiles, default is first layer::

      <rcbase>.var.yr.source_layer         :  0

    Optionally specify target units that are  different from the input::
    Optionally specify target units, default is the units of the input variable::

        ! convert units:
        <rcbase>.var.yr.units              :  1e15 mlc/cm2
        <rcbase>.var.yr.units              :  umol/m2

    The value range of the colorbar could be tunes using (default limits are based on data values)::

        <rcbase>.var.vcd.vmin              :   0.0
        <rcbase>.var.vcd.vmax              :  10.0
        <rcbase>.var.yr.vmin              :   0.0
        <rcbase>.var.yr.vmax              :  10.0

    The colors in the colorbar could be changed using::
    The colors in the colorbar could be changed using a line that is evaluated into a ``dict()``;
    see :py:meth:`cso_plot.Set_ColorMap` method for supported elements::

        <rcbase>.var.qa_flag.colors        :   ['red','yellow','green']
        <rcbase>.var.qa_flag.cmap         :   color=['red','yellow','green'], ncolor=9, color_under='maroon'

    The label below the colorbar will by default show the variable name,
    unless a ``long_name`` is defined::
@@ -978,7 +981,8 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):

    The name of the created image files is read from::

        ! target files, time tempates are replaced:
        ! target files, time tempates are replaced,
        ! use '%{var}' for the variable name:
        <rcbase>.output.file            :  %Y/%m/%d/S5p_RPRO_NO2_%Y%m%d_%H%M_gridded_%{var}.png

    which will give::
@@ -997,6 +1001,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
        ! figure size (inches), default is (8,6):
        <rcbase>.figsize             :  (6,6)

    Optional settings to setup the map plot, these are used to fill the ``bmp`` dict
    that is passed to :py:meth:`cso_plot.QuickPat`::

        ! map properties:
        domain=[-30,90,30,75]

    """

    def __init__(self, rcfile, rcbase="", env={}, indent=""):
@@ -1083,18 +1093,6 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
                raise Exception
            # endif

            # input file:
            infile = self.GetSetting("input.file")
            infile = t.strftime(infile)

            # input file present?
            if os.path.isfile(infile):
                # info ...
                logging.info(indent + "    found input file: %s" % infile)

                # no data read yet:
                indata = None

            # loop over variables ...
            for varname in varnames:
                # target file:
@@ -1106,41 +1104,79 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
                    # info ..
                    logging.info(indent + "    create %s ..." % figfile)

                        # read?
                        if indata is None:
                    # input file(s):
                    if "-" in varname:
                        # split:
                        varname1,varname2 = varname.split("-")
                        # first input file:
                        infile1 = self.GetSetting(f"var.{varname1}.input.file")
                        infile1 = t.strftime(infile1)
                        # second input file:
                        infile2 = self.GetSetting(f"var.{varname2}.input.file")
                        infile2 = t.strftime(infile2)
                    else:
                        # copy:
                        varname1 = varname
                        # input file:
                        infile1 = self.GetSetting(f"var.{varname}.input.file")
                        infile1 = t.strftime(infile1)
                        # no second file
                        infile2 = None
                    #endif
                    # input file present?
                    if os.path.isfile(infile1):
                        # info ...
                            logging.info(indent + "      read ...")
                        logging.info(f"{indent}    found input file: {infile1}")

                        # info ...
                        logging.info(f"{indent}      read ...")
                        # init storage and read:
                            indata = cso_file.CSO_File(filename=infile)
                        indata1 = cso_file.CSO_File(filename=infile1)
                        # second file?
                        if infile2 is not None:
                            indata2 = cso_file.CSO_File(filename=infile2)
                        #endif

                        # annote:
                        title = long_timestamp

                        # settings for this variable:
                        vkey = "var.%s" % varname
                        # originating variable:
                        vsource = self.GetSetting(vkey + ".source")
                        vsource1 = self.GetSetting(f"var.{varname1}.input.var")
                        if infile2 is not None:
                            vsource2 = self.GetSetting(f"var.{varname2}.input.var")
                        # optional layer selection:
                        vsource_layer = self.GetSetting(
                            vkey + ".source_layer", totype="int", default=0
                            "var.{varname1}.source_layer", totype="int", default=0
                        )
                        # target units:
                        vunits = self.GetSetting(vkey + ".units", default="None")
                        vunits = self.GetSetting(f"var.{varname}.units", default="None")
                        # plot type:
                        ptype = self.GetSetting(vkey + ".type", default="map")
                        ptype = self.GetSetting(f"var.{varname}.type", default="map")
                        # long name used in labels:
                        long_name = self.GetSetting(vkey + ".long_name", default=varname)
                        long_name = self.GetSetting(f"var.{varname}.long_name" )#, default=varname)

                        # switch:
                        if ptype == "map":
                            # style:
                            vmin = eval(self.GetSetting(vkey + ".vmin", default="None"))
                            vmax = eval(self.GetSetting(vkey + ".vmax", default="None"))
                            colors = eval(self.GetSetting(vkey + ".colors", default="None"))
                            vmin = eval(self.GetSetting(f"var.{varname}.vmin", default="None"))
                            vmax = eval(self.GetSetting(f"var.{varname}.vmax", default="None"))

                            # colormap:
                            #~ older setting:
                            colors = self.GetSetting(f"var.{varname}.colors", default="None")
                            if colors != "None":
                                cmap = eval( f"dict( colors={colors}" )
                            else:
                                cmap = self.GetSetting(f"var.{varname}.cmap", totype="dict", default="{}")
                            #endif

                            # extract values:
                            vda = indata.ds[vsource]
                            if infile2 is not None:
                                vda = indata1.ds[vsource1] - indata2.ds[vsource2]
                                vda.attrs = indata1.ds[vsource1].attrs
                            else:
                                vda = indata1.ds[vsource1]
                            #endif

                            # grid:
                            if "longitude" in vda.coords.keys():
@@ -1183,22 +1219,27 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
                            values, units = self._ConvertUnits(values, vda.attrs, vunits)

                            # annote:
                            label = "%s [%s]" % (long_name, units)
                            label = f"{long_name} [{units}]"

                            # map properties:
                            bmp = dict(countries=True)
                            bmp.update(bmp_kwargs)

                            # colorbar properties:
                            cbar = dict(label=label)
                            if "ncolor" in cmap :
                                cbar["ntick"] = cmap["ncolor"]

                            # create map figure:
                            fig = cso_plot.QuickPat(
                                values,
                                x=xm,
                                y=ym,
                                xm=xm,
                                ym=ym,
                                vmin=vmin,
                                vmax=vmax,
                                cmap=dict(colors=colors),
                                cmap=cmap,
                                bmp=bmp,
                                cbar=dict(label=label),
                                cbar=cbar,
                                title=title,
                                figsize=figsize,
                            )
@@ -1209,7 +1250,7 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
                            fig.Close()

                        else:
                            logging.error('unsupported plot type "%s"' % (ptype))
                            logging.error(f"unsupported plot type '{ptype}'")
                            raise Exception
                        # endif  # ptype

@@ -1218,12 +1259,12 @@ class CSO_GriddedCatalogue(CSO_CatalogueBase):
                    ## testing ...
                    # break

                # endfor # variables

                else:
                logging.info(indent + "    no    input file: %s" % infile)
                    logging.info(f"{indent}    no    input file: {infile}")
                # endif # state file present

            # endfor # variables

            # next:
            t = t + dt

+199 −163
Original line number Diff line number Diff line
@@ -37,6 +37,9 @@
# 2025-04, Arjo Segers
#   Changed imports for python packaging.
#
# 2025-09, Arjo Segers
#   Updated calculation of temporal means for files with time records.
#


########################################################################
@@ -1155,8 +1158,15 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
        ! time resolutions: daily, monthly, yearly
        <rcbase>.resolutions                     :  daily monthly yearly

    For each of these resolutions, specify a filename pattern for the input files,
    and a template for the output file.
    A number of files could be proccessed in one call
    Define keywords for these files, for example the file with observations
    and the one with simulations::

        ! output files to be processed:
        <rcbase>.files                           :  obs sim

    For each of the resolutions and for each of the file keywords, 
    specify a filename pattern for the input files, and a template for the output file.
    Both should have templates to evaluate the target time a temporal mean,
    for example ``%%Y%%m%%d`` for a specific day.
    The input pattern should use ``*`` or ``?`` patterns to denote the higher time resolutions.
@@ -1164,21 +1174,21 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
    and an output pattern for a daily file::

        ! daily means from hourly files:
        <rcbase>.resolution.daily.input.file     :  /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc
        <rcbase>.resolution.daily.output.file    :  /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc
        <rcbase>.resolution.daily.obs.input.file     :  /work/gridded/europe/%Y/%m/CSO_gridded_%Y%m%d_????_S5p-glyox.nc
        <rcbase>.resolution.daily.obs.output.file    :  /work/gridded/europe/daily/CSO_gridded_%Y%m%d_S5p-glyox.nc

    Similar specification should be provided for ``monthly`` and ``yearly`` resolutions.
    To speedup calculations, the ``monthly`` means could use the ``daily`` means as input::

        ! monthly means from daily files:
        <rcbase>.resolution.monthly.input.file   :  /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc
        <rcbase>.resolution.monthly.output.file  :  /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc
        <rcbase>.resolution.monthly.obs.input.file   :  /work/gridded/europe/daily/CSO_gridded_%Y%m??_S5p-glyox.nc
        <rcbase>.resolution.monthly.obs.output.file  :  /work/gridded/europe/monthly/CSO_gridded_%Y%m_S5p-glyox.nc

     and similar the ``yearly`` means could use the ``monthly`` means as input::

        ! yearly means from monthly files
        <rcbase>.resolution.yearly.input.file    :  /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc
        <rcbase>.resolution.yearly.output.file   :  /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc
        <rcbase>.resolution.yearly.obs.input.file    :  /work/gridded/europe/monthly/CSO_gridded_%Y??_S5p-glyox.nc
        <rcbase>.resolution.yearly.obs.output.file   :  /work/gridded/europe/yearly/CSO_gridded_%Y_S5p-glyox.nc

    To reduce file size, by default all variables are written to file as short-integers (2 bytes)
    accompanied by ``add_offset`` and ``scale_factor`` attributes.
@@ -1241,49 +1251,65 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
        # renew?
        renew = self.GetSetting("renew", totype="bool")

        # keys to process multiple files:
        filekeys = self.GetSetting("files").split()

        # resolutions to be created:
        resolutions = self.GetSetting("resolutions").split()
        # loop:
        for resolution in resolutions:
            # info ...
            logging.info(indent + 'time resoloution "%s" ...' % resolution)
            logging.info(f"{indent}time resoloution '{resolution}' ...")

            # time values:
            if resolution == "daily":
                tt = pandas.date_range(t1, t2, freq="D")
                xtt1 = pandas.date_range(t1, t2, freq="D")
                dt = pandas.Timedelta(days=1)
                xtt2 = pandas.date_range(t1 + dt, t2 + dt, freq="D")
                tfmt = "%Y-%m-%d"
            elif resolution == "monthly":
                tt = pandas.date_range(t1, t2, freq="M")
                xtt1 = pandas.date_range(t1, t2, freq="MS")
                xtt2 = pandas.date_range(t1, t2, freq="M") + pandas.Timedelta(days=1)
                tfmt = "%Y-%m"
            elif resolution == "yearly":
                tt = pandas.date_range(t1, t2, freq="Y")
                xtt1 = pandas.date_range(t1, t2, freq="YS")
                xtt2 = pandas.date_range(t1, t2, freq="Y")
                tfmt = "%Y"
            else:
                logging.error('unsupported time resolution "%s"' % resolution)
                raise Exception
            # endif

            # loop over files:
            for filekey in filekeys:
                # info ...
                logging.info(f"{indent}  filekey '{filekey}' ...")

                # pattern for input files:
            input_files = self.GetSetting("resolution.%s.input.file" % resolution)
                input_files = self.GetSetting(f"resolution.{resolution}.{filekey}.input.file")
                # output file:
            output_file = self.GetSetting("resolution.%s.output.file" % resolution)
                output_file = self.GetSetting(f"resolution.{resolution}.{filekey}.output.file")

                # info ..
                logging.info(f"{indent}  time loop ...")
                # loop:
            for t in tt:
                for irec in range(len(xtt1)):
                    # current interval:
                    xt1 = xtt1[irec]
                    xt2 = xtt2[irec]
                    # info ..
                logging.info(f"{indent}  %s .." % t.strftime(tfmt))
                    logging.info(f"{indent}    {xt1.strftime(tfmt)} .." )
                    logging.info(f"{indent}      collect samples in [{xt1},{xt2})")

                    # target file:
                outfile = t.strftime(output_file)
                    outfile = xt1.strftime(output_file)
                    # renew?
                    if (not os.path.isfile(outfile)) or renew:
                        # info ...
                        logging.info(f"{indent}      create %s ..." % outfile)

                        # search pattern for input files:
                    fpattern = t.strftime(input_files)
                        fpattern = xt1.strftime(input_files)
                        # list:
                        fnames = glob.glob(fpattern)
                        # check ...
@@ -1296,18 +1322,23 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
                        ds = xarray.open_mfdataset(fnames)
                        # info ...
                        logging.info(
                        indent
                        + "      read %i records with time range [%s,%s]"
                        % (
                            len(ds["time"]),
                            ds["time"].values.min(),
                            ds["time"].values.max(),
                        )
                            f"{indent}         read {len(ds['time'])} records with" +
                            f" time range [{ds['time'].values.min()},{ds['time'].values.max()}]"
                        )
                        # shape:
                        nlon = ds.dims["longitude"]
                        nlat = ds.dims["latitude"]

                        # select time records:
                        iit, = numpy.where( (ds["time"] >= xt1) & (ds["time"] < xt2) )
                        # check ..
                        if len(iit) == 0:
                            logging.error(f"no input records found withing sampling time range")
                            raise Exception
                        #endif
                        # info ...
                        logging.info(f"{indent}         selected {len(iit)} records ...")

                        # update history list:
                        history = []
                        history.append(
@@ -1325,7 +1356,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
                        # endif
                        # new time coordinate:
                        coords["time"] = xarray.DataArray(
                        [t], dims=("time"), attrs={"standard_name": "time"}
                            [xt1], dims=("time"), attrs={"standard_name": "time"}
                        )
                        coords["time"].encoding["units"] = tunits

@@ -1372,7 +1403,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
                            if vname in ["cell_area", "pixel_area"]:
                                continue
                            # info ...
                        logging.info(indent + '      variable "%s" ..' % vname)
                            logging.info(f"        variable '{vname}' ...")

                            # convert to masked array to avoid warnings about all-nan series:
                            values_in = numpy.ma.array(
@@ -1386,7 +1417,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
                            if len(shp) == 3:
                                # weighted average, convert output to array with nans:
                                values[0, :, :] = numpy.ma.average(
                                values_in, axis=0, weights=pixel_area
                                    values_in[iit,:,:], axis=0, weights=pixel_area[iit,:,:]
                                ).filled(fill_value=numpy.nan)
                            #
                            elif len(shp) == 4:
@@ -1396,7 +1427,7 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
                                for iv in range(shp[3]):
                                    # weighted average, convert output to array with nans:
                                    values[0, :, :, iv] = numpy.ma.average(
                                    values_in[:, :, :, iv], axis=0, weights=pixel_area
                                        values_in[iit, :, :, iv], axis=0, weights=pixel_area[iit,:,:]
                                    ).filled(fill_value=numpy.nan)
                                # endif
                            #
@@ -1430,14 +1461,19 @@ class CSO_GriddedAverageMeans(utopya.UtopyaRc):
                        # logging.warning( 'break after first created file' )
                        # break

                # endif # create
                    else:
                        # info ...
                        logging.info(f"{indent}      keep  {outfile} ...")
                    # endif # create target file?

            # endfor # time
                # endfor # target time record

                ## testing ..
                # logging.warning( 'break after first resolution' )
                # break

            #endfor # timekeys

        # endfor # resolution

        # info ...
+7 −1

File changed.

Preview size limit exceeded, changes collapsed.