TNO Intern

Commit af3d2346 authored by Arjo Segers's avatar Arjo Segers
Browse files

Merge branch 'fix-colocate' into 'master'

Fixes for colocated output.

See merge request !9
parents e1549873 9ae52602
Loading
Loading
Loading
Loading
Loading
+85 −69
Original line number Diff line number Diff line
@@ -7,6 +7,10 @@
# 2023, Lewis Blake, Arjo Segers
#   Formatted using "black".
#
# 2025-04, Arjo Segers
#   Added option to drop some columns of the location csv after reading,
#   this prevents these columns from being added to the co-location output.
#   


########################################################################
@@ -73,7 +77,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
    surface locations. The value assigned to location and time is an average over all pixels
    within a specified distance and valid for the time.

    Location should be specified in a csv file with columns at least longitude and latiude::
    Location should be specified in a csv file with at least columns that specify the longitude and latiude::

      name                   ;longitude;latitude
      Peyrusse Vieille       ;     0.18;   43.62
@@ -92,12 +96,19 @@ class CSO_CoLocate(utopya.UtopyaRc):
        <rcbase>.locations.sep            :  ;
        <rcbase>.locations.comment        :  #

    Also specify the column names that used for the longitude and latitude::
    Also specify the column names that are used for the longitude and latitude::

        ! column names:
        <rcbase>.locations.longitude      :  longitude
        <rcbase>.locations.latiutde       :  latiutde

    The content of the csv file is included in the output file as 1D variables with dimension ``(location,)``.
    These variables have the name of the column in the csv file, preceeded by ``location_``.
    Eventually specify that some columns should be skipped::

        ! skip auxilary columns:
        <rcbase>.locations.skip_columns  :  flag value

    Time series are created within a time range::

      ! time range:
@@ -198,9 +209,9 @@ class CSO_CoLocate(utopya.UtopyaRc):
        utopya.UtopyaRc.__init__(self, rcfile, rcbase=rcbase, env=env)

        # info ...
        logging.info(indent + "")
        logging.info(indent + "** Co-located CSO data")
        logging.info(indent + "")
        logging.info(f"{indent}")
        logging.info(f"{indent}** Co-located CSO data")
        logging.info(f"{indent}")

        # time range:
        t1 = self.GetSetting("timerange.start", totype="datetime")
@@ -208,12 +219,12 @@ class CSO_CoLocate(utopya.UtopyaRc):

        # info ...
        tfmt = "%Y-%m-%d %H:%M"
        logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt)))
        logging.info(f"{indent}timerange: [{t1.strftime(tfmt)},{t2.strftime(tfmt)}]")

        # output time step:
        output_freq = self.GetSetting("output.freq")
        # info ...
        logging.info(indent + "output freqency: %s" % output_freq)
        logging.info(f"{indent}output freqency: {output_freq}")
        # output time intervals:
        if output_freq in ["D", "day", "daily"]:
            outtimes1 = pandas.date_range(t1, t2, freq="D")
@@ -221,20 +232,23 @@ class CSO_CoLocate(utopya.UtopyaRc):
        elif output_freq in ["M", "month", "monthly"]:
            outtimes1 = pandas.date_range(t1, t2, freq="MS")
            outtimes2 = outtimes1 + pandas.DateOffset(months=1, seconds=-1)
        elif output_freq in ["Y", "year", "yearly"]:
            outtimes1 = pandas.date_range(t1, t2, freq="YS")
            outtimes2 = pandas.date_range(t1, t2, freq="Y") + pandas.DateOffset(days=1, seconds=-1)
        else:
            logging.error('unsupported output time step "%s"' % step)
            logging.error(f"unsupported output time step '{step}'")
            raise Exception
        # endif

        # target file:
        outfile_template = self.GetSetting("output.file")
        # info ...
        logging.info(indent + "target files: %s" % outfile_template)
        logging.info(f"{indent}target files: {outfile_template}")

        # renew?
        renew = self.GetSetting("renew", totype="bool")
        # info ...
        logging.info(indent + "renew output: %s" % renew)
        logging.info(f"{indent}renew output: {renew}")

        # pack floats as shorts?
        packed = self.GetSetting("output.packed", "bool")
@@ -245,21 +259,21 @@ class CSO_CoLocate(utopya.UtopyaRc):
        gattrs = {}
        attrnames = self.GetSetting("output.attrs").split()
        for key in attrnames:
            gattrs[key] = self.GetSetting("output.attr.%s" % (key))
            gattrs[key] = self.GetSetting(f"output.attr.{key}")
        # endfor

        # station file:
        locations_file = self.GetSetting("locations.file")
        # info ...
        logging.info(indent + "location file: %s" % locations_file)
        logging.info(f"{indent}location file: {locations_file}")
        # check ..
        if not os.path.isfile(locations_file):
            logging.error("file not found: %s" % locations_file)
            logging.error(f"file not found: {locations_file}")
            raise Exception
        # endif
        # special characters:
        sep = self.GetSetting("locations.sep", default=";")
        comment = self.GetSetting("locations.sep", comment="#")
        comment = self.GetSetting("locations.sep", default="#")
        # column names:
        loc_lon = self.GetSetting("locations.longitude")
        loc_lat = self.GetSetting("locations.latitude")
@@ -270,6 +284,16 @@ class CSO_CoLocate(utopya.UtopyaRc):
        # count:
        nloc = len(locations)

        # skip some colunns from the file?
        skip_columns = self.GetSetting("locations.skip_columns", default="").split()
        # defined?
        if len(skip_columns) > 0:
            # info ...
            logging.info(f"{indent}skip columns: {skip_columns}")
            # remove columns:
            locations = locations.drop(columns=skip_columns)
        # endif

        # time resolution
        resolution = self.GetSetting("resolution")

@@ -284,7 +308,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
        for stype in stypes:
            # tempate for input file:
            sources[stype] = {}
            sources[stype]["input"] = self.GetSetting("source.%s" % stype)
            sources[stype]["input"] = self.GetSetting(f"source.{stype}")
        # endfor

        # first source type, this should have the footprints:
@@ -296,7 +320,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
        varsources = {}
        for vkey in vkeys:
            # source description:
            varsources[vkey] = self.GetSetting("output.%s.source" % (vkey))
            varsources[vkey] = self.GetSetting(f"output.{vkey}.source")
        # endfor

        # loop over times for output files:
@@ -309,13 +333,13 @@ class CSO_CoLocate(utopya.UtopyaRc):
            # create?
            if (not os.path.isfile(outfile)) or renew:
                # info ...
                logging.info(indent + "create %s .." % outfile)
                logging.info(f"{indent}create {outfile} ..")

                # averaging time intervals:
                if resolution == "hourly":
                    # start and end times:
                    tt1 = pandas.date_range(outtime1, outtime2, freq="H")
                    tt2 = t1 + pandas.Timedelta(hours=1)
                    tt2 = tt1 + pandas.Timedelta(hours=1)
                #
                elif resolution == "daily":
                    # start and end times:
@@ -323,7 +347,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                    tt2 = tt1 + pandas.Timedelta(days=1)
                #
                else:
                    logging.error('unsupported resolution "%s"' % resolution)
                    logging.error(f"unsupported resolution '{resolution}'")
                    raise Exception
                # endif
                # mid:
@@ -332,7 +356,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                nt = len(ttm)

                # info ...
                logging.info(indent + "init output file ...")
                logging.info(f"{indent}init output file ...")
                # init output file:
                outf = cso_file.CSO_File()
                # locations:
@@ -349,13 +373,13 @@ class CSO_CoLocate(utopya.UtopyaRc):
                # store location info:
                for key in locations.keys():
                    # variable name:
                    vname = "location_%s" % key
                    vname = f"location_{key}"
                    # add data:
                    outf.AddDataVariable(
                        vname,
                        ("location",),
                        values=locations[key],
                        attrs={"long_name": "location %s" % key, "units": "1"},
                        attrs={"long_name": f"location {key}", "units": "1"},
                    )
                # endfor
                # storage for number of selected pixels:
@@ -364,7 +388,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                    ("time", "location"),
                    values=numpy.zeros((nt, nloc), dtype="i2"),
                    attrs={
                        "long_name": "number of pixels within %i km" % distance_km,
                        "long_name": f"number of pixels within {distance_km} km",
                        "units": "1",
                    },
                )
@@ -372,10 +396,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                outf_setup = False

                # init history:
                line = "averages over pixels with %i km from locations specified in %s;" % (
                    distance_km,
                    locations_file,
                )
                line = f"averages over pixels within {distance_km} km from locations specified in {locations_file};"
                line = line + "data files:"
                for stype in stypes:
                    line = line + " " + sources[stype]["input"]
@@ -386,10 +407,8 @@ class CSO_CoLocate(utopya.UtopyaRc):
                # assume input has hourly timestamps ..
                times = pandas.date_range(outtime1, outtime2, freq="H")
                # info ...
                logging.info(
                    indent + "search for input files in [%s,%s] .." % (outtime1, outtime2)
                )
                logging.info(indent + "  search times: %s, .., %s" % (times[0], times[-1]))
                logging.info(f"{indent}search for input files in [{outtime1},{outtime2}] ..")
                logging.info(f"{indent}  search times: {times[0]}, .., {times[-1]}")
                # list of processed files:
                donefiles = []
                # loop over input times:
@@ -404,41 +423,41 @@ class CSO_CoLocate(utopya.UtopyaRc):
                        continue

                    # info ...
                    logging.info(indent + "  read input file(s) ...")
                    logging.info(f"{indent}  read input file(s) ...")
                    # loop:
                    for stype in stypes:
                        # input file:
                        infile = time.strftime(sources[stype]["input"])
                        # info ..
                        logging.info(indent + "    %s" % infile)
                        logging.info(f"{indent}    {infile}")
                        # read:
                        sources[stype]["file"] = cso_file.CSO_File(filename=infile)
                        ## update history:
                        # history.insert( 0, 'pixel data from %s' % os.path.basename(infile) )
                        # history.insert( 0, f"pixel data from {os.path.basename(infile)}")
                    # endfor

                    # filter:
                    selected, history = sources[stype0]["file"].SelectPixels(
                        self.rcf, rcbase, indent=indent + "    "
                        self.rcf, rcbase, indent=f"{indent}    "
                    )
                    # count:
                    npix = selected.size
                    nsel = selected.sum()
                    # none?
                    if nsel == 0:
                        loggin.info(indent + "      no pixels selected ..")
                        loggin.info(f"{indent}      no pixels selected ..")
                        continue
                    # endif
                    # info ..
                    logging.info(indent + "      selected %i / %i pixels .." % (nsel, npix))
                    logging.info(f"{indent}      selected {nsel} / {npix} pixels ..")

                    # target record:
                    (ii,) = numpy.where((time >= tt1) & (time < tt2))
                    (ii,) = numpy.where((time > tt1) & (time <= tt2))
                    # check ..
                    if len(ii) != 1:
                        logging.error("time %s found %i times within bounds:" % (time, len(ii)))
                        logging.error("time {time} found {len(ii)} times within bounds:")
                        for it in range(len(tt1)):
                            logging.error("[%s,%s]" % (t1[it], t2[it]))
                            logging.error("[{t1[it]},{t2[it]}]")
                        # endfor
                        raise Exception
                    # endif
@@ -446,7 +465,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                    itime = ii[0]
                    # info ..
                    logging.info(
                        indent + "      target time interval: [%s,%s)" % (tt1[itime], tt2[itime])
                        f"{indent}      target time interval: [{tt1[itime]},{tt2[itime]})"
                    )

                    # storage:
@@ -462,8 +481,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                        shp = das_in[vkey].shape[1:]
                        if numpy.prod(shp) != 1:
                            logging.error(
                                'source variable "%s" has shape %s per pixel, but currently only scalars supported ...'
                                % (vkey, str(shp))
                                f"source variable '{vkey}' has shape {str(shp)} per pixel, but currently only scalars supported ..."
                            )
                            raise Exception
                        # endif
@@ -483,7 +501,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                    # endif # init output?

                    # info ..
                    logging.info(indent + "      loop over locations ...")
                    logging.info(f"{indent}      loop over locations ...")
                    # loop over locations:
                    for iloc in range(nloc):
                        # index:
@@ -492,7 +510,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                        lon = locations.at[indx, loc_lon]
                        lat = locations.at[indx, loc_lat]
                        # info ...
                        logging.info(indent + "        location % (%f,%f)" % (iloc + 1, lon, lat))
                        logging.info(f"{indent}        location {iloc+1} ({lon:7.2f},{lat:6.2f})")
                        # distance in m from pixels to location:
                        dist_m = cso_mapping.LonLatDistance(
                            lon,
@@ -505,15 +523,13 @@ class CSO_CoLocate(utopya.UtopyaRc):
                        # any?
                        if len(iipix) == 0:
                            # info ...
                            logging.info(indent + "          no pixels within %i km" % distance_km)
                            logging.info(f"{indent}          no pixels within {distance_km} km")
                        else:
                            # count:
                            nval = len(iipix)
                            # info ...
                            logging.info(
                                indent
                                + "          found %i selected pixels within %i km"
                                % (nval, distance_km)
                                f"{indent}          found {nval} selected pixels within {distance_km} km"
                            )
                            # add contribution:
                            outf.ds["npixel"].values[itime, iloc] += nval
@@ -534,7 +550,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                # endfor # input times

                # info ...
                logging.info(indent + "  averages ...")
                logging.info(f"{indent}  averages ...")
                # times/locations with pixels found:
                kk, ii = numpy.where(outf.ds["npixel"].values > 0)
                # any?
@@ -547,7 +563,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
                # endif # any pixels co-located

                # info ...
                logging.info(indent + "  write ...")
                logging.info(f"{indent}  write ...")
                # write:
                outf.Write(
                    outfile,
@@ -560,7 +576,7 @@ class CSO_CoLocate(utopya.UtopyaRc):

            else:
                # info ...
                logging.info(indent + "keep  %s .." % outfile)
                logging.info(f"{indent}keep  {outfile} ...")
            # endif # renew

        # endfor # outtimes
@@ -568,9 +584,9 @@ class CSO_CoLocate(utopya.UtopyaRc):
        # endfor # time loop

        # info ...
        logging.info(indent + "")
        logging.info(indent + "** end")
        logging.info(indent + "")
        logging.info(f"{indent}")
        logging.info(f"{indent}** end")
        logging.info(f"{indent}")

    # enddef __init__

@@ -690,9 +706,9 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):
        cso_catalogue.CSO_CatalogueBase.__init__(self, rcfile, rcbase=rcbase, env=env)

        # info ...
        logging.info(indent + "")
        logging.info(indent + "** Plot Co-located time series")
        logging.info(indent + "")
        logging.info(f"{indent}")
        logging.info(f"{indent}** Plot Co-located time series")
        logging.info(f"{indent}")

        # time range:
        t1 = self.GetSetting("timerange.start", totype="datetime")
@@ -700,12 +716,12 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):

        # info ...
        tfmt = "%Y-%m-%d %H:%M"
        logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt)))
        logging.info(f"{indent}timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt)))

        # renew?
        renew = self.GetSetting("renew", totype="bool")
        # info ...
        logging.info(indent + "renew output: %s" % renew)
        logging.info(f"{indent}renew output: %s" % renew)

        # filter for input files:
        fpattern = self.GetSetting("input.files")
@@ -771,13 +787,13 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):
            styles[varname] = {}
            for resolution in resolutions:
                styles[varname][resolution] = self.GetSetting(
                    "var.%s.%s.style" % (varname, resolution), totype="dict"
                    f"var.{varname}.{resolution}.style", totype="dict"
                )
            # endfor
        # endfor

        # info ...
        logging.info(indent + "resample ...")
        logging.info(f"{indent}resample ...")
        # resample (all locations):
        #   das['N']['yr'   ] = ..
        #   das['N']['times'] = ..
@@ -832,16 +848,16 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):
        # endfor # time resols

        # loop over locations:
        for iloc in range(ds.dims["location"]):
        for iloc in range(ds.sizes["location"]):
            # info ..
            logging.info("location %i / %i .." % (iloc + 1, ds.dims["location"]))
            logging.info(f"{indent}location {iloc+1} / {ds.sizes['location']} ...")

            # target file, replace templates:
            figfile = figfile_template.format(**loci[iloc])
            # renew?
            if (not os.path.isfile(figfile)) or renew:
                # info ..
                logging.info("  create %s .." % figfile)
                logging.info(f"{indent}  create {figfile} ...")

                # new:
                fig = plt.figure(figsize=figsize)
@@ -896,15 +912,15 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):

            else:
                # info ...
                logging.info("  keep   %s .." % figfile)
                logging.info(f"{indent}  keep   {figfile} ...")
            # endif

        # endfor # locations

        # info ...
        logging.info(indent + "")
        logging.info(indent + "** end")
        logging.info(indent + "")
        logging.info(f"{indent}")
        logging.info(f"{indent}** end")
        logging.info(f"{indent}")

    # enddef __init__

+21 −6
Original line number Diff line number Diff line
@@ -36,6 +36,10 @@
#   - added GetValue method
#   Removed usage of undefined filename in error messages.
#
# 2025-04, Arjo Segers
#   Enable zlib compression only for numerical data.
#   Avoid warnings from packing in case of all-nan values.
#

########################################################################
###
@@ -185,9 +189,16 @@ def Pack_DataArray(da, dtype="i2"):

    # only floats ...
    if da.dtype in [numpy.float32, numpy.float64]:
        # should have some values ..
        if numpy.any( ~ numpy.isnan(da.values) ):
            # value range, ignore nan's:
            vmin = numpy.nanmin(da.values)
            vmax = numpy.nanmax(da.values)
        else:
            # dummy range:
            vmin = 0.0
            vmax = 0.0
        #end if
        # target data type could be integer or float:
        if dtype.startswith("i"):
            # use absolute minimum to represent nans:
@@ -606,9 +617,13 @@ class CSO_File(object):
        if complevel > 0:
            # loop over variables:
            for vname in self.ds.keys():
                # only numerical values:
                dtype = self.ds[vname].dtype
                if numpy.issubdtype(dtype,numpy.integer) or numpy.issubdtype(dtype,numpy.floating):
                    # enable zlib compression and set level:
                    self.ds[vname].encoding["zlib"] = True
                    self.ds[vname].encoding["complevel"] = complevel
                # endif
            # endfor # variables
        # endif # comppress