TNO Intern

Commit c0e8bdb0 authored by Arjo Segers's avatar Arjo Segers
Browse files

Fixed averaging over time values when creating super-observations.

parent a44a822b
Loading
Loading
Loading
Loading
+50 −23
Original line number Diff line number Diff line
@@ -8,7 +8,9 @@
# 2023, Lewis Blake, Arjo Segers
#   Formatted using "black".
#

# 2023-01, Arjo Segers
#   Fixed averaging over time values.
#

########################################################################
###
@@ -273,7 +275,7 @@ class CSO_SuperObs(utopya.UtopyaRc):
            minimum_coverage = self.GetSetting("mapping.minimum_coverage", "float")
            # description:
            mapping_threshold = (
                "at least %4.2f%% of area covered by original pixels" % minimum_coverage
                f"at least {minimum_coverage*100} % of area covered by original pixels"
            )
            # info ..
            logging.info("  map by pixel footprint coverage; %s" % mapping_threshold)
@@ -371,7 +373,7 @@ class CSO_SuperObs(utopya.UtopyaRc):
                    #
                elif wtype == "area":
                    # info ...
                    logging.info(indent + "  distribute points over footprints ...")
                    logging.info(indent + "  populate pixel footprints with points ...")
                    # check ...
                    if ("longitude_bounds" not in source_file.ds) or (
                        "latitude_bounds" not in source_file.ds
@@ -447,20 +449,35 @@ class CSO_SuperObs(utopya.UtopyaRc):
                    # store:
                    vkeys.append(vkey)

                    # number of data values per pixels:
                    # number of data values per pixel:
                    nv = int(numpy.product(vshp[1:]))
                    # selection, and reshape to have row of values per pixel;
                    # row should have length 1 for variables with dims: ('pixel')
                    # create dataframe columns "var_1","var_2", etc for the data values;
                    # columns have length np*npix (one value for each sampling point in footprint)
                    # and are filled with the pixel value times weight of the point,
                    # so that later on the ".sum()" grouping operator will give the weighted sum;
                    # for "time" this cannot be used, as the average time is not a weighted sum:
                    if vkey == "time":
                        # convert time values:
                        times = pandas.to_datetime(source_file.ds[vkey].values)
                        times = pandas.to_datetime(source_file.ds[vkey].values[iipix])
                        # reference time:
                        tref = pandas.Timestamp("%i-01-01 00:00:00" % times[0].year)
                        tunits = tref.strftime("seconds since %Y-%m-%d %H:%M:%S")
                        # convert to timestep:
                        values = numpy.array((times - tref).total_seconds()).reshape((npix, nv))
                    elif len(vshp) == 1:
                        vshp = (vshp[0], 1)
                        values = numpy.array((times - tref).total_seconds())
                        # dummy weights with ones to expand from (npix) to (np,npix):
                        wwp1 = numpy.ones((wwp.shape[0], npix))
                        # create seperate "df" just for time values,
                        # so that grouping operator ".mean()" could be used instead of ".sum()";
                        # the '*' product gives an element-wise product of wwp (np,npix)
                        # and the values (npix) with result shaped (np,npix);
                        # after flattening, result has length np*npix :
                        df__time = pandas.DataFrame()
                        df__time["ii"] = ii.flatten()
                        df__time["jj"] = jj.flatten()
                        df__time[vkey] = (wwp1 * values).flatten()
                    else:
                        # scalar or multiple values per pixel?
                        if len(vshp) == 1:
                            values = source_file.ds[vkey].values[iipix].reshape((npix, nv))
                        else:
                            values = source_file.ds[vkey].values[iipix, :].reshape((npix, nv))
@@ -469,10 +486,13 @@ class CSO_SuperObs(utopya.UtopyaRc):
                        for iv in range(nv):
                            # column name:
                            ckey = "%s_%i" % (vkey, iv)
                        # store weights*values in data frame for sumation;
                        # note that '*' gives an element-wise product of wwp (np,npix) and the values (npix):
                            # store weights*values in data frame to be summed later on;
                            # the '*' product gives an element-wise product of wwp (np,npix)
                            # and the values (npix) with result shaped (np,npix);
                            # after flattening, result has length np*npix :
                            df[ckey] = (wwp * values[:, iv]).flatten()
                    # endfor
                        # endfor  # iv
                    # endif  # time or other values

                # endfor # source variables

@@ -485,6 +505,9 @@ class CSO_SuperObs(utopya.UtopyaRc):
                ii2 = df2.index.get_level_values("ii").values
                jj2 = df2.index.get_level_values("jj").values

                # idem for time:
                df2__time = df__time.groupby(["jj", "ii"]).mean()

                # mask for super pixels with enough contributions(by number or area)
                # of original pixels:
                if wtype == "number":
@@ -668,14 +691,18 @@ class CSO_SuperObs(utopya.UtopyaRc):
                        # loop over super pixels:
                        for k in kk:
                            # average time:
                            values.append(tref + pandas.Timedelta(df2[ckey].values[k], "seconds"))
                            values.append(
                                tref + pandas.Timedelta(df2__time[vkey].values[k], "seconds")
                            )
                        # endfor

                        # update attributes, conversion has not always set correct long_name ...
                        attrs["long_name"] = "averaged time"
                        # attrs['units'    ] = tunits
                        # store:
                        outf.ds[vkey] = xarray.DataArray(values, dims=vdims, attrs=attrs)
                        outf.ds[vkey] = xarray.DataArray(
                            values, dims=vdims, coords=pixel_coords, attrs=attrs
                        )
                        # ensure correct output:
                        outf.ds[vkey].encoding["units"] = tunits
                        outf.ds[vkey].encoding["calendar"] = "standard"