Fixed averaging over time values when creating super-observations. (c0e8bdb0) · Commits · CAMS / CSO

py/cso_superobs.py

+50 −23

Original line number	Diff line number	Diff line
		@@ -8,7 +8,9 @@
		# 2023, Lewis Blake, Arjo Segers
		# Formatted using "black".
		#

		# 2023-01, Arjo Segers
		# Fixed averaging over time values.
		#

		########################################################################
		###
		@@ -273,7 +275,7 @@ class CSO_SuperObs(utopya.UtopyaRc):
		minimum_coverage = self.GetSetting("mapping.minimum_coverage", "float")
		# description:
		mapping_threshold = (
		"at least %4.2f%% of area covered by original pixels" % minimum_coverage
		f"at least {minimum_coverage*100} % of area covered by original pixels"
		)
		# info ..
		logging.info(" map by pixel footprint coverage; %s" % mapping_threshold)
		@@ -371,7 +373,7 @@ class CSO_SuperObs(utopya.UtopyaRc):
		#
		elif wtype == "area":
		# info ...
		logging.info(indent + " distribute points over footprints ...")
		logging.info(indent + " populate pixel footprints with points ...")
		# check ...
		if ("longitude_bounds" not in source_file.ds) or (
		"latitude_bounds" not in source_file.ds
		@@ -447,20 +449,35 @@ class CSO_SuperObs(utopya.UtopyaRc):
		# store:
		vkeys.append(vkey)

		# number of data values per pixels:
		# number of data values per pixel:
		nv = int(numpy.product(vshp[1:]))
		# selection, and reshape to have row of values per pixel;
		# row should have length 1 for variables with dims: ('pixel')
		# create dataframe columns "var_1","var_2", etc for the data values;
		# columns have length np*npix (one value for each sampling point in footprint)
		# and are filled with the pixel value times weight of the point,
		# so that later on the ".sum()" grouping operator will give the weighted sum;
		# for "time" this cannot be used, as the average time is not a weighted sum:
		if vkey == "time":
		# convert time values:
		times = pandas.to_datetime(source_file.ds[vkey].values)
		times = pandas.to_datetime(source_file.ds[vkey].values[iipix])
		# reference time:
		tref = pandas.Timestamp("%i-01-01 00:00:00" % times[0].year)
		tunits = tref.strftime("seconds since %Y-%m-%d %H:%M:%S")
		# convert to timestep:
		values = numpy.array((times - tref).total_seconds()).reshape((npix, nv))
		elif len(vshp) == 1:
		vshp = (vshp[0], 1)
		values = numpy.array((times - tref).total_seconds())
		# dummy weights with ones to expand from (npix) to (np,npix):
		wwp1 = numpy.ones((wwp.shape[0], npix))
		# create seperate "df" just for time values,
		# so that grouping operator ".mean()" could be used instead of ".sum()";
		# the '*' product gives an element-wise product of wwp (np,npix)
		# and the values (npix) with result shaped (np,npix);
		# after flattening, result has length np*npix :
		df__time = pandas.DataFrame()
		df__time["ii"] = ii.flatten()
		df__time["jj"] = jj.flatten()
		df__time[vkey] = (wwp1 * values).flatten()
		else:
		# scalar or multiple values per pixel?
		if len(vshp) == 1:
		values = source_file.ds[vkey].values[iipix].reshape((npix, nv))
		else:
		values = source_file.ds[vkey].values[iipix, :].reshape((npix, nv))
		@@ -469,10 +486,13 @@ class CSO_SuperObs(utopya.UtopyaRc):
		for iv in range(nv):
		# column name:
		ckey = "%s_%i" % (vkey, iv)
		# store weights*values in data frame for sumation;
		# note that '*' gives an element-wise product of wwp (np,npix) and the values (npix):
		# store weights*values in data frame to be summed later on;
		# the '*' product gives an element-wise product of wwp (np,npix)
		# and the values (npix) with result shaped (np,npix);
		# after flattening, result has length np*npix :
		df[ckey] = (wwp * values[:, iv]).flatten()
		# endfor
		# endfor # iv
		# endif # time or other values

		# endfor # source variables

		@@ -485,6 +505,9 @@ class CSO_SuperObs(utopya.UtopyaRc):
		ii2 = df2.index.get_level_values("ii").values
		jj2 = df2.index.get_level_values("jj").values

		# idem for time:
		df2__time = df__time.groupby(["jj", "ii"]).mean()

		# mask for super pixels with enough contributions(by number or area)
		# of original pixels:
		if wtype == "number":
		@@ -668,14 +691,18 @@ class CSO_SuperObs(utopya.UtopyaRc):
		# loop over super pixels:
		for k in kk:
		# average time:
		values.append(tref + pandas.Timedelta(df2[ckey].values[k], "seconds"))
		values.append(
		tref + pandas.Timedelta(df2__time[vkey].values[k], "seconds")
		)
		# endfor

		# update attributes, conversion has not always set correct long_name ...
		attrs["long_name"] = "averaged time"
		# attrs['units' ] = tunits
		# store:
		outf.ds[vkey] = xarray.DataArray(values, dims=vdims, attrs=attrs)
		outf.ds[vkey] = xarray.DataArray(
		values, dims=vdims, coords=pixel_coords, attrs=attrs
		)
		# ensure correct output:
		outf.ds[vkey].encoding["units"] = tunits
		outf.ds[vkey].encoding["calendar"] = "standard"