Merge branch 'fix-colocate' into 'master' (af3d2346) · Commits · CAMS / CSO

src/cso/cso_colocate.py

+85 −69

Original line number	Diff line number	Diff line
		@@ -7,6 +7,10 @@
		# 2023, Lewis Blake, Arjo Segers
		# Formatted using "black".
		#
		# 2025-04, Arjo Segers
		# Added option to drop some columns of the location csv after reading,
		# this prevents these columns from being added to the co-location output.
		#


		########################################################################
		@@ -73,7 +77,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		surface locations. The value assigned to location and time is an average over all pixels
		within a specified distance and valid for the time.

		Location should be specified in a csv file with columns at least longitude and latiude::
		Location should be specified in a csv file with at least columns that specify the longitude and latiude::

		name ;longitude;latitude
		Peyrusse Vieille ; 0.18; 43.62
		@@ -92,12 +96,19 @@ class CSO_CoLocate(utopya.UtopyaRc):
		<rcbase>.locations.sep : ;
		<rcbase>.locations.comment : #

		Also specify the column names that used for the longitude and latitude::
		Also specify the column names that are used for the longitude and latitude::

		! column names:
		<rcbase>.locations.longitude : longitude
		<rcbase>.locations.latiutde : latiutde

		The content of the csv file is included in the output file as 1D variables with dimension ``(location,)``.
		These variables have the name of the column in the csv file, preceeded by ``location_``.
		Eventually specify that some columns should be skipped::

		! skip auxilary columns:
		<rcbase>.locations.skip_columns : flag value

		Time series are created within a time range::

		! time range:
		@@ -198,9 +209,9 @@ class CSO_CoLocate(utopya.UtopyaRc):
		utopya.UtopyaRc.__init__(self, rcfile, rcbase=rcbase, env=env)

		# info ...
		logging.info(indent + "")
		logging.info(indent + "** Co-located CSO data")
		logging.info(indent + "")
		logging.info(f"{indent}")
		logging.info(f"{indent}** Co-located CSO data")
		logging.info(f"{indent}")

		# time range:
		t1 = self.GetSetting("timerange.start", totype="datetime")
		@@ -208,12 +219,12 @@ class CSO_CoLocate(utopya.UtopyaRc):

		# info ...
		tfmt = "%Y-%m-%d %H:%M"
		logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt)))
		logging.info(f"{indent}timerange: [{t1.strftime(tfmt)},{t2.strftime(tfmt)}]")

		# output time step:
		output_freq = self.GetSetting("output.freq")
		# info ...
		logging.info(indent + "output freqency: %s" % output_freq)
		logging.info(f"{indent}output freqency: {output_freq}")
		# output time intervals:
		if output_freq in ["D", "day", "daily"]:
		outtimes1 = pandas.date_range(t1, t2, freq="D")
		@@ -221,20 +232,23 @@ class CSO_CoLocate(utopya.UtopyaRc):
		elif output_freq in ["M", "month", "monthly"]:
		outtimes1 = pandas.date_range(t1, t2, freq="MS")
		outtimes2 = outtimes1 + pandas.DateOffset(months=1, seconds=-1)
		elif output_freq in ["Y", "year", "yearly"]:
		outtimes1 = pandas.date_range(t1, t2, freq="YS")
		outtimes2 = pandas.date_range(t1, t2, freq="Y") + pandas.DateOffset(days=1, seconds=-1)
		else:
		logging.error('unsupported output time step "%s"' % step)
		logging.error(f"unsupported output time step '{step}'")
		raise Exception
		# endif

		# target file:
		outfile_template = self.GetSetting("output.file")
		# info ...
		logging.info(indent + "target files: %s" % outfile_template)
		logging.info(f"{indent}target files: {outfile_template}")

		# renew?
		renew = self.GetSetting("renew", totype="bool")
		# info ...
		logging.info(indent + "renew output: %s" % renew)
		logging.info(f"{indent}renew output: {renew}")

		# pack floats as shorts?
		packed = self.GetSetting("output.packed", "bool")
		@@ -245,21 +259,21 @@ class CSO_CoLocate(utopya.UtopyaRc):
		gattrs = {}
		attrnames = self.GetSetting("output.attrs").split()
		for key in attrnames:
		gattrs[key] = self.GetSetting("output.attr.%s" % (key))
		gattrs[key] = self.GetSetting(f"output.attr.{key}")
		# endfor

		# station file:
		locations_file = self.GetSetting("locations.file")
		# info ...
		logging.info(indent + "location file: %s" % locations_file)
		logging.info(f"{indent}location file: {locations_file}")
		# check ..
		if not os.path.isfile(locations_file):
		logging.error("file not found: %s" % locations_file)
		logging.error(f"file not found: {locations_file}")
		raise Exception
		# endif
		# special characters:
		sep = self.GetSetting("locations.sep", default=";")
		comment = self.GetSetting("locations.sep", comment="#")
		comment = self.GetSetting("locations.sep", default="#")
		# column names:
		loc_lon = self.GetSetting("locations.longitude")
		loc_lat = self.GetSetting("locations.latitude")
		@@ -270,6 +284,16 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# count:
		nloc = len(locations)

		# skip some colunns from the file?
		skip_columns = self.GetSetting("locations.skip_columns", default="").split()
		# defined?
		if len(skip_columns) > 0:
		# info ...
		logging.info(f"{indent}skip columns: {skip_columns}")
		# remove columns:
		locations = locations.drop(columns=skip_columns)
		# endif

		# time resolution
		resolution = self.GetSetting("resolution")

		@@ -284,7 +308,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		for stype in stypes:
		# tempate for input file:
		sources[stype] = {}
		sources[stype]["input"] = self.GetSetting("source.%s" % stype)
		sources[stype]["input"] = self.GetSetting(f"source.{stype}")
		# endfor

		# first source type, this should have the footprints:
		@@ -296,7 +320,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		varsources = {}
		for vkey in vkeys:
		# source description:
		varsources[vkey] = self.GetSetting("output.%s.source" % (vkey))
		varsources[vkey] = self.GetSetting(f"output.{vkey}.source")
		# endfor

		# loop over times for output files:
		@@ -309,13 +333,13 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# create?
		if (not os.path.isfile(outfile)) or renew:
		# info ...
		logging.info(indent + "create %s .." % outfile)
		logging.info(f"{indent}create {outfile} ..")

		# averaging time intervals:
		if resolution == "hourly":
		# start and end times:
		tt1 = pandas.date_range(outtime1, outtime2, freq="H")
		tt2 = t1 + pandas.Timedelta(hours=1)
		tt2 = tt1 + pandas.Timedelta(hours=1)
		#
		elif resolution == "daily":
		# start and end times:
		@@ -323,7 +347,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		tt2 = tt1 + pandas.Timedelta(days=1)
		#
		else:
		logging.error('unsupported resolution "%s"' % resolution)
		logging.error(f"unsupported resolution '{resolution}'")
		raise Exception
		# endif
		# mid:
		@@ -332,7 +356,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		nt = len(ttm)

		# info ...
		logging.info(indent + "init output file ...")
		logging.info(f"{indent}init output file ...")
		# init output file:
		outf = cso_file.CSO_File()
		# locations:
		@@ -349,13 +373,13 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# store location info:
		for key in locations.keys():
		# variable name:
		vname = "location_%s" % key
		vname = f"location_{key}"
		# add data:
		outf.AddDataVariable(
		vname,
		("location",),
		values=locations[key],
		attrs={"long_name": "location %s" % key, "units": "1"},
		attrs={"long_name": f"location {key}", "units": "1"},
		)
		# endfor
		# storage for number of selected pixels:
		@@ -364,7 +388,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		("time", "location"),
		values=numpy.zeros((nt, nloc), dtype="i2"),
		attrs={
		"long_name": "number of pixels within %i km" % distance_km,
		"long_name": f"number of pixels within {distance_km} km",
		"units": "1",
		},
		)
		@@ -372,10 +396,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		outf_setup = False

		# init history:
		line = "averages over pixels with %i km from locations specified in %s;" % (
		distance_km,
		locations_file,
		)
		line = f"averages over pixels within {distance_km} km from locations specified in {locations_file};"
		line = line + "data files:"
		for stype in stypes:
		line = line + " " + sources[stype]["input"]
		@@ -386,10 +407,8 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# assume input has hourly timestamps ..
		times = pandas.date_range(outtime1, outtime2, freq="H")
		# info ...
		logging.info(
		indent + "search for input files in [%s,%s] .." % (outtime1, outtime2)
		)
		logging.info(indent + " search times: %s, .., %s" % (times[0], times[-1]))
		logging.info(f"{indent}search for input files in [{outtime1},{outtime2}] ..")
		logging.info(f"{indent} search times: {times[0]}, .., {times[-1]}")
		# list of processed files:
		donefiles = []
		# loop over input times:
		@@ -404,41 +423,41 @@ class CSO_CoLocate(utopya.UtopyaRc):
		continue

		# info ...
		logging.info(indent + " read input file(s) ...")
		logging.info(f"{indent} read input file(s) ...")
		# loop:
		for stype in stypes:
		# input file:
		infile = time.strftime(sources[stype]["input"])
		# info ..
		logging.info(indent + " %s" % infile)
		logging.info(f"{indent} {infile}")
		# read:
		sources[stype]["file"] = cso_file.CSO_File(filename=infile)
		## update history:
		# history.insert( 0, 'pixel data from %s' % os.path.basename(infile) )
		# history.insert( 0, f"pixel data from {os.path.basename(infile)}")
		# endfor

		# filter:
		selected, history = sources[stype0]["file"].SelectPixels(
		self.rcf, rcbase, indent=indent + " "
		self.rcf, rcbase, indent=f"{indent} "
		)
		# count:
		npix = selected.size
		nsel = selected.sum()
		# none?
		if nsel == 0:
		loggin.info(indent + " no pixels selected ..")
		loggin.info(f"{indent} no pixels selected ..")
		continue
		# endif
		# info ..
		logging.info(indent + " selected %i / %i pixels .." % (nsel, npix))
		logging.info(f"{indent} selected {nsel} / {npix} pixels ..")

		# target record:
		(ii,) = numpy.where((time >= tt1) & (time < tt2))
		(ii,) = numpy.where((time > tt1) & (time <= tt2))
		# check ..
		if len(ii) != 1:
		logging.error("time %s found %i times within bounds:" % (time, len(ii)))
		logging.error("time {time} found {len(ii)} times within bounds:")
		for it in range(len(tt1)):
		logging.error("[%s,%s]" % (t1[it], t2[it]))
		logging.error("[{t1[it]},{t2[it]}]")
		# endfor
		raise Exception
		# endif
		@@ -446,7 +465,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		itime = ii[0]
		# info ..
		logging.info(
		indent + " target time interval: [%s,%s)" % (tt1[itime], tt2[itime])
		f"{indent} target time interval: [{tt1[itime]},{tt2[itime]})"
		)

		# storage:
		@@ -462,8 +481,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		shp = das_in[vkey].shape[1:]
		if numpy.prod(shp) != 1:
		logging.error(
		'source variable "%s" has shape %s per pixel, but currently only scalars supported ...'
		% (vkey, str(shp))
		f"source variable '{vkey}' has shape {str(shp)} per pixel, but currently only scalars supported ..."
		)
		raise Exception
		# endif
		@@ -483,7 +501,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# endif # init output?

		# info ..
		logging.info(indent + " loop over locations ...")
		logging.info(f"{indent} loop over locations ...")
		# loop over locations:
		for iloc in range(nloc):
		# index:
		@@ -492,7 +510,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		lon = locations.at[indx, loc_lon]
		lat = locations.at[indx, loc_lat]
		# info ...
		logging.info(indent + " location % (%f,%f)" % (iloc + 1, lon, lat))
		logging.info(f"{indent} location {iloc+1} ({lon:7.2f},{lat:6.2f})")
		# distance in m from pixels to location:
		dist_m = cso_mapping.LonLatDistance(
		lon,
		@@ -505,15 +523,13 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# any?
		if len(iipix) == 0:
		# info ...
		logging.info(indent + " no pixels within %i km" % distance_km)
		logging.info(f"{indent} no pixels within {distance_km} km")
		else:
		# count:
		nval = len(iipix)
		# info ...
		logging.info(
		indent
		+ " found %i selected pixels within %i km"
		% (nval, distance_km)
		f"{indent} found {nval} selected pixels within {distance_km} km"
		)
		# add contribution:
		outf.ds["npixel"].values[itime, iloc] += nval
		@@ -534,7 +550,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# endfor # input times

		# info ...
		logging.info(indent + " averages ...")
		logging.info(f"{indent} averages ...")
		# times/locations with pixels found:
		kk, ii = numpy.where(outf.ds["npixel"].values > 0)
		# any?
		@@ -547,7 +563,7 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# endif # any pixels co-located

		# info ...
		logging.info(indent + " write ...")
		logging.info(f"{indent} write ...")
		# write:
		outf.Write(
		outfile,
		@@ -560,7 +576,7 @@ class CSO_CoLocate(utopya.UtopyaRc):

		else:
		# info ...
		logging.info(indent + "keep %s .." % outfile)
		logging.info(f"{indent}keep {outfile} ...")
		# endif # renew

		# endfor # outtimes
		@@ -568,9 +584,9 @@ class CSO_CoLocate(utopya.UtopyaRc):
		# endfor # time loop

		# info ...
		logging.info(indent + "")
		logging.info(indent + "** end")
		logging.info(indent + "")
		logging.info(f"{indent}")
		logging.info(f"{indent}** end")
		logging.info(f"{indent}")

		# enddef __init__

		@@ -690,9 +706,9 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):
		cso_catalogue.CSO_CatalogueBase.__init__(self, rcfile, rcbase=rcbase, env=env)

		# info ...
		logging.info(indent + "")
		logging.info(indent + "** Plot Co-located time series")
		logging.info(indent + "")
		logging.info(f"{indent}")
		logging.info(f"{indent}** Plot Co-located time series")
		logging.info(f"{indent}")

		# time range:
		t1 = self.GetSetting("timerange.start", totype="datetime")
		@@ -700,12 +716,12 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):

		# info ...
		tfmt = "%Y-%m-%d %H:%M"
		logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt)))
		logging.info(f"{indent}timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt)))

		# renew?
		renew = self.GetSetting("renew", totype="bool")
		# info ...
		logging.info(indent + "renew output: %s" % renew)
		logging.info(f"{indent}renew output: %s" % renew)

		# filter for input files:
		fpattern = self.GetSetting("input.files")
		@@ -771,13 +787,13 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):
		styles[varname] = {}
		for resolution in resolutions:
		styles[varname][resolution] = self.GetSetting(
		"var.%s.%s.style" % (varname, resolution), totype="dict"
		f"var.{varname}.{resolution}.style", totype="dict"
		)
		# endfor
		# endfor

		# info ...
		logging.info(indent + "resample ...")
		logging.info(f"{indent}resample ...")
		# resample (all locations):
		# das['N']['yr' ] = ..
		# das['N']['times'] = ..
		@@ -832,16 +848,16 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):
		# endfor # time resols

		# loop over locations:
		for iloc in range(ds.dims["location"]):
		for iloc in range(ds.sizes["location"]):
		# info ..
		logging.info("location %i / %i .." % (iloc + 1, ds.dims["location"]))
		logging.info(f"{indent}location {iloc+1} / {ds.sizes['location']} ...")

		# target file, replace templates:
		figfile = figfile_template.format(**loci[iloc])
		# renew?
		if (not os.path.isfile(figfile)) or renew:
		# info ..
		logging.info(" create %s .." % figfile)
		logging.info(f"{indent} create {figfile} ...")

		# new:
		fig = plt.figure(figsize=figsize)
		@@ -896,15 +912,15 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase):

		else:
		# info ...
		logging.info(" keep %s .." % figfile)
		logging.info(f"{indent} keep {figfile} ...")
		# endif

		# endfor # locations

		# info ...
		logging.info(indent + "")
		logging.info(indent + "** end")
		logging.info(indent + "")
		logging.info(f"{indent}")
		logging.info(f"{indent}** end")
		logging.info(f"{indent}")

		# enddef __init__

src/cso/cso_file.py

+21 −6

Original line number	Diff line number	Diff line
		@@ -36,6 +36,10 @@
		# - added GetValue method
		# Removed usage of undefined filename in error messages.
		#
		# 2025-04, Arjo Segers
		# Enable zlib compression only for numerical data.
		# Avoid warnings from packing in case of all-nan values.
		#

		########################################################################
		###
		@@ -185,9 +189,16 @@ def Pack_DataArray(da, dtype="i2"):

		# only floats ...
		if da.dtype in [numpy.float32, numpy.float64]:
		# should have some values ..
		if numpy.any( ~ numpy.isnan(da.values) ):
		# value range, ignore nan's:
		vmin = numpy.nanmin(da.values)
		vmax = numpy.nanmax(da.values)
		else:
		# dummy range:
		vmin = 0.0
		vmax = 0.0
		#end if
		# target data type could be integer or float:
		if dtype.startswith("i"):
		# use absolute minimum to represent nans:
		@@ -606,9 +617,13 @@ class CSO_File(object):
		if complevel > 0:
		# loop over variables:
		for vname in self.ds.keys():
		# only numerical values:
		dtype = self.ds[vname].dtype
		if numpy.issubdtype(dtype,numpy.integer) or numpy.issubdtype(dtype,numpy.floating):
		# enable zlib compression and set level:
		self.ds[vname].encoding["zlib"] = True
		self.ds[vname].encoding["complevel"] = complevel
		# endif
		# endfor # variables
		# endif # comppress