Loading src/cso/cso_colocate.py +85 −69 Original line number Diff line number Diff line Loading @@ -7,6 +7,10 @@ # 2023, Lewis Blake, Arjo Segers # Formatted using "black". # # 2025-04, Arjo Segers # Added option to drop some columns of the location csv after reading, # this prevents these columns from being added to the co-location output. # ######################################################################## Loading Loading @@ -73,7 +77,7 @@ class CSO_CoLocate(utopya.UtopyaRc): surface locations. The value assigned to location and time is an average over all pixels within a specified distance and valid for the time. Location should be specified in a csv file with columns at least longitude and latiude:: Location should be specified in a csv file with at least columns that specify the longitude and latiude:: name ;longitude;latitude Peyrusse Vieille ; 0.18; 43.62 Loading @@ -92,12 +96,19 @@ class CSO_CoLocate(utopya.UtopyaRc): <rcbase>.locations.sep : ; <rcbase>.locations.comment : # Also specify the column names that used for the longitude and latitude:: Also specify the column names that are used for the longitude and latitude:: ! column names: <rcbase>.locations.longitude : longitude <rcbase>.locations.latiutde : latiutde The content of the csv file is included in the output file as 1D variables with dimension ``(location,)``. These variables have the name of the column in the csv file, preceeded by ``location_``. Eventually specify that some columns should be skipped:: ! skip auxilary columns: <rcbase>.locations.skip_columns : flag value Time series are created within a time range:: ! time range: Loading Loading @@ -198,9 +209,9 @@ class CSO_CoLocate(utopya.UtopyaRc): utopya.UtopyaRc.__init__(self, rcfile, rcbase=rcbase, env=env) # info ... logging.info(indent + "") logging.info(indent + "** Co-located CSO data") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** Co-located CSO data") logging.info(f"{indent}") # time range: t1 = self.GetSetting("timerange.start", totype="datetime") Loading @@ -208,12 +219,12 @@ class CSO_CoLocate(utopya.UtopyaRc): # info ... tfmt = "%Y-%m-%d %H:%M" logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt))) logging.info(f"{indent}timerange: [{t1.strftime(tfmt)},{t2.strftime(tfmt)}]") # output time step: output_freq = self.GetSetting("output.freq") # info ... logging.info(indent + "output freqency: %s" % output_freq) logging.info(f"{indent}output freqency: {output_freq}") # output time intervals: if output_freq in ["D", "day", "daily"]: outtimes1 = pandas.date_range(t1, t2, freq="D") Loading @@ -221,20 +232,23 @@ class CSO_CoLocate(utopya.UtopyaRc): elif output_freq in ["M", "month", "monthly"]: outtimes1 = pandas.date_range(t1, t2, freq="MS") outtimes2 = outtimes1 + pandas.DateOffset(months=1, seconds=-1) elif output_freq in ["Y", "year", "yearly"]: outtimes1 = pandas.date_range(t1, t2, freq="YS") outtimes2 = pandas.date_range(t1, t2, freq="Y") + pandas.DateOffset(days=1, seconds=-1) else: logging.error('unsupported output time step "%s"' % step) logging.error(f"unsupported output time step '{step}'") raise Exception # endif # target file: outfile_template = self.GetSetting("output.file") # info ... logging.info(indent + "target files: %s" % outfile_template) logging.info(f"{indent}target files: {outfile_template}") # renew? renew = self.GetSetting("renew", totype="bool") # info ... logging.info(indent + "renew output: %s" % renew) logging.info(f"{indent}renew output: {renew}") # pack floats as shorts? packed = self.GetSetting("output.packed", "bool") Loading @@ -245,21 +259,21 @@ class CSO_CoLocate(utopya.UtopyaRc): gattrs = {} attrnames = self.GetSetting("output.attrs").split() for key in attrnames: gattrs[key] = self.GetSetting("output.attr.%s" % (key)) gattrs[key] = self.GetSetting(f"output.attr.{key}") # endfor # station file: locations_file = self.GetSetting("locations.file") # info ... logging.info(indent + "location file: %s" % locations_file) logging.info(f"{indent}location file: {locations_file}") # check .. if not os.path.isfile(locations_file): logging.error("file not found: %s" % locations_file) logging.error(f"file not found: {locations_file}") raise Exception # endif # special characters: sep = self.GetSetting("locations.sep", default=";") comment = self.GetSetting("locations.sep", comment="#") comment = self.GetSetting("locations.sep", default="#") # column names: loc_lon = self.GetSetting("locations.longitude") loc_lat = self.GetSetting("locations.latitude") Loading @@ -270,6 +284,16 @@ class CSO_CoLocate(utopya.UtopyaRc): # count: nloc = len(locations) # skip some colunns from the file? skip_columns = self.GetSetting("locations.skip_columns", default="").split() # defined? if len(skip_columns) > 0: # info ... logging.info(f"{indent}skip columns: {skip_columns}") # remove columns: locations = locations.drop(columns=skip_columns) # endif # time resolution resolution = self.GetSetting("resolution") Loading @@ -284,7 +308,7 @@ class CSO_CoLocate(utopya.UtopyaRc): for stype in stypes: # tempate for input file: sources[stype] = {} sources[stype]["input"] = self.GetSetting("source.%s" % stype) sources[stype]["input"] = self.GetSetting(f"source.{stype}") # endfor # first source type, this should have the footprints: Loading @@ -296,7 +320,7 @@ class CSO_CoLocate(utopya.UtopyaRc): varsources = {} for vkey in vkeys: # source description: varsources[vkey] = self.GetSetting("output.%s.source" % (vkey)) varsources[vkey] = self.GetSetting(f"output.{vkey}.source") # endfor # loop over times for output files: Loading @@ -309,13 +333,13 @@ class CSO_CoLocate(utopya.UtopyaRc): # create? if (not os.path.isfile(outfile)) or renew: # info ... logging.info(indent + "create %s .." % outfile) logging.info(f"{indent}create {outfile} ..") # averaging time intervals: if resolution == "hourly": # start and end times: tt1 = pandas.date_range(outtime1, outtime2, freq="H") tt2 = t1 + pandas.Timedelta(hours=1) tt2 = tt1 + pandas.Timedelta(hours=1) # elif resolution == "daily": # start and end times: Loading @@ -323,7 +347,7 @@ class CSO_CoLocate(utopya.UtopyaRc): tt2 = tt1 + pandas.Timedelta(days=1) # else: logging.error('unsupported resolution "%s"' % resolution) logging.error(f"unsupported resolution '{resolution}'") raise Exception # endif # mid: Loading @@ -332,7 +356,7 @@ class CSO_CoLocate(utopya.UtopyaRc): nt = len(ttm) # info ... logging.info(indent + "init output file ...") logging.info(f"{indent}init output file ...") # init output file: outf = cso_file.CSO_File() # locations: Loading @@ -349,13 +373,13 @@ class CSO_CoLocate(utopya.UtopyaRc): # store location info: for key in locations.keys(): # variable name: vname = "location_%s" % key vname = f"location_{key}" # add data: outf.AddDataVariable( vname, ("location",), values=locations[key], attrs={"long_name": "location %s" % key, "units": "1"}, attrs={"long_name": f"location {key}", "units": "1"}, ) # endfor # storage for number of selected pixels: Loading @@ -364,7 +388,7 @@ class CSO_CoLocate(utopya.UtopyaRc): ("time", "location"), values=numpy.zeros((nt, nloc), dtype="i2"), attrs={ "long_name": "number of pixels within %i km" % distance_km, "long_name": f"number of pixels within {distance_km} km", "units": "1", }, ) Loading @@ -372,10 +396,7 @@ class CSO_CoLocate(utopya.UtopyaRc): outf_setup = False # init history: line = "averages over pixels with %i km from locations specified in %s;" % ( distance_km, locations_file, ) line = f"averages over pixels within {distance_km} km from locations specified in {locations_file};" line = line + "data files:" for stype in stypes: line = line + " " + sources[stype]["input"] Loading @@ -386,10 +407,8 @@ class CSO_CoLocate(utopya.UtopyaRc): # assume input has hourly timestamps .. times = pandas.date_range(outtime1, outtime2, freq="H") # info ... logging.info( indent + "search for input files in [%s,%s] .." % (outtime1, outtime2) ) logging.info(indent + " search times: %s, .., %s" % (times[0], times[-1])) logging.info(f"{indent}search for input files in [{outtime1},{outtime2}] ..") logging.info(f"{indent} search times: {times[0]}, .., {times[-1]}") # list of processed files: donefiles = [] # loop over input times: Loading @@ -404,41 +423,41 @@ class CSO_CoLocate(utopya.UtopyaRc): continue # info ... logging.info(indent + " read input file(s) ...") logging.info(f"{indent} read input file(s) ...") # loop: for stype in stypes: # input file: infile = time.strftime(sources[stype]["input"]) # info .. logging.info(indent + " %s" % infile) logging.info(f"{indent} {infile}") # read: sources[stype]["file"] = cso_file.CSO_File(filename=infile) ## update history: # history.insert( 0, 'pixel data from %s' % os.path.basename(infile) ) # history.insert( 0, f"pixel data from {os.path.basename(infile)}") # endfor # filter: selected, history = sources[stype0]["file"].SelectPixels( self.rcf, rcbase, indent=indent + " " self.rcf, rcbase, indent=f"{indent} " ) # count: npix = selected.size nsel = selected.sum() # none? if nsel == 0: loggin.info(indent + " no pixels selected ..") loggin.info(f"{indent} no pixels selected ..") continue # endif # info .. logging.info(indent + " selected %i / %i pixels .." % (nsel, npix)) logging.info(f"{indent} selected {nsel} / {npix} pixels ..") # target record: (ii,) = numpy.where((time >= tt1) & (time < tt2)) (ii,) = numpy.where((time > tt1) & (time <= tt2)) # check .. if len(ii) != 1: logging.error("time %s found %i times within bounds:" % (time, len(ii))) logging.error("time {time} found {len(ii)} times within bounds:") for it in range(len(tt1)): logging.error("[%s,%s]" % (t1[it], t2[it])) logging.error("[{t1[it]},{t2[it]}]") # endfor raise Exception # endif Loading @@ -446,7 +465,7 @@ class CSO_CoLocate(utopya.UtopyaRc): itime = ii[0] # info .. logging.info( indent + " target time interval: [%s,%s)" % (tt1[itime], tt2[itime]) f"{indent} target time interval: [{tt1[itime]},{tt2[itime]})" ) # storage: Loading @@ -462,8 +481,7 @@ class CSO_CoLocate(utopya.UtopyaRc): shp = das_in[vkey].shape[1:] if numpy.prod(shp) != 1: logging.error( 'source variable "%s" has shape %s per pixel, but currently only scalars supported ...' % (vkey, str(shp)) f"source variable '{vkey}' has shape {str(shp)} per pixel, but currently only scalars supported ..." ) raise Exception # endif Loading @@ -483,7 +501,7 @@ class CSO_CoLocate(utopya.UtopyaRc): # endif # init output? # info .. logging.info(indent + " loop over locations ...") logging.info(f"{indent} loop over locations ...") # loop over locations: for iloc in range(nloc): # index: Loading @@ -492,7 +510,7 @@ class CSO_CoLocate(utopya.UtopyaRc): lon = locations.at[indx, loc_lon] lat = locations.at[indx, loc_lat] # info ... logging.info(indent + " location % (%f,%f)" % (iloc + 1, lon, lat)) logging.info(f"{indent} location {iloc+1} ({lon:7.2f},{lat:6.2f})") # distance in m from pixels to location: dist_m = cso_mapping.LonLatDistance( lon, Loading @@ -505,15 +523,13 @@ class CSO_CoLocate(utopya.UtopyaRc): # any? if len(iipix) == 0: # info ... logging.info(indent + " no pixels within %i km" % distance_km) logging.info(f"{indent} no pixels within {distance_km} km") else: # count: nval = len(iipix) # info ... logging.info( indent + " found %i selected pixels within %i km" % (nval, distance_km) f"{indent} found {nval} selected pixels within {distance_km} km" ) # add contribution: outf.ds["npixel"].values[itime, iloc] += nval Loading @@ -534,7 +550,7 @@ class CSO_CoLocate(utopya.UtopyaRc): # endfor # input times # info ... logging.info(indent + " averages ...") logging.info(f"{indent} averages ...") # times/locations with pixels found: kk, ii = numpy.where(outf.ds["npixel"].values > 0) # any? Loading @@ -547,7 +563,7 @@ class CSO_CoLocate(utopya.UtopyaRc): # endif # any pixels co-located # info ... logging.info(indent + " write ...") logging.info(f"{indent} write ...") # write: outf.Write( outfile, Loading @@ -560,7 +576,7 @@ class CSO_CoLocate(utopya.UtopyaRc): else: # info ... logging.info(indent + "keep %s .." % outfile) logging.info(f"{indent}keep {outfile} ...") # endif # renew # endfor # outtimes Loading @@ -568,9 +584,9 @@ class CSO_CoLocate(utopya.UtopyaRc): # endfor # time loop # info ... logging.info(indent + "") logging.info(indent + "** end") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** end") logging.info(f"{indent}") # enddef __init__ Loading Loading @@ -690,9 +706,9 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): cso_catalogue.CSO_CatalogueBase.__init__(self, rcfile, rcbase=rcbase, env=env) # info ... logging.info(indent + "") logging.info(indent + "** Plot Co-located time series") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** Plot Co-located time series") logging.info(f"{indent}") # time range: t1 = self.GetSetting("timerange.start", totype="datetime") Loading @@ -700,12 +716,12 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): # info ... tfmt = "%Y-%m-%d %H:%M" logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt))) logging.info(f"{indent}timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt))) # renew? renew = self.GetSetting("renew", totype="bool") # info ... logging.info(indent + "renew output: %s" % renew) logging.info(f"{indent}renew output: %s" % renew) # filter for input files: fpattern = self.GetSetting("input.files") Loading Loading @@ -771,13 +787,13 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): styles[varname] = {} for resolution in resolutions: styles[varname][resolution] = self.GetSetting( "var.%s.%s.style" % (varname, resolution), totype="dict" f"var.{varname}.{resolution}.style", totype="dict" ) # endfor # endfor # info ... logging.info(indent + "resample ...") logging.info(f"{indent}resample ...") # resample (all locations): # das['N']['yr' ] = .. # das['N']['times'] = .. Loading Loading @@ -832,16 +848,16 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): # endfor # time resols # loop over locations: for iloc in range(ds.dims["location"]): for iloc in range(ds.sizes["location"]): # info .. logging.info("location %i / %i .." % (iloc + 1, ds.dims["location"])) logging.info(f"{indent}location {iloc+1} / {ds.sizes['location']} ...") # target file, replace templates: figfile = figfile_template.format(**loci[iloc]) # renew? if (not os.path.isfile(figfile)) or renew: # info .. logging.info(" create %s .." % figfile) logging.info(f"{indent} create {figfile} ...") # new: fig = plt.figure(figsize=figsize) Loading Loading @@ -896,15 +912,15 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): else: # info ... logging.info(" keep %s .." % figfile) logging.info(f"{indent} keep {figfile} ...") # endif # endfor # locations # info ... logging.info(indent + "") logging.info(indent + "** end") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** end") logging.info(f"{indent}") # enddef __init__ Loading src/cso/cso_file.py +21 −6 Original line number Diff line number Diff line Loading @@ -36,6 +36,10 @@ # - added GetValue method # Removed usage of undefined filename in error messages. # # 2025-04, Arjo Segers # Enable zlib compression only for numerical data. # Avoid warnings from packing in case of all-nan values. # ######################################################################## ### Loading Loading @@ -185,9 +189,16 @@ def Pack_DataArray(da, dtype="i2"): # only floats ... if da.dtype in [numpy.float32, numpy.float64]: # should have some values .. if numpy.any( ~ numpy.isnan(da.values) ): # value range, ignore nan's: vmin = numpy.nanmin(da.values) vmax = numpy.nanmax(da.values) else: # dummy range: vmin = 0.0 vmax = 0.0 #end if # target data type could be integer or float: if dtype.startswith("i"): # use absolute minimum to represent nans: Loading Loading @@ -606,9 +617,13 @@ class CSO_File(object): if complevel > 0: # loop over variables: for vname in self.ds.keys(): # only numerical values: dtype = self.ds[vname].dtype if numpy.issubdtype(dtype,numpy.integer) or numpy.issubdtype(dtype,numpy.floating): # enable zlib compression and set level: self.ds[vname].encoding["zlib"] = True self.ds[vname].encoding["complevel"] = complevel # endif # endfor # variables # endif # comppress Loading Loading
src/cso/cso_colocate.py +85 −69 Original line number Diff line number Diff line Loading @@ -7,6 +7,10 @@ # 2023, Lewis Blake, Arjo Segers # Formatted using "black". # # 2025-04, Arjo Segers # Added option to drop some columns of the location csv after reading, # this prevents these columns from being added to the co-location output. # ######################################################################## Loading Loading @@ -73,7 +77,7 @@ class CSO_CoLocate(utopya.UtopyaRc): surface locations. The value assigned to location and time is an average over all pixels within a specified distance and valid for the time. Location should be specified in a csv file with columns at least longitude and latiude:: Location should be specified in a csv file with at least columns that specify the longitude and latiude:: name ;longitude;latitude Peyrusse Vieille ; 0.18; 43.62 Loading @@ -92,12 +96,19 @@ class CSO_CoLocate(utopya.UtopyaRc): <rcbase>.locations.sep : ; <rcbase>.locations.comment : # Also specify the column names that used for the longitude and latitude:: Also specify the column names that are used for the longitude and latitude:: ! column names: <rcbase>.locations.longitude : longitude <rcbase>.locations.latiutde : latiutde The content of the csv file is included in the output file as 1D variables with dimension ``(location,)``. These variables have the name of the column in the csv file, preceeded by ``location_``. Eventually specify that some columns should be skipped:: ! skip auxilary columns: <rcbase>.locations.skip_columns : flag value Time series are created within a time range:: ! time range: Loading Loading @@ -198,9 +209,9 @@ class CSO_CoLocate(utopya.UtopyaRc): utopya.UtopyaRc.__init__(self, rcfile, rcbase=rcbase, env=env) # info ... logging.info(indent + "") logging.info(indent + "** Co-located CSO data") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** Co-located CSO data") logging.info(f"{indent}") # time range: t1 = self.GetSetting("timerange.start", totype="datetime") Loading @@ -208,12 +219,12 @@ class CSO_CoLocate(utopya.UtopyaRc): # info ... tfmt = "%Y-%m-%d %H:%M" logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt))) logging.info(f"{indent}timerange: [{t1.strftime(tfmt)},{t2.strftime(tfmt)}]") # output time step: output_freq = self.GetSetting("output.freq") # info ... logging.info(indent + "output freqency: %s" % output_freq) logging.info(f"{indent}output freqency: {output_freq}") # output time intervals: if output_freq in ["D", "day", "daily"]: outtimes1 = pandas.date_range(t1, t2, freq="D") Loading @@ -221,20 +232,23 @@ class CSO_CoLocate(utopya.UtopyaRc): elif output_freq in ["M", "month", "monthly"]: outtimes1 = pandas.date_range(t1, t2, freq="MS") outtimes2 = outtimes1 + pandas.DateOffset(months=1, seconds=-1) elif output_freq in ["Y", "year", "yearly"]: outtimes1 = pandas.date_range(t1, t2, freq="YS") outtimes2 = pandas.date_range(t1, t2, freq="Y") + pandas.DateOffset(days=1, seconds=-1) else: logging.error('unsupported output time step "%s"' % step) logging.error(f"unsupported output time step '{step}'") raise Exception # endif # target file: outfile_template = self.GetSetting("output.file") # info ... logging.info(indent + "target files: %s" % outfile_template) logging.info(f"{indent}target files: {outfile_template}") # renew? renew = self.GetSetting("renew", totype="bool") # info ... logging.info(indent + "renew output: %s" % renew) logging.info(f"{indent}renew output: {renew}") # pack floats as shorts? packed = self.GetSetting("output.packed", "bool") Loading @@ -245,21 +259,21 @@ class CSO_CoLocate(utopya.UtopyaRc): gattrs = {} attrnames = self.GetSetting("output.attrs").split() for key in attrnames: gattrs[key] = self.GetSetting("output.attr.%s" % (key)) gattrs[key] = self.GetSetting(f"output.attr.{key}") # endfor # station file: locations_file = self.GetSetting("locations.file") # info ... logging.info(indent + "location file: %s" % locations_file) logging.info(f"{indent}location file: {locations_file}") # check .. if not os.path.isfile(locations_file): logging.error("file not found: %s" % locations_file) logging.error(f"file not found: {locations_file}") raise Exception # endif # special characters: sep = self.GetSetting("locations.sep", default=";") comment = self.GetSetting("locations.sep", comment="#") comment = self.GetSetting("locations.sep", default="#") # column names: loc_lon = self.GetSetting("locations.longitude") loc_lat = self.GetSetting("locations.latitude") Loading @@ -270,6 +284,16 @@ class CSO_CoLocate(utopya.UtopyaRc): # count: nloc = len(locations) # skip some colunns from the file? skip_columns = self.GetSetting("locations.skip_columns", default="").split() # defined? if len(skip_columns) > 0: # info ... logging.info(f"{indent}skip columns: {skip_columns}") # remove columns: locations = locations.drop(columns=skip_columns) # endif # time resolution resolution = self.GetSetting("resolution") Loading @@ -284,7 +308,7 @@ class CSO_CoLocate(utopya.UtopyaRc): for stype in stypes: # tempate for input file: sources[stype] = {} sources[stype]["input"] = self.GetSetting("source.%s" % stype) sources[stype]["input"] = self.GetSetting(f"source.{stype}") # endfor # first source type, this should have the footprints: Loading @@ -296,7 +320,7 @@ class CSO_CoLocate(utopya.UtopyaRc): varsources = {} for vkey in vkeys: # source description: varsources[vkey] = self.GetSetting("output.%s.source" % (vkey)) varsources[vkey] = self.GetSetting(f"output.{vkey}.source") # endfor # loop over times for output files: Loading @@ -309,13 +333,13 @@ class CSO_CoLocate(utopya.UtopyaRc): # create? if (not os.path.isfile(outfile)) or renew: # info ... logging.info(indent + "create %s .." % outfile) logging.info(f"{indent}create {outfile} ..") # averaging time intervals: if resolution == "hourly": # start and end times: tt1 = pandas.date_range(outtime1, outtime2, freq="H") tt2 = t1 + pandas.Timedelta(hours=1) tt2 = tt1 + pandas.Timedelta(hours=1) # elif resolution == "daily": # start and end times: Loading @@ -323,7 +347,7 @@ class CSO_CoLocate(utopya.UtopyaRc): tt2 = tt1 + pandas.Timedelta(days=1) # else: logging.error('unsupported resolution "%s"' % resolution) logging.error(f"unsupported resolution '{resolution}'") raise Exception # endif # mid: Loading @@ -332,7 +356,7 @@ class CSO_CoLocate(utopya.UtopyaRc): nt = len(ttm) # info ... logging.info(indent + "init output file ...") logging.info(f"{indent}init output file ...") # init output file: outf = cso_file.CSO_File() # locations: Loading @@ -349,13 +373,13 @@ class CSO_CoLocate(utopya.UtopyaRc): # store location info: for key in locations.keys(): # variable name: vname = "location_%s" % key vname = f"location_{key}" # add data: outf.AddDataVariable( vname, ("location",), values=locations[key], attrs={"long_name": "location %s" % key, "units": "1"}, attrs={"long_name": f"location {key}", "units": "1"}, ) # endfor # storage for number of selected pixels: Loading @@ -364,7 +388,7 @@ class CSO_CoLocate(utopya.UtopyaRc): ("time", "location"), values=numpy.zeros((nt, nloc), dtype="i2"), attrs={ "long_name": "number of pixels within %i km" % distance_km, "long_name": f"number of pixels within {distance_km} km", "units": "1", }, ) Loading @@ -372,10 +396,7 @@ class CSO_CoLocate(utopya.UtopyaRc): outf_setup = False # init history: line = "averages over pixels with %i km from locations specified in %s;" % ( distance_km, locations_file, ) line = f"averages over pixels within {distance_km} km from locations specified in {locations_file};" line = line + "data files:" for stype in stypes: line = line + " " + sources[stype]["input"] Loading @@ -386,10 +407,8 @@ class CSO_CoLocate(utopya.UtopyaRc): # assume input has hourly timestamps .. times = pandas.date_range(outtime1, outtime2, freq="H") # info ... logging.info( indent + "search for input files in [%s,%s] .." % (outtime1, outtime2) ) logging.info(indent + " search times: %s, .., %s" % (times[0], times[-1])) logging.info(f"{indent}search for input files in [{outtime1},{outtime2}] ..") logging.info(f"{indent} search times: {times[0]}, .., {times[-1]}") # list of processed files: donefiles = [] # loop over input times: Loading @@ -404,41 +423,41 @@ class CSO_CoLocate(utopya.UtopyaRc): continue # info ... logging.info(indent + " read input file(s) ...") logging.info(f"{indent} read input file(s) ...") # loop: for stype in stypes: # input file: infile = time.strftime(sources[stype]["input"]) # info .. logging.info(indent + " %s" % infile) logging.info(f"{indent} {infile}") # read: sources[stype]["file"] = cso_file.CSO_File(filename=infile) ## update history: # history.insert( 0, 'pixel data from %s' % os.path.basename(infile) ) # history.insert( 0, f"pixel data from {os.path.basename(infile)}") # endfor # filter: selected, history = sources[stype0]["file"].SelectPixels( self.rcf, rcbase, indent=indent + " " self.rcf, rcbase, indent=f"{indent} " ) # count: npix = selected.size nsel = selected.sum() # none? if nsel == 0: loggin.info(indent + " no pixels selected ..") loggin.info(f"{indent} no pixels selected ..") continue # endif # info .. logging.info(indent + " selected %i / %i pixels .." % (nsel, npix)) logging.info(f"{indent} selected {nsel} / {npix} pixels ..") # target record: (ii,) = numpy.where((time >= tt1) & (time < tt2)) (ii,) = numpy.where((time > tt1) & (time <= tt2)) # check .. if len(ii) != 1: logging.error("time %s found %i times within bounds:" % (time, len(ii))) logging.error("time {time} found {len(ii)} times within bounds:") for it in range(len(tt1)): logging.error("[%s,%s]" % (t1[it], t2[it])) logging.error("[{t1[it]},{t2[it]}]") # endfor raise Exception # endif Loading @@ -446,7 +465,7 @@ class CSO_CoLocate(utopya.UtopyaRc): itime = ii[0] # info .. logging.info( indent + " target time interval: [%s,%s)" % (tt1[itime], tt2[itime]) f"{indent} target time interval: [{tt1[itime]},{tt2[itime]})" ) # storage: Loading @@ -462,8 +481,7 @@ class CSO_CoLocate(utopya.UtopyaRc): shp = das_in[vkey].shape[1:] if numpy.prod(shp) != 1: logging.error( 'source variable "%s" has shape %s per pixel, but currently only scalars supported ...' % (vkey, str(shp)) f"source variable '{vkey}' has shape {str(shp)} per pixel, but currently only scalars supported ..." ) raise Exception # endif Loading @@ -483,7 +501,7 @@ class CSO_CoLocate(utopya.UtopyaRc): # endif # init output? # info .. logging.info(indent + " loop over locations ...") logging.info(f"{indent} loop over locations ...") # loop over locations: for iloc in range(nloc): # index: Loading @@ -492,7 +510,7 @@ class CSO_CoLocate(utopya.UtopyaRc): lon = locations.at[indx, loc_lon] lat = locations.at[indx, loc_lat] # info ... logging.info(indent + " location % (%f,%f)" % (iloc + 1, lon, lat)) logging.info(f"{indent} location {iloc+1} ({lon:7.2f},{lat:6.2f})") # distance in m from pixels to location: dist_m = cso_mapping.LonLatDistance( lon, Loading @@ -505,15 +523,13 @@ class CSO_CoLocate(utopya.UtopyaRc): # any? if len(iipix) == 0: # info ... logging.info(indent + " no pixels within %i km" % distance_km) logging.info(f"{indent} no pixels within {distance_km} km") else: # count: nval = len(iipix) # info ... logging.info( indent + " found %i selected pixels within %i km" % (nval, distance_km) f"{indent} found {nval} selected pixels within {distance_km} km" ) # add contribution: outf.ds["npixel"].values[itime, iloc] += nval Loading @@ -534,7 +550,7 @@ class CSO_CoLocate(utopya.UtopyaRc): # endfor # input times # info ... logging.info(indent + " averages ...") logging.info(f"{indent} averages ...") # times/locations with pixels found: kk, ii = numpy.where(outf.ds["npixel"].values > 0) # any? Loading @@ -547,7 +563,7 @@ class CSO_CoLocate(utopya.UtopyaRc): # endif # any pixels co-located # info ... logging.info(indent + " write ...") logging.info(f"{indent} write ...") # write: outf.Write( outfile, Loading @@ -560,7 +576,7 @@ class CSO_CoLocate(utopya.UtopyaRc): else: # info ... logging.info(indent + "keep %s .." % outfile) logging.info(f"{indent}keep {outfile} ...") # endif # renew # endfor # outtimes Loading @@ -568,9 +584,9 @@ class CSO_CoLocate(utopya.UtopyaRc): # endfor # time loop # info ... logging.info(indent + "") logging.info(indent + "** end") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** end") logging.info(f"{indent}") # enddef __init__ Loading Loading @@ -690,9 +706,9 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): cso_catalogue.CSO_CatalogueBase.__init__(self, rcfile, rcbase=rcbase, env=env) # info ... logging.info(indent + "") logging.info(indent + "** Plot Co-located time series") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** Plot Co-located time series") logging.info(f"{indent}") # time range: t1 = self.GetSetting("timerange.start", totype="datetime") Loading @@ -700,12 +716,12 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): # info ... tfmt = "%Y-%m-%d %H:%M" logging.info(indent + "timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt))) logging.info(f"{indent}timerange: [%s,%s]" % (t1.strftime(tfmt), t2.strftime(tfmt))) # renew? renew = self.GetSetting("renew", totype="bool") # info ... logging.info(indent + "renew output: %s" % renew) logging.info(f"{indent}renew output: %s" % renew) # filter for input files: fpattern = self.GetSetting("input.files") Loading Loading @@ -771,13 +787,13 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): styles[varname] = {} for resolution in resolutions: styles[varname][resolution] = self.GetSetting( "var.%s.%s.style" % (varname, resolution), totype="dict" f"var.{varname}.{resolution}.style", totype="dict" ) # endfor # endfor # info ... logging.info(indent + "resample ...") logging.info(f"{indent}resample ...") # resample (all locations): # das['N']['yr' ] = .. # das['N']['times'] = .. Loading Loading @@ -832,16 +848,16 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): # endfor # time resols # loop over locations: for iloc in range(ds.dims["location"]): for iloc in range(ds.sizes["location"]): # info .. logging.info("location %i / %i .." % (iloc + 1, ds.dims["location"])) logging.info(f"{indent}location {iloc+1} / {ds.sizes['location']} ...") # target file, replace templates: figfile = figfile_template.format(**loci[iloc]) # renew? if (not os.path.isfile(figfile)) or renew: # info .. logging.info(" create %s .." % figfile) logging.info(f"{indent} create {figfile} ...") # new: fig = plt.figure(figsize=figsize) Loading Loading @@ -896,15 +912,15 @@ class CSO_CoLocatePlotTimeSeries(cso_catalogue.CSO_CatalogueBase): else: # info ... logging.info(" keep %s .." % figfile) logging.info(f"{indent} keep {figfile} ...") # endif # endfor # locations # info ... logging.info(indent + "") logging.info(indent + "** end") logging.info(indent + "") logging.info(f"{indent}") logging.info(f"{indent}** end") logging.info(f"{indent}") # enddef __init__ Loading
src/cso/cso_file.py +21 −6 Original line number Diff line number Diff line Loading @@ -36,6 +36,10 @@ # - added GetValue method # Removed usage of undefined filename in error messages. # # 2025-04, Arjo Segers # Enable zlib compression only for numerical data. # Avoid warnings from packing in case of all-nan values. # ######################################################################## ### Loading Loading @@ -185,9 +189,16 @@ def Pack_DataArray(da, dtype="i2"): # only floats ... if da.dtype in [numpy.float32, numpy.float64]: # should have some values .. if numpy.any( ~ numpy.isnan(da.values) ): # value range, ignore nan's: vmin = numpy.nanmin(da.values) vmax = numpy.nanmax(da.values) else: # dummy range: vmin = 0.0 vmax = 0.0 #end if # target data type could be integer or float: if dtype.startswith("i"): # use absolute minimum to represent nans: Loading Loading @@ -606,9 +617,13 @@ class CSO_File(object): if complevel > 0: # loop over variables: for vname in self.ds.keys(): # only numerical values: dtype = self.ds[vname].dtype if numpy.issubdtype(dtype,numpy.integer) or numpy.issubdtype(dtype,numpy.floating): # enable zlib compression and set level: self.ds[vname].encoding["zlib"] = True self.ds[vname].encoding["complevel"] = complevel # endif # endfor # variables # endif # comppress Loading