Extended processing of listing files. (f65776aa) · Commits · CAMS / CSO

src/cso/cso_file.py

+75 −43

Original line number	Diff line number	Diff line
		# Changes
		#
		# CHANGES
		#
		# 2022-09, Arjo Segers
		# Write files with zlib-compression, option to disable this.
		@@ -39,6 +40,14 @@
		# 2025-04, Arjo Segers
		# Enable zlib compression only for numerical data.
		# Avoid warnings from packing in case of all-nan values.
		# Open a file rather than loading it.
		# Extended sort options for listing files.
		# Support creation of listing file objects without filename.
		# Support selection of multiple records from listing file.
		#
		# 2025-04, Arjo Segers
		# Changed imports for python packaging.
		# Change expansion of datetime values from csv files for recent pandas version.
		#

		########################################################################
		@@ -189,7 +198,7 @@ def Pack_DataArray(da, dtype="i2"):

		# only floats ...
		if da.dtype in [numpy.float32, numpy.float64]:
		# should have some values ..
		# any values defined?
		if numpy.any( ~ numpy.isnan(da.values) ) :
		# value range, ignore nan's:
		vmin = numpy.nanmin(da.values)
		@@ -276,11 +285,13 @@ class CSO_File(object):
		raise Exception
		# endif

		# access dataset:
		with xarray.open_dataset(self.filename) as self.ds:
		# load entire file:
		self.ds.load()
		# endwith # xarray
		# open file:
		try:
		self.ds = xarray.open_dataset(self.filename)
		except:
		logging.error(f"could not open (corrupted?) file: {self.filename}")
		raise
		#endtry

		else:
		# dummy:
		@@ -1291,14 +1302,25 @@ class CSO_Listing(object):
		# head for index column:
		self.index_label = "filename"

		# read?
		if filename is not None:
		# store filename:
		self.filename = filename

		# directory name:
		if self.filename is not None:

		# check ..
		if not os.path.isfile(filename):
		logging.error("listing file not found: %s" % filename)
		raise Exception
		# endif

		# base directory:
		self.dirname = os.path.dirname(self.filename)
		# could be empty ..
		if len(self.dirname) == 0 :
		self.dirname = os.curdir
		# endif

		# info ...
		logging.info(f"{indent} read listing {filename} ...")
		# read:
		@@ -1314,7 +1336,11 @@ class CSO_Listing(object):
		self.df["end_time"] = pandas.to_datetime(self.df["end_time"])

		else:
		# new table:

		# not defined yet, assume current location:
		self.dirname = os.curdir

		# new empty table:
		self.df = pandas.DataFrame(columns=["start_time", "end_time"])

		# endif
		@@ -1343,7 +1369,7 @@ class CSO_Listing(object):
		# save, also write the index column:
		self.df.to_csv(filename, sep=self.sep, columns=columns, index_label=self.index_label)

		# enddef Close
		# enddef Save

		# *

		@@ -1465,7 +1491,7 @@ class CSO_Listing(object):

		# check ..
		if fname not in self.df.index:
		logging.error('file "%s" is not a record in table: %s' % (fname, filename))
		logging.error(f"file '{fname}' is not a record in table: {self.filename}")
		raise Exception
		# endif

		@@ -1532,9 +1558,9 @@ class CSO_Listing(object):

		# *

		def Select(self, tr=None, method="overlap", expr=None, blacklist=[], indent="", **kwargs):
		def Select(self, tr=None, method="overlap", expr=None, blacklist=[], verbose=True, indent="", **kwargs):
		"""
		Return :py:class:`CSO_Listing` objects with selection of records.
		Return :py:class:`CSO_Listing` object with selection of records.

		Optional arguments:

		@@ -1594,7 +1620,7 @@ class CSO_Listing(object):
		for key, value in kwargs.items():
		# check ..
		if key not in df.keys():
		logging.error(f"key '{key}' not defined in listing")
		logging.error(f"key '{key}' not defined in listing: {self.filename}")
		raise Exception
		# endif
		# select:
		@@ -1606,6 +1632,9 @@ class CSO_Listing(object):
		# evaluate selection expression?
		if expr is not None:
		# replace templates:
		# %{orbit} == '12345'
		# to:
		# xrec['orbit'] == '12345'
		for key in self.df.keys():
		expr = expr.replace("%{" + key + "}", "xrec['" + key + "']")
		# endfor
		@@ -1632,21 +1661,19 @@ class CSO_Listing(object):
		if eval(selection):
		selected.append(indx)
		filestatus[indx] = "selected"
		rec = xrec
		# endif
		# endfor # records
		# exactly one? then leave:
		if len(selected) == 1:

		# any selected?
		if len(selected) > 0:
		# leave:
		break
		elif len(selected) > 1:
		logging.error(f"found more than one record matching selection: {selection}")
		for fname in selected:
		logging.error(f" {fname}")
		# endfor
		raise Exception
		# endif # number found
		#endif

		# endfor # selection criteria

		# show selection?
		if verbose :
		# info ...
		logging.info(f"{indent}available records(s):")
		# loop:
		@@ -1656,6 +1683,7 @@ class CSO_Listing(object):
		line = line + " [" + filestatus[fname] + "]"
		logging.info(f"{indent} {line}")
		# endfor
		#endif # verbose

		# no match?
		if len(selected) == 0:
		@@ -1670,8 +1698,8 @@ class CSO_Listing(object):
		# create empty dataframe as result:
		df = pandas.DataFrame(columns=df.columns)
		else:
		# extract selected record:
		df = df.loc[[selected[0]]]
		# extract selected record(s):
		df = df.loc[selected]
		# endif

		# endif
		@@ -1721,13 +1749,17 @@ class CSO_Listing(object):

		# *

		def Sort(self, by="filename"):
		def Sort(self, by=None):
		"""
		Sort listing table by filename or other key.
		Sort listing table by index (default, this is the "filename") or by a named column.
		"""

		# sort inplace:
		# sort index or values:
		if by is None:
		self.df.sort_index(inplace=True)
		else :
		self.df.sort_values(by, inplace=True)
		# endif

		# enddef Sort