Improved speed of selecting listing records. (3769a099) · Commits · CAMS / CSO

src/cso/cso_file.py

+30 −22

Original line number	Diff line number	Diff line
		@@ -52,6 +52,9 @@
		# 2025-06, Arjo Segers
		# Set flag to avoid warnings when decoding time arrays from VIIRS files.
		#
		# 2025-09, Arjo Segers
		# Improved speed of the CSO_Listing.Select method.
		#

		########################################################################
		###
		@@ -1639,35 +1642,40 @@ class CSO_Listing(object):

		# evaluate selection expression?
		if expr is not None:
		# replace templates:
		# %{orbit} == '12345'
		# to:
		# xrec['orbit'] == '12345'
		for key in self.df.keys():
		expr = expr.replace("%{" + key + "}", "xrec['" + key + "']")
		# endfor
		# split:
		# split into a sequence of criteria; the first expression with a matching record will be used:
		selections = expr.split(";")

		# initialize empty list of selected record indices (filenames):
		selected = []
		# storage for status label: "selected", "blacklisted", ...
		filestatus = {}
		# no match yet ..
		seleted = []
		# loop over selection criteria,
		# this should give either none or a single file:
		for selection in selections:
		# make empty again:
		# replace templates:
		# %{orbit} == '12345'
		# to:
		# df['orbit'] == '12345'
		for key in self.df.keys():
		selection = selection.replace(f"%{{{key}}}", f"df['{key}']")
		# endfor
		# testing ...
		logging.info(f"{indent}selection `{selection}` ...")
		# evaluate:
		xdf = df[ eval(selection) ]
		# any?
		if len(xdf) > 0:
		# selected indices (filenames):
		selected = []
		# loop over records:
		for indx, xrec in df.iterrows():
		for indx, xrec in xdf.iterrows():
		# skip?
		if os.path.basename(indx) in blacklist:
		filestatus[indx] = "blacklisted"
		continue
		# endif
		# evaluate expression including 'xrec[key]' values:
		if eval(selection):
		# store:
		selected.append(indx)
		# set status:
		filestatus[indx] = "selected"
		# endif
		# endfor # records
		@@ -1688,7 +1696,7 @@ class CSO_Listing(object):
		for fname, row in df.iterrows():
		line = fname
		if fname in filestatus.keys():
		line = line + " [" + filestatus[fname] + "]"
		line = line + f" [{filestatus[fname]}]"
		logging.info(f"{indent} {line}")
		# endfor
		# endif # verbose

src/cso/cso_viirs.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -1289,7 +1289,7 @@ class CSO_VIIRS_Convert(utopya.UtopyaRc):
		# read:
		listing = cso_file.CSO_Listing(listing_file)
		# info ...
		logging.info(f"{indent}number of files : %i" % len(listing))
		logging.info(f"{indent}number of files : {len(listing)}")

		# path:
		listing_dir = os.path.dirname(listing_file)
		@@ -1325,7 +1325,7 @@ class CSO_VIIRS_Convert(utopya.UtopyaRc):
		logging.info(f"{indent} orbit '{orbit}' ...")

		# select orbits:
		olst = xlst.Select(expr=f"%{{orbit}} == '{orbit}'", verbose=False)
		olst = xlst.Select(expr=f"%{{orbit}} == '{orbit}'", verbose=False, indent=indent+" ")
		# info ..
		logging.info(f"{indent} process {len(olst)} files ...")
		# next orbit if none: