TNO Intern

Commit 3769a099 authored by Arjo Segers
Browse files

Improved speed of selecting listing records.

parent 7afe471c
Loading
Loading
Loading
Loading
+30 −22
Original line number Diff line number Diff line
@@ -52,6 +52,9 @@
# 2025-06, Arjo Segers
#   Set flag to avoid warnings when decoding time arrays from VIIRS files.
#
# 2025-09, Arjo Segers
#   Improved speed of CSO_Listing.Selection method.
#

########################################################################
###
@@ -1639,35 +1642,40 @@ class CSO_Listing(object):

        # evaluate selection expression?
        if expr is not None:
            # replace templates:
            #    %{orbit} == '12345'
            # to:
            #    xrec['orbit'] == '12345'
            for key in self.df.keys():
                expr = expr.replace("%{" + key + "}", "xrec['" + key + "']")
            # endfor
            # split:
            # split into sequence of criteria; the first expression with a matching record will be used:
            selections = expr.split(";")

            # initialize empty list of selected record indices (filenames):
            selected = []
            # storage for status label: "selected", "blacklisted", ...
            filestatus = {}
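            # no match yet ..
            # NOTE(review): the assignment below is spelled `seleted`; this looks like a typo for `selected` - confirm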
            seleted = []
            # loop over selection criteria,
            # this should give either none or a single file:
            for selection in selections:
                # make empty again:
                # replace templates:
                #    %{orbit} == '12345'
                # to:
                #    df['orbit'] == '12345'
                for key in self.df.keys():
                    selection = selection.replace(f"%{{{key}}}", f"df['{key}']")
                # endfor
                # testing ...
                logging.info(f"{indent}selection `{selection}` ...")
                # evaluate:
                xdf = df[ eval(selection) ]
                # any?
                if len(xdf) > 0:
                    # selected indices (filenames):
                    selected = []
                # loop over records:
                for indx, xrec in df.iterrows():
                    for indx, xrec in xdf.iterrows():
                        # skip?
                        if os.path.basename(indx) in blacklist:
                            filestatus[indx] = "blacklisted"
                            continue
                        # endif
                    # evaluate expression including 'xrec[key]' values:
                    if eval(selection):
                        # store:
                        selected.append(indx)
                        # set status:
                        filestatus[indx] = "selected"
                    # endif
                # endfor # records
@@ -1688,7 +1696,7 @@ class CSO_Listing(object):
                for fname, row in df.iterrows():
                    line = fname
                    if fname in filestatus.keys():
                        line = line + " [" + filestatus[fname] + "]"
                        line = line + f" [{filestatus[fname]}]"
                    logging.info(f"{indent}  {line}")
                # endfor
            # endif # verbose
+2 −2
Original line number Diff line number Diff line
@@ -1289,7 +1289,7 @@ class CSO_VIIRS_Convert(utopya.UtopyaRc):
        # read:
        listing = cso_file.CSO_Listing(listing_file)
        # info ...
        logging.info(f"{indent}number of files : %i" % len(listing))
        logging.info(f"{indent}number of files : {len(listing)}")

        # path:
        listing_dir = os.path.dirname(listing_file)
@@ -1325,7 +1325,7 @@ class CSO_VIIRS_Convert(utopya.UtopyaRc):
            logging.info(f"{indent}  orbit '{orbit}' ...")

            # select orbits:
            olst = xlst.Select(expr=f"%{{orbit}} == '{orbit}'", verbose=False)
            olst = xlst.Select(expr=f"%{{orbit}} == '{orbit}'", verbose=False, indent=indent+"    ")
            # info ..
            logging.info(f"{indent}    process {len(olst)} files ...")
            # next orbit if none: