TNO Intern

Commit f65776aa authored by Arjo Segers
Browse files

Extended processing of listing files.

parent e30e1602
Loading
Loading
Loading
Loading
+75 −43
Original line number Diff line number Diff line
# Changes
#
# CHANGES
#
# 2022-09, Arjo Segers
#   Write files with zlib-compression, option to disable this.
@@ -39,6 +40,14 @@
# 2025-04, Arjo Segers
#   Enable zlib compression only for numerical data.
#   Avoid warnings from packing in case of all-nan values.
#   Open a file rather than loading it.
#   Extended sort options for listing files.
#   Support creation of listing file objects without filename.
#   Support selection of multiple records from listing file.
#
# 2025-04, Arjo Segers
#   Changed imports for python packaging.
#   Changed expansion of datetime values from csv files for recent pandas versions.
#

########################################################################
@@ -189,7 +198,7 @@ def Pack_DataArray(da, dtype="i2"):

    # only floats ...
    if da.dtype in [numpy.float32, numpy.float64]:
        # should have some values ..
        # any values defined?
        if numpy.any( ~ numpy.isnan(da.values) ) :
            # value range, ignore nan's:
            vmin = numpy.nanmin(da.values)
@@ -276,11 +285,13 @@ class CSO_File(object):
                raise Exception
            # endif

            # access dataset:
            with xarray.open_dataset(self.filename) as self.ds:
                # load entire file:
                self.ds.load()
            # endwith # xarray
            # open file:
            try:
                self.ds = xarray.open_dataset(self.filename)
            except:
                logging.error(f"could not open (corrupted?) file: {self.filename}")
                raise
            #endtry
            
        else:
            # dummy:
@@ -1291,14 +1302,25 @@ class CSO_Listing(object):
        # head for index column:
        self.index_label = "filename"

        # read?
        if filename is not None:
        # store filename:
        self.filename = filename

        # directory name:
        if self.filename is not None:

            # check ..
            if not os.path.isfile(filename):
                logging.error("listing file not found: %s" % filename)
                raise Exception
            # endif

            # base directory:
            self.dirname = os.path.dirname(self.filename)
            # could be empty ..
            if len(self.dirname) == 0 :
                self.dirname = os.curdir
            # endif

            # info ...
            logging.info(f"{indent} read listing {filename} ...")
            # read:
@@ -1314,7 +1336,11 @@ class CSO_Listing(object):
            self.df["end_time"] = pandas.to_datetime(self.df["end_time"])

        else:
            # new table:

            # not defined yet, assume current location:
            self.dirname = os.curdir

            # new empty table:
            self.df = pandas.DataFrame(columns=["start_time", "end_time"])

        # endif
@@ -1343,7 +1369,7 @@ class CSO_Listing(object):
        # save, also write the index column:
        self.df.to_csv(filename, sep=self.sep, columns=columns, index_label=self.index_label)

    # enddef Close
    # enddef Save

    # *

@@ -1465,7 +1491,7 @@ class CSO_Listing(object):

        # check ..
        if fname not in self.df.index:
            logging.error('file "%s" is not a record in table: %s' % (fname, filename))
            logging.error(f"file '{fname}' is not a record in table: {self.filename}")
            raise Exception
        # endif

@@ -1532,9 +1558,9 @@ class CSO_Listing(object):

    # *

    def Select(self, tr=None, method="overlap", expr=None, blacklist=[], indent="", **kwargs):
    def Select(self, tr=None, method="overlap", expr=None, blacklist=[], verbose=True, indent="", **kwargs):
        """
        Return :py:class:`CSO_Listing` objects with selection of records.
        Return :py:class:`CSO_Listing` object with selection of records.

        Optional arguments:

@@ -1594,7 +1620,7 @@ class CSO_Listing(object):
        for key, value in kwargs.items():
            # check ..
            if key not in df.keys():
                logging.error(f"key '{key}' not defined in listing")
                logging.error(f"key '{key}' not defined in listing: {self.filename}")
                raise Exception
            # endif
            # select:
@@ -1606,6 +1632,9 @@ class CSO_Listing(object):
        # evaluate selection expression?
        if expr is not None:
            # replace templates:
            #    %{orbit} == '12345'
            # to:
            #    xrec['orbit'] == '12345'
            for key in self.df.keys():
                expr = expr.replace("%{" + key + "}", "xrec['" + key + "']")
            # endfor
@@ -1632,21 +1661,19 @@ class CSO_Listing(object):
                    if eval(selection):
                        selected.append(indx)
                        filestatus[indx] = "selected"
                        rec = xrec
                    # endif
                # endfor # records
                # exactly one? then leave:
                if len(selected) == 1:

                # any selected?
                if len(selected) > 0:
                    # leave:
                    break
                elif len(selected) > 1:
                    logging.error(f"found more than one record matching selection: {selection}")
                    for fname in selected:
                        logging.error(f"  {fname}")
                    # endfor
                    raise Exception
                # endif  # number found
                #endif

            # endfor # selection criteria

            # show selection?
            if verbose :
                # info ...
                logging.info(f"{indent}available records(s):")
                # loop:
@@ -1656,6 +1683,7 @@ class CSO_Listing(object):
                        line = line + " [" + filestatus[fname] + "]"
                    logging.info(f"{indent}  {line}")
                # endfor
            #endif # verbose

            # no match?
            if len(selected) == 0:
@@ -1670,8 +1698,8 @@ class CSO_Listing(object):
                # create empty dataframe as result:
                df = pandas.DataFrame(columns=df.columns)
            else:
                # extract selected record:
                df = df.loc[[selected[0]]]
                # extract selected record(s):
                df = df.loc[selected]
            # endif

        # endif
@@ -1721,13 +1749,17 @@ class CSO_Listing(object):

    # *

    def Sort(self, by="filename"):
    def Sort(self, by=None):
        """
        Sort listing table by filename or other key.
        Sort listing table by index (default, this is the "filename") or by a named column.
        """

        # sort inplace:
        # sort index or values:
        if by is None:
            self.df.sort_index(inplace=True)
        else :
            self.df.sort_values(by, inplace=True)
        # endif

    # enddef Sort